From 94f7908fdbfe6e6e0d5006738395a1fc308f7f30 Mon Sep 17 00:00:00 2001 From: ZoeLeBlanc Date: Wed, 7 May 2025 12:33:56 -0500 Subject: [PATCH 1/7] upgrading ruby and jekyll versions to work on Anisa's computer and speed up build times --- .github/workflows/build.yml | 4 +- .ruby-version | 2 +- Gemfile | 2 +- Gemfile.lock | 228 ++++++++++++++++++++++-------------- 4 files changed, 141 insertions(+), 95 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 917cf03e3c..680e8fba8f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -19,7 +19,7 @@ jobs: if : github.event.action != 'closed' uses: ruby/setup-ruby@v1 with: - ruby-version: 2.6 + ruby-version: 3.2.2 bundler-cache: true # runs 'bundle install' and caches installed gems automatically # We then check if we can build our Jekyll site (all this logic is built-in with Jekyll and this prevents us from merging in any syntax errors). - name: Jekyll build @@ -29,7 +29,7 @@ jobs: bundle exec jekyll build # Next we check all the links in our site to make sure we aren't pushing up broken links. - name: Check HTML - uses: zoeleblanc/htmlproofer@master + uses: chabad360/htmlproofer@master if : github.event.action != 'closed' with: directory: "./_site" diff --git a/.ruby-version b/.ruby-version index d5724cd41b..acf9bf09db 100644 --- a/.ruby-version +++ b/.ruby-version @@ -1 +1 @@ -2.6.2 \ No newline at end of file +3.2.2 \ No newline at end of file diff --git a/Gemfile b/Gemfile index 4779aebab5..4bbb9cb5f6 100644 --- a/Gemfile +++ b/Gemfile @@ -2,4 +2,4 @@ source 'https://rubygems.org' gem 'github-pages' gem 'jekyll-redirect-from' gem 'jekyll-paginate' -gem 'html-proofer', "~> 3.0", "< 3.18" \ No newline at end of file +gem 'html-proofer' \ No newline at end of file diff --git a/Gemfile.lock b/Gemfile.lock index 4484dd0b56..d84c726f61 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,47 +1,80 @@ GEM remote: https://rubygems.org/ specs: - activesupport (6.0.5.1) - concurrent-ruby (~> 1.0, >= 1.0.2) - i18n (>= 0.7, < 2) - minitest (~> 5.1) - tzinfo (~> 1.1) - zeitwerk (~> 2.2, >= 2.2.2) - addressable (2.8.0) - public_suffix (>= 2.0.2, < 5.0) + Ascii85 (2.0.1) + activesupport (8.0.2) + base64 + benchmark (>= 0.3) + bigdecimal + concurrent-ruby (~> 1.0, >= 1.3.1) + connection_pool (>= 2.2.5) + drb + i18n (>= 1.6, < 2) + logger (>= 1.4.2) + minitest (>= 5.1) + securerandom (>= 0.3) + tzinfo (~> 2.0, >= 2.0.5) + uri (>= 0.13.1) + addressable (2.8.7) + public_suffix (>= 2.0.2, < 7.0) + afm (0.2.2) + async (2.24.0) + console (~> 1.29) + fiber-annotation + io-event (~> 1.9) + metrics (~> 0.12) + traces (~> 0.15) + base64 (0.2.0) + benchmark (0.4.0) + bigdecimal (3.1.9) coffee-script (2.4.1) coffee-script-source execjs - coffee-script-source (1.11.1) + coffee-script-source (1.12.2) colorator (1.1.0) - commonmarker (0.23.5) - concurrent-ruby (1.1.10) - dnsruby (1.61.9) - simpleidn (~> 0.1) + commonmarker (0.23.11) + concurrent-ruby (1.3.5) + connection_pool (2.5.3) + console (1.30.2) + fiber-annotation + fiber-local (~> 1.1) + json + csv (3.3.4) + dnsruby (1.72.4) + base64 (~> 0.2.0) + logger (~> 1.6.5) + simpleidn (~> 0.2.1) + drb (2.2.1) em-websocket (0.5.3) eventmachine (>= 0.12.9) http_parser.rb (~> 0) - ethon (0.15.0) + ethon (0.16.0) ffi (>= 1.15.0) eventmachine (1.2.7) - execjs (2.8.1) - faraday (2.3.0) - faraday-net_http (~> 2.0) - ruby2_keywords (>= 0.0.4) - faraday-net_http (2.0.3) - ffi (1.15.5) + execjs (2.10.0) + faraday (2.13.1) + faraday-net_http (>= 2.0, < 3.5) + json + logger + 
faraday-net_http (3.4.0) + net-http (>= 0.5.0) + ffi (1.17.2-arm64-darwin) + fiber-annotation (0.2.0) + fiber-local (1.1.0) + fiber-storage + fiber-storage (1.0.1) forwardable-extended (2.6.0) - gemoji (3.0.1) - github-pages (227) - github-pages-health-check (= 1.17.9) - jekyll (= 3.9.2) - jekyll-avatar (= 0.7.0) - jekyll-coffeescript (= 1.1.1) - jekyll-commonmark-ghpages (= 0.2.0) - jekyll-default-layout (= 0.1.4) - jekyll-feed (= 0.15.1) + gemoji (4.1.0) + github-pages (232) + github-pages-health-check (= 1.18.2) + jekyll (= 3.10.0) + jekyll-avatar (= 0.8.0) + jekyll-coffeescript (= 1.2.2) + jekyll-commonmark-ghpages (= 0.5.1) + jekyll-default-layout (= 0.1.5) + jekyll-feed (= 0.17.0) jekyll-gist (= 1.5.0) - jekyll-github-metadata (= 2.13.0) + jekyll-github-metadata (= 2.16.1) jekyll-include-cache (= 0.2.1) jekyll-mentions (= 1.6.0) jekyll-optional-front-matter (= 0.3.2) @@ -68,40 +101,45 @@ GEM jekyll-theme-tactile (= 0.2.0) jekyll-theme-time-machine (= 0.2.0) jekyll-titles-from-headings (= 0.5.3) - jemoji (= 0.12.0) - kramdown (= 2.3.2) + jemoji (= 0.13.0) + kramdown (= 2.4.0) kramdown-parser-gfm (= 1.1.0) - liquid (= 4.0.3) + liquid (= 4.0.4) mercenary (~> 0.3) minima (= 2.5.1) - nokogiri (>= 1.13.6, < 2.0) - rouge (= 3.26.0) + nokogiri (>= 1.16.2, < 2.0) + rouge (= 3.30.0) terminal-table (~> 1.4) - github-pages-health-check (1.17.9) + webrick (~> 1.8) + github-pages-health-check (1.18.2) addressable (~> 2.3) dnsruby (~> 1.60) - octokit (~> 4.0) - public_suffix (>= 3.0, < 5.0) + octokit (>= 4, < 8) + public_suffix (>= 3.0, < 6.0) typhoeus (~> 1.3) - html-pipeline (2.14.2) + hashery (2.1.2) + html-pipeline (2.14.3) activesupport (>= 2) nokogiri (>= 1.4) - html-proofer (3.17.4) + html-proofer (5.0.10) addressable (~> 2.3) - mercenary (~> 0.3) - nokogumbo (~> 2.0) - parallel (~> 1.3) + async (~> 2.1) + nokogiri (~> 1.13) + pdf-reader (~> 2.11) rainbow (~> 3.0) typhoeus (~> 1.3) yell (~> 2.0) + zeitwerk (~> 2.5) http_parser.rb (0.8.0) - i18n (0.9.5) + i18n (1.14.7) concurrent-ruby (~> 1.0) - jekyll (3.9.2) + io-event (1.10.0) + jekyll (3.10.0) addressable (~> 2.4) colorator (~> 1.0) + csv (~> 3.0) em-websocket (~> 0.5) - i18n (~> 0.7) + i18n (>= 0.7, < 2) jekyll-sass-converter (~> 1.0) jekyll-watch (~> 2.0) kramdown (>= 1.17, < 3) @@ -110,27 +148,28 @@ GEM pathutil (~> 0.9) rouge (>= 1.7, < 4) safe_yaml (~> 1.0) - jekyll-avatar (0.7.0) + webrick (>= 1.0) + jekyll-avatar (0.8.0) jekyll (>= 3.0, < 5.0) - jekyll-coffeescript (1.1.1) + jekyll-coffeescript (1.2.2) coffee-script (~> 2.2) - coffee-script-source (~> 1.11.1) + coffee-script-source (~> 1.12) jekyll-commonmark (1.4.0) commonmarker (~> 0.22) - jekyll-commonmark-ghpages (0.2.0) - commonmarker (~> 0.23.4) - jekyll (~> 3.9.0) + jekyll-commonmark-ghpages (0.5.1) + commonmarker (>= 0.23.7, < 1.1.0) + jekyll (>= 3.9, < 4.0) jekyll-commonmark (~> 1.4.0) - rouge (>= 2.0, < 4.0) - jekyll-default-layout (0.1.4) - jekyll (~> 3.0) - jekyll-feed (0.15.1) + rouge (>= 2.0, < 5.0) + jekyll-default-layout (0.1.5) + jekyll (>= 3.0, < 5.0) + jekyll-feed (0.17.0) jekyll (>= 3.7, < 5.0) jekyll-gist (1.5.0) octokit (~> 4.2) - jekyll-github-metadata (2.13.0) + jekyll-github-metadata (2.16.1) jekyll (>= 3.4, < 5.0) - octokit (~> 4.0, != 4.4.0) + octokit (>= 4, < 7, != 4.4.0) jekyll-include-cache (0.2.1) jekyll (>= 3.7, < 5.0) jekyll-mentions (1.6.0) @@ -201,46 +240,52 @@ GEM jekyll (>= 3.3, < 5.0) jekyll-watch (2.2.1) listen (~> 3.0) - jemoji (0.12.0) - gemoji (~> 3.0) + jemoji (0.13.0) + gemoji (>= 3, < 5) html-pipeline (~> 2.2) jekyll (>= 3.0, < 5.0) - 
kramdown (2.3.2) + json (2.11.3) + kramdown (2.4.0) rexml kramdown-parser-gfm (1.1.0) kramdown (~> 2.0) - liquid (4.0.3) - listen (3.7.1) + liquid (4.0.4) + listen (3.9.0) rb-fsevent (~> 0.10, >= 0.10.3) rb-inotify (~> 0.9, >= 0.9.10) + logger (1.6.6) mercenary (0.3.6) - mini_portile2 (2.8.0) + metrics (0.12.2) minima (2.5.1) jekyll (>= 3.5, < 5.0) jekyll-feed (~> 0.9) jekyll-seo-tag (~> 2.1) - minitest (5.16.2) - nokogiri (1.13.8) - mini_portile2 (~> 2.8.0) + minitest (5.25.5) + net-http (0.6.0) + uri + nokogiri (1.18.8-arm64-darwin) racc (~> 1.4) - nokogumbo (2.0.5) - nokogiri (~> 1.8, >= 1.8.4) octokit (4.25.1) faraday (>= 1, < 3) sawyer (~> 0.9) - parallel (1.22.1) pathutil (0.16.2) forwardable-extended (~> 2.6) - public_suffix (4.0.7) - racc (1.6.0) + pdf-reader (2.14.1) + Ascii85 (>= 1.0, < 3.0, != 2.0.0) + afm (~> 0.2.1) + hashery (~> 2.0) + ruby-rc4 + ttfunk + public_suffix (5.1.1) + racc (1.8.1) rainbow (3.1.1) - rb-fsevent (0.11.1) - rb-inotify (0.10.1) + rb-fsevent (0.11.2) + rb-inotify (0.11.1) ffi (~> 1.0) - rexml (3.2.5) - rouge (3.26.0) - ruby2_keywords (0.0.5) - rubyzip (2.3.2) + rexml (3.4.1) + rouge (3.30.0) + ruby-rc4 (0.1.5) + rubyzip (2.4.1) safe_yaml (1.0.5) sass (3.7.4) sass-listen (~> 4.0.0) @@ -250,30 +295,31 @@ GEM sawyer (0.9.2) addressable (>= 2.3.5) faraday (>= 0.17.3, < 3) - simpleidn (0.2.1) - unf (~> 0.1.4) + securerandom (0.4.1) + simpleidn (0.2.3) terminal-table (1.8.0) unicode-display_width (~> 1.1, >= 1.1.1) - thread_safe (0.3.6) - typhoeus (1.4.0) + traces (0.15.2) + ttfunk (1.8.0) + bigdecimal (~> 3.1) + typhoeus (1.4.1) ethon (>= 0.9.0) - tzinfo (1.2.10) - thread_safe (~> 0.1) - unf (0.1.4) - unf_ext - unf_ext (0.0.8.2) + tzinfo (2.0.6) + concurrent-ruby (~> 1.0) unicode-display_width (1.8.0) + uri (1.0.3) + webrick (1.9.1) yell (2.2.2) - zeitwerk (2.6.0) + zeitwerk (2.7.2) PLATFORMS - ruby + arm64-darwin-24 DEPENDENCIES github-pages - html-proofer (~> 3.0, < 3.18) + html-proofer jekyll-paginate jekyll-redirect-from BUNDLED WITH - 2.1.4 + 2.4.22 From 2ab91caceeea2b9226d0c25287a84aff3ccb08a5 Mon Sep 17 00:00:00 2001 From: ZoeLeBlanc Date: Wed, 7 May 2025 12:39:19 -0500 Subject: [PATCH 2/7] adding bundle lock to see if that fixes build error --- Gemfile.lock | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index d84c726f61..53baf00277 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -51,6 +51,7 @@ GEM ethon (0.16.0) ffi (>= 1.15.0) eventmachine (1.2.7) + eventmachine (1.2.7-x64-mingw32) execjs (2.10.0) faraday (2.13.1) faraday-net_http (>= 2.0, < 3.5) @@ -58,7 +59,7 @@ GEM logger faraday-net_http (3.4.0) net-http (>= 0.5.0) - ffi (1.17.2-arm64-darwin) + ffi (1.17.2) fiber-annotation (0.2.0) fiber-local (1.1.0) fiber-storage @@ -256,6 +257,7 @@ GEM logger (1.6.6) mercenary (0.3.6) metrics (0.12.2) + mini_portile2 (2.8.8) minima (2.5.1) jekyll (>= 3.5, < 5.0) jekyll-feed (~> 0.9) @@ -263,8 +265,15 @@ GEM minitest (5.25.5) net-http (0.6.0) uri + nokogiri (1.18.8) + mini_portile2 (~> 2.8.2) + racc (~> 1.4) nokogiri (1.18.8-arm64-darwin) racc (~> 1.4) + nokogiri (1.18.8-x86_64-darwin) + racc (~> 1.4) + nokogiri (1.18.8-x86_64-linux-gnu) + racc (~> 1.4) octokit (4.25.1) faraday (>= 1, < 3) sawyer (~> 0.9) @@ -314,6 +323,9 @@ GEM PLATFORMS arm64-darwin-24 + x64-mingw32 + x86_64-darwin-20 + x86_64-linux DEPENDENCIES github-pages From 80f88b881a36818227ec59a5f20c5eb1047ed4fd Mon Sep 17 00:00:00 2001 From: ZoeLeBlanc Date: Wed, 7 May 2025 13:26:42 -0500 Subject: [PATCH 3/7] fixing htmlproofer and a ton of 
link breaks --- .htmlproofer.yml | 24 +++++++++++++++++++++++ _build/build.sh | 9 ++++++++- _data/snippets.yml | 6 +++--- _includes/lesson-index.html | 6 +++--- pt/licoes/introducao-jupyter-notebooks.md | 2 +- 5 files changed, 39 insertions(+), 8 deletions(-) create mode 100644 .htmlproofer.yml diff --git a/.htmlproofer.yml b/.htmlproofer.yml new file mode 100644 index 0000000000..ee52dadfa6 --- /dev/null +++ b/.htmlproofer.yml @@ -0,0 +1,24 @@ +assume_extension: true +check_img_alt: false +only_4xx: true +http_status_ignore: + - 429 + - 403 + - 400 +url_ignore: + - /http:\/\/www.gutenberg.org\/.*?/ + - /https:\/\/github.com\/programminghistorian\/.*?/ + - /https:\/\/github.com\/orgs\/programminghistorian\/.*?/ + - /\#/ + - /espanol/ + - /deprecated/ + - /collection.britishmuseum.org/ + - /analytics.hathitrust.org/ + - /fr.wikipedia.org\/wiki/ + - /https:\/\/web.archive.org\/web\/20180831094856\/http:\/\/www.dlsi.ua.es\/~borja\/riilua\/6.TopicModeling_v02.pdf/ +directory_ignore: + - /assets/ + - /retired/ + - /retirada/ + - /retrait/ + - /posts/ \ No newline at end of file diff --git a/_build/build.sh b/_build/build.sh index 3ddf87a961..a057103ad3 100755 --- a/_build/build.sh +++ b/_build/build.sh @@ -2,7 +2,14 @@ # Build site, and then run htmlproofer to check for broken internal and external links -bundle exec jekyll build && htmlproofer _site --assume-extension --empty-alt-ignore --alt-ignore '/.*/' --file-ignore "/assets/,/retired/,/retirada/,/retrait/,/posts/" --timeframe '30d' --only-4xx --http-status-ignore 429,403,400 --url-ignore "/http://www.gutenberg.org/*/,/https://github.com/programminghistorian/jekyll/(commits|blob)/*/,/\#/,/espanol/,/deprecated/,/collection.britishmuseum.org/,/analytics.hathitrust.org/,/fr.wikipedia.org/wiki/,https://web.archive.org/web/20180831094856/http://www.dlsi.ua.es/~borja/riilua/6.TopicModeling_v02.pdf" --allow-hash-href +# bundle exec jekyll build && htmlproofer _site --assume-extension --empty-alt-ignore --alt-ignore '/.*/' --file-ignore "/assets/,/retired/,/retirada/,/retrait/,/posts/" --timeframe '30d' --only-4xx --http-status-ignore 429,403,400 --url-ignore "/http://www.gutenberg.org/*/,/https://github.com/programminghistorian/jekyll/(commits|blob)/*/,/\#/,/espanol/,/deprecated/,/collection.britishmuseum.org/,/analytics.hathitrust.org/,/fr.wikipedia.org/wiki/,https://web.archive.org/web/20180831094856/http://www.dlsi.ua.es/~borja/riilua/6.TopicModeling_v02.pdf" --allow-hash-href + +bundle exec jekyll build && bundle exec htmlproofer _site \ + --assume-extension \ + --check-img-alt=false \ + --only-4xx \ + --http-status-ignore 429 403 400 \ + --url-ignore "/http://www.gutenberg.org/.*/" "/https://github.com/programminghistorian/.*/" "/https://github.com/orgs/programminghistorian/.*/" "/#/" "/espanol/" "/deprecated/" "/collection.britishmuseum.org/" "/analytics.hathitrust.org/" "/fr.wikipedia.org/wiki/" "/https://web.archive.org/web/20180831094856/http://www.dlsi.ua.es/~borja/riilua/6.TopicModeling_v02.pdf/" ## Updated version of htmlproofer commands (internal linking still doesn't work for some reason). 
# htmlproofer ./_site \ diff --git a/_data/snippets.yml b/_data/snippets.yml index 365018bad8..e4a25dec9d 100644 --- a/_data/snippets.yml +++ b/_data/snippets.yml @@ -209,13 +209,13 @@ menu-contribute-support-donate: link: /en/individual es: title: Apóyanos - Donaciones - link: /es/apoyanos#donaciones + link: /es/donaciones fr: title: Dons individuels - link: /fr/nous-soutenir#dons + link: /fr/dons pt: title: Apoie-nos - Doações - link: /pt/apoie-nos#doacoes + link: /pt/doacoes menu-lessons: en: title: Lessons diff --git a/_includes/lesson-index.html b/_includes/lesson-index.html index 2eedd4b9d9..40350ae6f0 100644 --- a/_includes/lesson-index.html +++ b/_includes/lesson-index.html @@ -86,13 +86,13 @@

{{ site.data.snippets.filtering-results[page.lang] }}: - - + + {% if page.lang != "en" %} - + {% endif %} diff --git a/pt/licoes/introducao-jupyter-notebooks.md b/pt/licoes/introducao-jupyter-notebooks.md index 8d821f891e..0f90e9b02e 100644 --- a/pt/licoes/introducao-jupyter-notebooks.md +++ b/pt/licoes/introducao-jupyter-notebooks.md @@ -99,7 +99,7 @@ Anaconda é uma distribuição gratuita de código aberto de Python e R que vem Para a maioria dos propósitos, você deve optar pela versão Python 3 do Anaconda, mas alguns códigos ainda podem ser escritos em Python 2. Nesta lição, você usará Python 3. O instalador Anaconda tem mais de 500 MB, e após a instalação pode levar mais de 3 GB de espaço no disco rígido, por isso certifique-se de que você tem espaço suficiente no computador e uma conexão de rede rápida antes de começar.
-Se o espaço do disco rígido é uma preocupação, você pode empacotar um notebook para que ele possa ser executado usando recursos gratuitos de computação em nuvem, em vez de fazer com que os usuários instalem o Anaconda. Isso pode ser especialmente útil em situações de oficina. Veja a seção abaixo. +Se o espaço do disco rígido é uma preocupação, você pode empacotar um notebook para que ele possa ser executado usando recursos gratuitos de computação em nuvem, em vez de fazer com que os usuários instalem o Anaconda. Isso pode ser especialmente útil em situações de oficina. Veja a seção abaixo.
Para baixar e instalar a Anaconda, acesse o [site da Anaconda](https://www.anaconda.com/data-science-platform). Certifique-se de ter clicado no ícone do seu sistema operacional (que deve alterar o texto Anaconda [número da versão] para [sistema operacional selecionado], de forma a indicar o seu sistema operacional) e, em seguida, clique no botão Baixar na caixa para a versão atual do Python 3. Se você estiver no Windows, deve baixar um ficheiro `.exe`; em Mac, é `.pkg`; no Linux, é `.sh`. From c23f18856aa24fe29e351f22881e8047a8787787 Mon Sep 17 00:00:00 2001 From: ZoeLeBlanc Date: Wed, 7 May 2025 13:50:52 -0500 Subject: [PATCH 4/7] created a pipeline for dealing with the 6k link errors --- .htmlproofer.yml | 5 +- .../obo-t17800628-33.html | 54 +- htmlproofer-output.txt | 26044 ++++++++++++++++ htmlproofer-report.csv | 6450 ++++ parse_htmlproofer_log.rb | 41 + 5 files changed, 32566 insertions(+), 28 deletions(-) create mode 100644 htmlproofer-output.txt create mode 100644 htmlproofer-report.csv create mode 100644 parse_htmlproofer_log.rb diff --git a/.htmlproofer.yml b/.htmlproofer.yml index ee52dadfa6..52ca5b0104 100644 --- a/.htmlproofer.yml +++ b/.htmlproofer.yml @@ -21,4 +21,7 @@ directory_ignore: - /retired/ - /retirada/ - /retrait/ - - /posts/ \ No newline at end of file + - /posts/ +ignore_elements: + - pre + - code \ No newline at end of file diff --git a/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html b/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html index b24ffcff1d..bf6eb1da0f 100644 --- a/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html +++ b/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html @@ -1,29 +1,29 @@ - - - - - - - - - - - - - - + + + + + + + + + + + + + + Browse - Central Criminal Court - + - + --> diff --git a/_includes/contact-info.html b/_includes/contact-info.html index 547f8fef79..4285b52918 100644 --- a/_includes/contact-info.html +++ b/_includes/contact-info.html @@ -49,10 +49,10 @@ {{ member.email }} {% endif %} {% if member.twitter %} - {{ member.twitter }} + {{ member.twitter }} {% endif %} {% if member.github %} - {{ member.github }} + {{ member.github }} {% endif %} {% if member.orcid %} {% include orcid.html author=member %} diff --git a/_includes/figure.html b/_includes/figure.html index eeec53ce09..9d60547990 100644 --- a/_includes/figure.html +++ b/_includes/figure.html @@ -1,5 +1,5 @@ {% comment %} -figure tags without plugin: http://stackoverflow.com/questions/19331362/using-an-image-caption-in-markdown-jekyll +figure tags without plugin: https://stackoverflow.com/questions/19331362/using-an-image-caption-in-markdown-jekyll If figure.html is being called from a lesson page, it collects the lesson slug from lesson-slug.html in order to compute the correct path to the image. Otherwise, it just appends the basepath and constructs the figure tag normally. diff --git a/_includes/lesson-index.html b/_includes/lesson-index.html index 40350ae6f0..721beac7dc 100644 --- a/_includes/lesson-index.html +++ b/_includes/lesson-index.html @@ -4,7 +4,7 @@ {% endcomment %}
- diff --git a/_posts/2012-06-27-welcome-to-ph2.md b/_posts/2012-06-27-welcome-to-ph2.md index 7785afc67e..56aae6b1a7 100644 --- a/_posts/2012-06-27-welcome-to-ph2.md +++ b/_posts/2012-06-27-welcome-to-ph2.md @@ -53,4 +53,4 @@ site. Tell us what confuses you and how we can improve the experience. In using this resource, you’re joining a community of scholar-programmers, and we couldn’t be happier to welcome you. - [original *Programming Historian*]: http://niche-canada.org/programming-historian + [original *Programming Historian*]: https://niche-canada.org/programming-historian diff --git a/_posts/2014-11-05-how-we-moved-to-github.md b/_posts/2014-11-05-how-we-moved-to-github.md index 0570b6282d..1ab773d845 100644 --- a/_posts/2014-11-05-how-we-moved-to-github.md +++ b/_posts/2014-11-05-how-we-moved-to-github.md @@ -6,11 +6,11 @@ layout: post categories: posts --- -Earlier this year, the editors of The Programming Historian decided to move the site from a Wordpress installation to a [static website](http://en.wikipedia.org/wiki/Static_web_page) hosted on [GitHub Pages](http://pages.github.com). This post is a brief overview of how we made the switch, using some of the same tools and computational methods featured in our lessons. +Earlier this year, the editors of The Programming Historian decided to move the site from a Wordpress installation to a [static website](https://en.wikipedia.org/wiki/Static_web_page) hosted on [GitHub Pages](https://pages.github.com). This post is a brief overview of how we made the switch, using some of the same tools and computational methods featured in our lessons. I'm going to focus on how we converted the HTML pages generated by our Wordpress site into Markdown files that were ready to be deployed on GitHub. In the process, I'll show how it's possible to build on a series of Programming Historian lessons to solve new problems. Be aware, however, that this post will be slightly more technical than our usual lessons; it may be most beneficial for readers who are already comfortable using command line tools like Pandoc and are contemplating a similar conversion for their Wordpress website. -Our new website uses a publishing platform called [Jekyll](http://jekyllrb.com) to turn a repository of files written in [Markdown](https://help.github.com/articles/markdown-basics) into an HTML website. In the case of the Programming Historian, Jekyll uses [this repository](https://github.com/programminghistorian/jekyll) to generate [this website](). Lessons that look like [this](https://raw.githubusercontent.com/programminghistorian/jekyll/gh-pages/en/lessons/data-mining-the-internet-archive.md) are converted by Jekyll into lessons that look like [that](/lessons/data-mining-the-internet-archive). +Our new website uses a publishing platform called [Jekyll](https://jekyllrb.com) to turn a repository of files written in [Markdown](https://help.github.com/articles/markdown-basics) into an HTML website. In the case of the Programming Historian, Jekyll uses [this repository](https://github.com/programminghistorian/jekyll) to generate [this website](). Lessons that look like [this](https://raw.githubusercontent.com/programminghistorian/jekyll/gh-pages/en/lessons/data-mining-the-internet-archive.md) are converted by Jekyll into lessons that look like [that](/lessons/data-mining-the-internet-archive). Thanks to the power of Jekyll, generating our new website was easy once all of our lessons and pages were formatted correctly in Markdown. 
Our challenge was to get all of the HTML pages from the Wordpress site and convert them into Markdown that Jekyll could understand. This was a multi-stage process made easier by tools like Wget, Pandoc, and Python.

@@ -22,7 +22,7 @@ Our first step was to get HTML versions of all the pages and lessons on our old

For the next step---the conversion of these HTML files to Markdown---we decided to use [Pandoc](https://pandoc.org/), a powerful tool described by Dennis Tenen and Grant Wythoff in [Sustainable Authorship in Plain Text using Pandoc and Markdown](/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown).

-That lesson focuses on using Pandoc to convert from Markdown into other formats, but Pandoc is also able to turn HTML to Markdown, which is what we wanted to do. It can even locate metadata in the HTML, such as the author, title, and date, and convert it into a [YAML metadata block](http://jekyllrb.com/docs/frontmatter/) in the Markdown output that Jekyll will recognize.
+That lesson focuses on using Pandoc to convert from Markdown into other formats, but Pandoc is also able to turn HTML to Markdown, which is what we wanted to do. It can even locate metadata in the HTML, such as the author, title, and date, and convert it into a [YAML metadata block](https://jekyllrb.com/docs/frontmatter/) in the Markdown output that Jekyll will recognize.

But Pandoc needs some help to do this. For example, it expects to find metadata in `<meta>` tags that look like this:

@@ -84,14 +84,14 @@ But in our case, the conversion was not quite so simple. Consider what happens i

    Lesson Goals
    ------------

    The collections of the [Internet Archive](http://archive.org/) (IA)
    include many digitized sources of interest to historians, including
    [early JSTOR journal content](https://archive.org/details/jstor_ejc),
    [John Adams's personal library](https://archive.org/details/johnadamsBPL),
    and the [Haiti collection](https://archive.org/details/jcbhaiti) at the
    John Carter Brown Library. In short, to quote Programming Historian
    [Ian
-    Milligan](http://activehistory.ca/2013/09/the-internet-archive-rocks-or-two-million-plus-free-sources-to-explore/),
+    Milligan](https://activehistory.ca/2013/09/the-internet-archive-rocks-or-two-million-plus-free-sources-to-explore/),
    "The Internet Archive rocks."

    In this lesson, you'll learn how to download files from such collections
    using a Python module specifically designed for the Internet Archive.

    For demonstration purposes, this lesson will focus on working with the
    digitized version of the [Anti-Slavery
-    Collection](http://archive.org/details/bplscas) at the Boston Public
+    Collection](https://archive.org/details/bplscas) at the Boston Public
    Library in Copley Square. We will first download a large collection of
    MARC records from this collection, and then use Python to retrieve and
    analyze bibliographic information about items in the collection.
For @@ -203,14 +203,14 @@ When we do, the start of our Markdown output should now look like this: Lesson Goals ------------ - The collections of the [Internet Archive](http://archive.org/) (IA) + The collections of the [Internet Archive](https://archive.org/) (IA) include many digitized sources of interest to historians, including [early JSTOR journal content](https://archive.org/details/jstor_ejc), [John Adams's personal library](https://archive.org/details/johnadamsBPL), and the [Haiti collection](https://archive.org/details/jcbhaiti) at the John Carter Brown Library. In short, to quote Programming Historian [Ian - Milligan](http://activehistory.ca/2013/09/the-internet-archive-rocks-or-two-million-plus-free-sources-to-explore/), + Milligan](https://activehistory.ca/2013/09/the-internet-archive-rocks-or-two-million-plus-free-sources-to-explore/), "The Internet Archive rocks." In this lesson, you'll learn how to download files from such collections @@ -221,7 +221,7 @@ When we do, the start of our Markdown output should now look like this: For demonstration purposes, this lesson will focus on working with the digitized version of the [Anti-Slavery - Collection](http://archive.org/details/bplscas) at the Boston Public + Collection](https://archive.org/details/bplscas) at the Boston Public Library in Copley Square. We will first download a large collection of MARC records from this collection, and then use Python to retrieve and analyze bibliographic information about items in the collection. For @@ -269,7 +269,7 @@ When we do, the start of our Markdown output should now look like this: Now you are ready to go to work! -Notice that our metadata is now inserted in the output as a [Jekyll metadata block](http://jekyllrb.com/docs/frontmatter/). Hooray! +Notice that our metadata is now inserted in the output as a [Jekyll metadata block](https://jekyllrb.com/docs/frontmatter/). Hooray! ### Converting Code Block Syntax @@ -301,14 +301,14 @@ Run that command on the same [modified HTML file](https://github.com/programming Lesson Goals ------------ - The collections of the [Internet Archive](http://archive.org/) (IA) + The collections of the [Internet Archive](https://archive.org/) (IA) include many digitized sources of interest to historians, including [early JSTOR journal content](https://archive.org/details/jstor_ejc), [John Adams’s personal library](https://archive.org/details/johnadamsBPL), and the [Haiti collection](https://archive.org/details/jcbhaiti) at the John Carter Brown Library. In short, to quote Programming Historian [Ian - Milligan](http://activehistory.ca/2013/09/the-internet-archive-rocks-or-two-million-plus-free-sources-to-explore/), + Milligan](https://activehistory.ca/2013/09/the-internet-archive-rocks-or-two-million-plus-free-sources-to-explore/), “The Internet Archive rocks.” In this lesson, you’ll learn how to download files from such collections @@ -319,7 +319,7 @@ Run that command on the same [modified HTML file](https://github.com/programming For demonstration purposes, this lesson will focus on working with the digitized version of the [Anti-Slavery - Collection](http://archive.org/details/bplscas) at the Boston Public + Collection](https://archive.org/details/bplscas) at the Boston Public Library in Copley Square. We will first download a large collection of MARC records from this collection, and then use Python to retrieve and analyze bibliographic information about items in the collection. 
For
diff --git a/_posts/2015-06-02-programming-historian-live-london.md b/_posts/2015-06-02-programming-historian-live-london.md index f174b66294..892d7596d7 100644 --- a/_posts/2015-06-02-programming-historian-live-london.md +++ b/_posts/2015-06-02-programming-historian-live-london.md @@ -16,6 +16,6 @@ The provisional schedule will include introductions to:
- Corpus Analysis (AntConc, Shell)
- Web Scraping (Wget)

-Places are limited and can be booked on [Eventbrite](http://proghistlive.eventbrite.co.uk). Please note that attendees will be required to bring their own laptop. Questions and queries should be directed to James Baker at drjameswbaker@gmail.com.
+Places are limited and can be booked on [Eventbrite](https://proghistlive.eventbrite.co.uk). Please note that attendees will be required to bring their own laptop. Questions and queries should be directed to James Baker at drjameswbaker@gmail.com.

-Programming Historian Live is funded by the [Software Sustainability Institute](http://www.software.ac.uk/), and is supported by the [British Library](http://www.bl.uk/), the [Institute of Historical Research](http://www.history.ac.uk/), and [The Programming Historian](/).
+Programming Historian Live is funded by the [Software Sustainability Institute](https://www.software.ac.uk/), and is supported by the [British Library](https://www.bl.uk/), the [Institute of Historical Research](https://www.history.ac.uk/), and [The Programming Historian](/).
diff --git a/_posts/2016-03-28-reintroducing-the-ph-blog.md b/_posts/2016-03-28-reintroducing-the-ph-blog.md index eb8fb8c652..34331af93b 100644 --- a/_posts/2016-03-28-reintroducing-the-ph-blog.md +++ b/_posts/2016-03-28-reintroducing-the-ph-blog.md @@ -13,7 +13,7 @@ Currently, *The Programming Historian*'s content focuses on lessons that explain

While individuals who are already steeped in the digital humanities may find themselves on PH because they already know they'd like to learn a particular skill, those who are just learning about the digital humanities might be less sure about how or why PH could be a valuable resource for them—particularly those who do not self-identify as "programmers." The revitalized PH blog seeks to bridge the gap between script writing, data mining, code wrangling scholars, and scholars who are just beginning to learn about the different ways they can use digital methods to conduct their research, present their research to wider audiences, and inform their teaching.

-In keeping with the spirit of introducing the revitalized PH blog, I'd like to introduce myself and say how thrilled I am to have recently joined the PH editorial board. My name is Evan Taparata, and I am a doctoral candidate in History at the University of Minnesota, where I am writing a dissertation about the history of refugee law and policy in the United States over the long nineteenth century. Before joining PH, I've explored my interest in the intersection of public history and digital humanities by working with my UMN colleagues on two [Humanities Action Lab](http://humanitiesactionlab.org) initiatives: the ["Guantánamo Public Memory Project,"](http://gitmomemory.org) which examines the history of the U.S. naval base at Guantánamo Bay, Cuba, and "States of Incarceration"—launching in April 2016—which explores the past, present, and future of mass incarceration in the United States.
I am interested in cultivating the PH blog as a resource for educators who would like to learn more about using digital humanities methods and tools in the classroom, and I am especially invested in using the blog to explore how digital methods can help students do innovative and original historical research that reaches beyond academia and engages public audiences. I am very excited to work toward achieving this goal with the support of my fellow PH editorial board members. +In keeping with the spirit of introducing the revitalized PH blog, I'd like to introduce myself and say how thrilled I am to have recently joined the PH editorial board. My name is Evan Taparata, and I am a doctoral candidate in History at the University of Minnesota, where I am writing a dissertation about the history of refugee law and policy in the United States over the long nineteenth century. Before joining PH, I've explored my interest in the intersection of public history and digital humanities by working with my UMN colleagues on two [Humanities Action Lab](https://humanitiesactionlab.org) initiatives: the ["Guantánamo Public Memory Project,"](https://gitmomemory.org) which examines the history of the U.S. naval base at Guantánamo Bay, Cuba, and "States of Incarceration"—launching in April 2016—which explores the past, present, and future of mass incarceration in the United States. I am interested in cultivating the PH blog as a resource for educators who would like to learn more about using digital humanities methods and tools in the classroom, and I am especially invested in using the blog to explore how digital methods can help students do innovative and original historical research that reaches beyond academia and engages public audiences. I am very excited to work toward achieving this goal with the support of my fellow PH editorial board members. ## What Will Be the Focus of the Blog? diff --git a/_posts/2016-05-10-PH-commitment-to-diversity.md b/_posts/2016-05-10-PH-commitment-to-diversity.md index 161734450c..ce0cf58d62 100644 --- a/_posts/2016-05-10-PH-commitment-to-diversity.md +++ b/_posts/2016-05-10-PH-commitment-to-diversity.md @@ -10,7 +10,7 @@ If you spend too much time inside a project, you soon become unable to see its f Since 2012 we have published 46 tutorials to help people learn new ways that technology can aid them in their research processes. We're proud to say that almost two-hundred thousand unique users around the world have made use of our resources. But we are not so proud to admit that amongst our contributing authors, we're predominantly male: Only 7 women and 23 men. We're also predominantly white and North American -- another fact we're not proud of. -Of course, that hadn't been planned. We weren't doing anything intentionally to dissuade women, people of color, or people from other nations from contributing. But we do know that a number of [well-documented](http://www.aauw.org/research/why-so-few/) [institutional factors](https://web.archive.org/web/20160318111901/http://www.codedoc.co/about/) within the culture of technology and programming work against marginalized groups' participation in technical forums. So, as a start, we began to address these questions of inclusivity by opening up a [discussion on our message board](https://github.com/programminghistorian/jekyll/issues/152), and later an [anonymous survey](https://www.surveymonkey.co.uk/r/SFSRHHD) to ask why our gender numbers are so imbalanced. Special thanks to Heather Froehlich for helping us build our survey. 
We've since collected those results and wanted to share with you both what people said, and what we plan to do about it. +Of course, that hadn't been planned. We weren't doing anything intentionally to dissuade women, people of color, or people from other nations from contributing. But we do know that a number of [well-documented](https://www.aauw.org/research/why-so-few/) [institutional factors](https://web.archive.org/web/20160318111901/https://www.codedoc.co/about/) within the culture of technology and programming work against marginalized groups' participation in technical forums. So, as a start, we began to address these questions of inclusivity by opening up a [discussion on our message board](https://github.com/programminghistorian/jekyll/issues/152), and later an [anonymous survey](https://www.surveymonkey.co.uk/r/SFSRHHD) to ask why our gender numbers are so imbalanced. Special thanks to Heather Froehlich for helping us build our survey. We've since collected those results and wanted to share with you both what people said, and what we plan to do about it. Our survey received 47 responses (49% women), in addition to 58 comments on our message board. The respondents, of course, are self-selecting, but we thought they had important things to say. @@ -72,13 +72,13 @@ Phew, thanks! That's a lot of ideas! We'll be doing our best to chip away at the ### Submission System (Technical) -It's clear that for many people our submission system is a barrier to entry. We chose to use a combination of Markdown and Github pull requests because it was free and was meant to make life easy for editors (who are volunteers). We were quoted £4,000 per year for a commercial option and hoped this free option would work. We do recognize, however, that women in particular have good reasons for hesitating to involve themselves in platforms for producing and distributing digital content that, until recently, have been predominantly composed of men and that [inadvertently favored the contributions of men](http://web.archive.org/web/20160507170231/http://www.usnews.com/news/blogs/data-mine/2016/02/18/study-shows-women-are-better-coders-but-only-when-gender-is-hidden). +It's clear that for many people our submission system is a barrier to entry. We chose to use a combination of Markdown and Github pull requests because it was free and was meant to make life easy for editors (who are volunteers). We were quoted £4,000 per year for a commercial option and hoped this free option would work. We do recognize, however, that women in particular have good reasons for hesitating to involve themselves in platforms for producing and distributing digital content that, until recently, have been predominantly composed of men and that [inadvertently favored the contributions of men](https://web.archive.org/web/20160507170231/https://www.usnews.com/news/blogs/data-mine/2016/02/18/study-shows-women-are-better-coders-but-only-when-gender-is-hidden). To address this, our website editor, Caleb McDaniel, is currently experimenting with options that we hope will make this easier. Stay tuned to the blog for updates on our continued efforts to make submitting a lesson as accessible as possible while maintaining the zero cost and low maintenance requirements for our team. ### Outreach -We'd love to reach more people. So we've brought on a new member to the editorial team, Evan Taparata, who will be working closely with our editorial board to restart our blog, recruit a wider array of voices to PH, and expand our audiences. 
For example, we are interested in including the perspectives of feminist, POC, LGBTQ, and women-friendly programming and technology groups, such as [PyLadies](http://www.pyladies.com/) and [FemTechNet](http://femtechnet.org/). Watch this space, and [email Evan](mailto:tapar001@umn.edu) if you know of potential contributors or collaborators.
+We'd love to reach more people. So we've brought on a new member to the editorial team, Evan Taparata, who will be working closely with our editorial board to restart our blog, recruit a wider array of voices to PH, and expand our audiences. For example, we are interested in including the perspectives of feminist, POC, LGBTQ, and women-friendly programming and technology groups, such as [PyLadies](https://www.pyladies.com/) and [FemTechNet](https://femtechnet.org/). Watch this space, and [email Evan](mailto:tapar001@umn.edu) if you know of potential contributors or collaborators.

### Gender Balance and Submission (Social)
diff --git a/_posts/2016-06-10-adding-to-library-catalogue.md b/_posts/2016-06-10-adding-to-library-catalogue.md index acc1496f2d..203327258c 100644 --- a/_posts/2016-06-10-adding-to-library-catalogue.md +++ b/_posts/2016-06-10-adding-to-library-catalogue.md @@ -8,15 +8,15 @@

One of the suggested ways to help *The Programming Historian* (PH) out in Adam Crymble's recent post, ["*The Programming Historian*'s Commitment to Diversity"](/posts/PH-commitment-to-diversity), was adding PH to your library's catalogue. Doing so not only helps legitimize the efforts of PH's authors as the scholarship it is, it also increases public access to a strong, free resource for exploring the digital humanities (DH). By listing PH in library catalogues, we can help anyone using library search engines to seek DH knowledge find PH and know it's been vetted by librarians as a trustworthy resource.

-I recently worked with colleagues in the cataloguing and metadata services units at the Purdue University Libraries to add PH to our library catalogue [permalink](http://purdue-primo-prod.hosted.exlibrisgroup.com/PURDUE:everything:PURDUE_ALMA51671812890001081). Our criteria for adding catalogue resources listed an OCLC number as a preference, so because PH didn't yet have one, we went ahead and created it: 951537099. This means PH is also now [listed in WorldCat](http://www.worldcat.org/oclc/951537099). (Note that while PH can be added to as many library catalogues as users wish, we only needed one OCLC number creation and WorldCat record—no need to create another!)
+I recently worked with colleagues in the cataloguing and metadata services units at the Purdue University Libraries to add PH to our library catalogue [permalink](https://purdue-primo-prod.hosted.exlibrisgroup.com/PURDUE:everything:PURDUE_ALMA51671812890001081). Our criteria for adding catalogue resources listed an OCLC number as a preference, so because PH didn't yet have one, we went ahead and created it: 951537099. This means PH is also now [listed in WorldCat](https://www.worldcat.org/oclc/951537099). (Note that while PH can be added to as many library catalogues as users wish, we only needed one OCLC number creation and WorldCat record—no need to create another!)

-If you'd like to help PH's accessibility and trustworthiness by adding it to your school, local, or other library catalogue, starting by contacting either a humanities subject librarian or a digital "something" librarian (humanities, initiatives, scholarship...) is a good first step.
You're welcome to use the email template shared below (drawn from ["About The Programming Historian"](/)) to make this even simpler. If you add PH to your catalogue, let PH know! Tweet [@ProgHist](https://twitter.com/proghist) or use any of the other contact info on PH's [feedback page](/feedback). Or if you have any questions about adding *The Programming Historian* to your library's catalogue, feel free to tweet [@Literature_Geek](http://www.twitter.com/Literature_Geek) or email me at aviscont@purdue.edu.
+If you'd like to help PH's accessibility and trustworthiness by adding it to your school, local, or other library catalogue, starting by contacting either a humanities subject librarian or a digital "something" librarian (humanities, initiatives, scholarship...) is a good first step. You're welcome to use the email template shared below (drawn from ["About The Programming Historian"](/)) to make this even simpler. If you add PH to your catalogue, let PH know! Tweet [@ProgHist](https://twitter.com/proghist) or use any of the other contact info on PH's [feedback page](/feedback). Or if you have any questions about adding *The Programming Historian* to your library's catalogue, feel free to tweet [@Literature_Geek](https://www.twitter.com/Literature_Geek) or email me at aviscont@purdue.edu.

## Information to share with library cataloguers

I'm interested in adding a peer-reviewed digital humanities resource to our library catalogue. Here's some information to help you assess this resource:

*The Programming Historian* is a respected digital resource for free, peer-reviewed, community-authored guides to digital humanities methodologies (not just history, despite the title). It aims to help researchers at all levels of higher education learn a wide range of digital tools, techniques, and workflows to facilitate their humanities research; importantly, its tutorials offer not just the steps to use a given tool or method, but also examples drawn from humanities research of a tool's potential use and usefulness. It is actively maintained and releasing new tutorials.

-*The Programming Historian* is [listed in WorldCat](http://www.worldcat.org/oclc/951537099) as well as by libraries including the [Purdue University Libraries](http://purdue-primo-prod.hosted.exlibrisgroup.com/PURDUE:everything:PURDUE_ALMA51671812890001081).
+*The Programming Historian* is [listed in WorldCat](https://www.worldcat.org/oclc/951537099) as well as by libraries including the [Purdue University Libraries](https://purdue-primo-prod.hosted.exlibrisgroup.com/PURDUE:everything:PURDUE_ALMA51671812890001081).

It is **free of charge** with no additional registration or specialized software required.

**Open Source**: "*The Programming Historian* is committed to open source and open access principles. All contributed lessons must make use of open source programming languages and open source software whenever possible. This policy is meant to minimize costs for all parties, and to allow the greatest possible level of participation. We believe everyone should be able to benefit from these tutorials, not just those with large research budgets for expensive proprietary software."

**Funding & Ownership**: "*The Programming Historian* is a volunteer-led initiative, controlled entirely by the ‘Editorial Board of the Programming Historian’ with the help of community contributors.
It is not a legal entity, and does not currently receive direct funding from any source."

-**Suggested citation**: Crymble, Adam, Fred Gibbs, Allison Hegel, Caleb McDaniel, Ian Milligan, Evan Taparata, and Jeri Wieringa, eds. *The Programming Historian*. 2nd ed., 2016. http://programminghistorian.org/.
+**Suggested citation**: Crymble, Adam, Fred Gibbs, Allison Hegel, Caleb McDaniel, Ian Milligan, Evan Taparata, and Jeri Wieringa, eds. *The Programming Historian*. 2nd ed., 2016. https://programminghistorian.org/.
diff --git a/_posts/2016-07-05-spanish-editor.md b/_posts/2016-07-05-spanish-editor.md index ba4921b230..d66353753d 100644 --- a/_posts/2016-07-05-spanish-editor.md +++ b/_posts/2016-07-05-spanish-editor.md @@ -16,7 +16,7 @@ Adam Crymble (University of Hertfordshire) will offer support on the practices o

Launched in 2012, The Programming Historian offers more than 45 novice-friendly, peer-reviewed tutorials that help humanists learn a wide range of digital tools, techniques, and workflows to facilitate their research. The Programming Historian is a volunteer-led initiative, controlled entirely by the ‘Editorial Board of the Programming Historian’ with the help of community contributors. It is not a legal entity, and does not currently receive direct funding from any source. Read our reviews:
* Lincoln Mullen, '[Review of the Programming Historian](https://academic.oup.com/jah/article/103/1/299/1751315)', The Journal of American History, vol. 103, no. 1 (2016), pp. 299-301.
-* Cameron Blevins, '[Review of the Programming Historian](http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/)', The Journal of Interactive Technology & Pedagogy, vol. 8 (2015)
+* Cameron Blevins, '[Review of the Programming Historian](https://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/)', The Journal of Interactive Technology & Pedagogy, vol. 8 (2015)

Interested candidates should submit a 1-page expression of interest outlining your interests, experience, and vision for the role, to Adam Crymble (adam.crymble@gmail.com) by 31 July 2016. Please direct any questions to Adam in the first instance.

diff --git a/_posts/2016-07-20-digital-storytelling-immigrant-stories.md b/_posts/2016-07-20-digital-storytelling-immigrant-stories.md index 23a65bca77..f64f21377a 100644 --- a/_posts/2016-07-20-digital-storytelling-immigrant-stories.md +++ b/_posts/2016-07-20-digital-storytelling-immigrant-stories.md @@ -8,15 +8,15 @@

So you’re interested in the digital humanities. You’re considering a new skill or tool, maybe through a [lesson here at the Programming Historian](/lessons/). But your research involves working with individuals and the stories they tell, rather than abstract data. Is there a place for you in the digital humanities?

-Yes! Digital storytelling would be a natural fit. The [Immigrant Stories](http://cla.umn.edu/ihrc/research/immigrant-stories) project provides a model for digital storytelling as a research tool. In 2013, the University of Minnesota’s Immigration History Research Center (IHRC) began Immigrant Stories to collect contemporary migration stories through digital storytelling and preserve them in the IHRC Archives. Our growing collection of digital stories is available to researchers. Our training materials allow a novice to teach others to make digital stories, and to make digital stories themselves.
+Yes! Digital storytelling would be a natural fit.
The [Immigrant Stories](https://cla.umn.edu/ihrc/research/immigrant-stories) project provides a model for digital storytelling as a research tool. In 2013, the University of Minnesota’s Immigration History Research Center (IHRC) began Immigrant Stories to collect contemporary migration stories through digital storytelling and preserve them in the IHRC Archives. Our growing collection of digital stories is available to researchers. Our training materials allow a novice to teach others to make digital stories, and to make digital stories themselves. -Digital storytelling is a way of sharing personal narratives through brief digital videos. The process is simple. You begin by writing a 300-500 word story about a personal experience. Next, you produce a voiceover by making an audio recording of yourself reading your story. Then you finish your video by adding images to your narration in a video editing program. A digital story can be quite sophisticated and include video clips, background music, and special effects. But there’s no need for camera crews or retraining as a film editor. A powerful digital story comes from a compelling story and images. [Saengmany Ratsabout’s Immigrant Story](http://immigrants.mndigital.org/exhibits/show/immigrantstories-exhibit/item/508) about his family’s experience as refugees is a great example. +Digital storytelling is a way of sharing personal narratives through brief digital videos. The process is simple. You begin by writing a 300-500 word story about a personal experience. Next, you produce a voiceover by making an audio recording of yourself reading your story. Then you finish your video by adding images to your narration in a video editing program. A digital story can be quite sophisticated and include video clips, background music, and special effects. But there’s no need for camera crews or retraining as a film editor. A powerful digital story comes from a compelling story and images. [Saengmany Ratsabout’s Immigrant Story](https://immigrants.mndigital.org/exhibits/show/immigrantstories-exhibit/item/508) about his family’s experience as refugees is a great example. Immigrant Stories teaches participants to make a digital story about a personal or family migration experience. Digital storytelling allows the participant to choose their story’s form and content rather than respond to a researcher's questions. Our digital stories include first-person accounts of migration, reflections on ethnic and racial identity, and descriptions of transnational families and labor. Participants come from more than 45 ethnic groups and include diverse types of migrants, such as refugees, international students, and transnational adoptees. These digital stories also raise interesting questions about how people craft personal narratives and how we document and share history in a digital age. -The entire Immigrant Stories collection is publicly available to researchers worldwide. When a participant chooses to share their story with us, we post their digital story, transcript, and metadata online through the [Minnesota Digital Library](http://immigrants.mndigital.org/exhibits/show/immigrantstories-exhibit) and the [Digital Public Library of America](https://dp.la/). To date, the [Immigrant Stories collection](http://immigrants.mndigital.org/items/browse) contains more than 200 digital stories.  +The entire Immigrant Stories collection is publicly available to researchers worldwide. 
When a participant chooses to share their story with us, we post their digital story, transcript, and metadata online through the [Minnesota Digital Library](https://immigrants.mndigital.org/exhibits/show/immigrantstories-exhibit) and the [Digital Public Library of America](https://dp.la/). To date, the [Immigrant Stories collection](https://immigrants.mndigital.org/items/browse) contains more than 200 digital stories.

Hosting our collection online presents possibilities for utilizing the collection in combination with other digital humanities tools. We share all Immigrant Stories under a [Creative Commons license](https://creativecommons.org/licenses/by-nc/4.0/) so they may be included in digital exhibits [created with Omeka](/lessons/). We’ve already created such an exhibit, [Immigrant Stories: 40 Years Of Southeast Asian Stories](https://immigrants.mndigital.org/exhibits/show/immigrantstories1975). You could add some of our digital stories to an exhibit highlighting your own research. Immigrant Stories transcripts can be utilized for data mining--which you can learn about in lessons right here at [the Programming Historian](/lessons/).

You can train others to create digital stories with our simple digital storytelling training and participate in the project, even if you’ve never made a video yourself. Download one of the Immigrant Stories toolkits, our comprehensive guides to teaching digital storytelling in a class or a two-day workshop. Each toolkit provides step-by-step instructions for writing a migration-related script, selecting images, recording a voiceover, and editing a video. We include just enough technical instructions to complete each step without overwhelming detail. Anyone who makes a video is welcome to contribute their digital story to the Immigrant Stories collection. We’ve included instructions and donation forms in the toolkits.

diff --git a/_posts/2016-08-22-announcing-new-team-spanish-language-editors.md b/_posts/2016-08-22-announcing-new-team-spanish-language-editors.md index 58d348420e..4a100dc729 100644 --- a/_posts/2016-08-22-announcing-new-team-spanish-language-editors.md +++ b/_posts/2016-08-22-announcing-new-team-spanish-language-editors.md @@ -18,6 +18,6 @@

[Víctor Gayol](https://twitter.com/victor_gayol) is a researcher and professor at El Colegio de Michoacán, A.C. (CPI-CONACYT), México, with a PhD in History. He always encourages his graduate students to use computer resources as widely as possible in their doctoral research.
He is interested in working with authors to improve and disseminate the possibilities of computer assisted research among Spanish-speaking scholars in History, and to build a network to explore the possibilities and limitations of big data analysis in Spanish and Hispanic-American archival repositories. -[Antonio Rojas Castro](https://twitter.com/RojasCastroA) holds a PhD in Humanities from the Universitat Pompeu Fabra in Barcelona, Spain. As part of his dissertation he built a digital critical edition of Luis de Góngora's Solitudes (currently in beta). In 2013 he created the [Grupo de Humanidades Digitales de Zotero](https://www.zotero.org/groups/humanidades_digitales), and he is currently the Communication Coordinator for the [European Association for Digital Humanities (EADH)](http://eadh.org/). +[Antonio Rojas Castro](https://twitter.com/RojasCastroA) holds a PhD in Humanities from the Universitat Pompeu Fabra in Barcelona, Spain. As part of his dissertation he built a digital critical edition of Luis de Góngora's Solitudes (currently in beta). In 2013 he created the [Grupo de Humanidades Digitales de Zotero](https://www.zotero.org/groups/humanidades_digitales), and he is currently the Communication Coordinator for the [European Association for Digital Humanities (EADH)](https://eadh.org/). Please join us in welcoming Maria, Víctor, and Antonio! You can say hello to them on Twitter by tweeting [@ProgHist](https://twitter.com/proghist), [@mariajoafana](https://twitter.com/mariajoafana), [@Victor_Gayol](https://twitter.com/victor_gayol), and [@RojasCastroA](https://twitter.com/RojasCastroA). And of course, please stay tuned to [The Programming Historian](/) for their contributions. diff --git a/_posts/2016-08-25-presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.md b/_posts/2016-08-25-presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.md index e5ab375813..167e761c6a 100644 --- a/_posts/2016-08-25-presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.md +++ b/_posts/2016-08-25-presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.md @@ -14,11 +14,11 @@ Recibimos muchas aplicaciones excelentes y fue difícil escoger un solo editor. Nos emociona mucho anunciar que Maria José Afanador-Llach, Víctor Gayol y Antonio Rojas Castro han aceptado la invitación. Sin más preámbulos, les presentamos a los nuevos miembros del comité editorial de PH: -[Maria José Afanador-Llach](https://twitter.com/mariajoafana) obtuvo recientemente un doctorado en Historia de la Universidad de Texas en Austin. Actualmente está colaborando con colegas en la [Fundación Histórica Neogranadina](http://neogranadina.org/), una organización sin ánimo de lucro que está digitalizando archivos coloniales en peligro y promoviendo proyectos de humanidades digitales. Tiene amplia experiencia traduciendo escritos académicos del inglés al español y está comprometida con cultivar iniciativas de investigación digital en América Latina. +[Maria José Afanador-Llach](https://twitter.com/mariajoafana) obtuvo recientemente un doctorado en Historia de la Universidad de Texas en Austin. Actualmente está colaborando con colegas en la [Fundación Histórica Neogranadina](https://neogranadina.org/), una organización sin ánimo de lucro que está digitalizando archivos coloniales en peligro y promoviendo proyectos de humanidades digitales. 
Tiene amplia experiencia traduciendo escritos académicos del inglés al español y está comprometida con cultivar iniciativas de investigación digital en América Latina. [Víctor Gayol](https://twitter.com/victor_gayol) es investigador y profesor de El Colegio de Michoacán, A.C (CPI-CONACYT), México, con un doctorado en Historia. Víctor siempre incentiva a sus estudiantes para que utilicen recursos informáticos tan ampliamente como sea posible en sus investigaciones doctorales. Le interesa trabajar con autores para mejorar y diseminar las posibilidades de investigación asistida por computadores entre los historiadores de habla hispana y construir una red para explorar las posibilidades y limitaciones del análisis con big data en repositorios archivísticos en Hispanoamérica. -[Antonio Rojas Castro](https://twitter.com/RojasCastroA) tiene un doctorado en Humanidades de la Universitat Pompeu Fabra en Barcelona, España. Como parte de su disertación construyó una edición digital crítica de las Soledades de Luis de Góngora (actualmente en versión beta). En 2013 creó el [Grupo de Humanidades Digitales de Zotero](https://www.zotero.org/groups/humanidades_digitales), y actualmente es el Coordinador de Comunicaciones de la [European Association for Digital Humanities (EADH)](http://eadh.org/). +[Antonio Rojas Castro](https://twitter.com/RojasCastroA) tiene un doctorado en Humanidades de la Universitat Pompeu Fabra en Barcelona, España. Como parte de su disertación construyó una edición digital crítica de las Soledades de Luis de Góngora (actualmente en versión beta). En 2013 creó el [Grupo de Humanidades Digitales de Zotero](https://www.zotero.org/groups/humanidades_digitales), y actualmente es el Coordinador de Comunicaciones de la [European Association for Digital Humanities (EADH)](https://eadh.org/). Por favor acompáñenos para darles la bienvenida a Maria, Víctor y Antonio! Los pueden saludar en Twitter tuiteando [@ProgHist](https://twitter.com/proghist), [@mariajoafana](https://twitter.com/mariajoafana), [@Victor_Gayol](https://twitter.com/victor_gayol), y [@RojasCastroA](https://twitter.com/RojasCastroA). Y por supuesto, manténganse en sintonía con [The Programming Historian](/) para ver sus contribuciones. diff --git a/_posts/2016-09-19-distant-reading-in-the-undergraduate-classroom.md b/_posts/2016-09-19-distant-reading-in-the-undergraduate-classroom.md index 2a64e3a2f9..bc1830ce8d 100644 --- a/_posts/2016-09-19-distant-reading-in-the-undergraduate-classroom.md +++ b/_posts/2016-09-19-distant-reading-in-the-undergraduate-classroom.md @@ -16,9 +16,9 @@ What might this look like in the classroom? Here’s an example from one of my o 1892 to 1924 was a foundational era in the history of American immigration law and policy. 1892 was the first year when the regulation of immigration in the U.S. was brought under the supervision of a single federal office. In 1924, Congress passed the Johnson-Reed Act, a law that for the first time in American history limited immigration via numerical quotas based on national origin. Although the Johnson-Reed Act is known as one of the most exclusionary immigration laws of its time, it was hardly the first law in American history to restrict migration to the United States. The Page Act of 1875 and the Chinese Exclusion Act of 1882 paved the way for a tide of legislation that excluded many categories of immigrants from entering the country according to race, class, ability, sexuality, gender, and political affiliation. 
-There are many avenues for researching the evolution of exclusion in American immigration law. But one of the most accessible ways to do so is through the annual reports of immigration that the U.S. federal government published at the turn of the twentieth century. These annual reports are digitized and accessible to researchers and students through databases like [HathiTrust](https://www.hathitrust.org/), [HeinOnline](http://home.heinonline.org/), and other databases available through many university library subscriptions. +There are many avenues for researching the evolution of exclusion in American immigration law. But one of the most accessible ways to do so is through the annual reports of immigration that the U.S. federal government published at the turn of the twentieth century. These annual reports are digitized and accessible to researchers and students through databases like [HathiTrust](https://www.hathitrust.org/), [HeinOnline](https://home.heinonline.org/), and other databases available through many university library subscriptions. -As a first foray into distant reading, instructors might encourage students to pick one annual report and run it through [Voyant Tools](http://voyant-tools.org/). For example, a group of students could plug in the U.S. Department of Labor’s Annual Report on Immigration for the year 1910 and do a group think on the document’s major word frequencies. +As a first foray into distant reading, instructors might encourage students to pick one annual report and run it through [Voyant Tools](https://voyant-tools.org/). For example, a group of students could plug in the U.S. Department of Labor’s Annual Report on Immigration for the year 1910 and do a group think on the document’s major word frequencies.
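Under the hood, Voyant's summary view is essentially a stop-worded word-frequency count, and the same tally is easy to reproduce. Here is a minimal sketch in Ruby, assuming the 1910 report has been saved as plain text (the filename `report1910.txt` and the short stoplist are illustrative, not part of the original post):

```ruby
# Rough equivalent of Voyant's word-frequency summary.
text = File.read("report1910.txt").downcase

# Keep alphabetic tokens, then drop a few common function words
# (this stoplist is illustrative; Voyant ships a much longer one).
stopwords = %w[the of and to in a for that on by is was were as with be]
words = text.scan(/[a-z]+/) - stopwords

# Print the 25 most frequent remaining words with their counts.
words.tally.sort_by { |_, count| -count }.first(25).each do |word, count|
  puts "#{word}\t#{count}"
end
```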
@@ -29,7 +29,7 @@ As a first foray into distant reading, instructors might encourage students to p
-[Figure: Screenshots of a distant reading of the US Department of Labor’s Annual Report on Immigration, 1910, conducted with VoyantTools.org.]
+[Figure: Screenshots of a distant reading of the US Department of Labor’s Annual Report on Immigration, 1910, conducted with VoyantTools.org.]

@@ -41,9 +41,9 @@ Or how about “Chinese”? Students might dig deeper into the report and read m On the other end of the spectrum, students might discuss why words like “border” and “deportation” — words that are extremely important in today’s debates about immigration — are not among the most frequent words in the report. -The possibilities for coming up with questions about primary sources through distant reading become even more powerful when working with multiple texts. For example, an instructor who teaches themselves how to topic model with *Programming Historian* could pass that knowledge on to students by having them run every single annual report on immigration from 1892 to 1924 through the Topic Modeling Tool, a [user-friendly interface for topic modeling that students who have no coding knowledge can use in the classroom](http://www.themacroscope.org/?page_id=391). Using the Topic Modeling Tool with the annual reports from this era of immigration history would give students a chance to begin identifying the topics that were of concern to government officials, and begin to make observations about how those concerns changed over time. +The possibilities for coming up with questions about primary sources through distant reading become even more powerful when working with multiple texts. For example, an instructor who teaches themselves how to topic model with *Programming Historian* could pass that knowledge on to students by having them run every single annual report on immigration from 1892 to 1924 through the Topic Modeling Tool, a [user-friendly interface for topic modeling that students who have no coding knowledge can use in the classroom](https://www.themacroscope.org/?page_id=391). Using the Topic Modeling Tool with the annual reports from this era of immigration history would give students a chance to begin identifying the topics that were of concern to government officials, and begin to make observations about how those concerns changed over time. -As [Ted Underwood notes](http://tedunderwood.com/2015/06/04/seven-ways-humanists-are-using-computers-to-understand-text/), topic modeling may be especially useful for individuals who are approaching a set of texts without a particular research question in mind. Distant reading is a great way for students in particular to begin analyzing historical documents — and could also be a great way for students to start coming up with questions for research papers that will allow them to practice close readings of primary sources and further develop their digital research skills. +As [Ted Underwood notes](https://tedunderwood.com/2015/06/04/seven-ways-humanists-are-using-computers-to-understand-text/), topic modeling may be especially useful for individuals who are approaching a set of texts without a particular research question in mind. Distant reading is a great way for students in particular to begin analyzing historical documents — and could also be a great way for students to start coming up with questions for research papers that will allow them to practice close readings of primary sources and further develop their digital research skills. Have you taught with distant reading, or used a PH tutorial on distant reading in your classroom? Let us know! Tweet @ProgHist with the hashtag #teachDH and share your experiences with us. 
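The Topic Modeling Tool is a graphical front end to MALLET, so the same batch run can be scripted once students outgrow the interface. A minimal sketch in Ruby, assuming the `mallet` command is on the PATH and the annual reports sit as plain-text files in a `reports/` directory (both are assumptions, not part of the original post):

```ruby
# Import the documents, then train a topic model with MALLET.
ok = system("mallet import-dir --input reports/ --output reports.mallet " \
            "--keep-sequence --remove-stopwords")
abort("MALLET import failed") unless ok

ok = system("mallet train-topics --input reports.mallet --num-topics 20 " \
            "--output-topic-keys topic-keys.txt " \
            "--output-doc-topics doc-topics.txt")
abort("MALLET training failed") unless ok
```

The resulting `topic-keys.txt` lists the top words for each of the twenty topics, one topic per line, which is enough to start comparing how the reports' concerns shift across years.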
diff --git a/_posts/2016-10-18-promoting-digital-archives.md b/_posts/2016-10-18-promoting-digital-archives.md index 6af6470db9..2320e96e40 100644 --- a/_posts/2016-10-18-promoting-digital-archives.md +++ b/_posts/2016-10-18-promoting-digital-archives.md @@ -8,7 +8,7 @@ categories: posts Libraries and digital archival repositories are getting in on a popular new trend. Since 2015, the adult coloring book market has exploded. Featuring everything from cats to science fiction TV shows and more, these exquisitely detailed coloring books aren’t just fun—they can also be an accessible way to raise interest in a variety of topics. Archivists and librarians, for example, are using them to transform their digitized archival materials into free, downloadable coloring pages and books that promote their unique collections. -In February of this year the [New York Academy of Medicine](https://nyamcenterforhistory.org/2016/01/06/colorourcollections-february-1-5/) launched #ColorOurCollections, an online event to highlight and spark interest in unique materials ranging from early English book illustrations to textile design patterns, city maps, fashion and nature drawings, and all kinds of other fascinating things. Participating institutions included the [New York Public Library](https://www.nypl.org/blog/2016/02/01/color-our-collections), the [DPLA](https://dp.la/info/2016/02/01/color-our-collections/), the [Bodleian Library](https://www.bodleian.ox.ac.uk) (as of 03/30/2021 the coloring books in this institution are not available), and the [Smithsonian Libraries](http://library.si.edu/event/colorourcollections-coloring-event), among others. More libraries and institutions have continued to create their own coloring pages as the year has gone on, and the [#ColorOurCollections](https://twitter.com/search?q=%23colorourcollections&src=typd) hashtag continues to grow on Twitter. +In February of this year the [New York Academy of Medicine](https://nyamcenterforhistory.org/2016/01/06/colorourcollections-february-1-5/) launched #ColorOurCollections, an online event to highlight and spark interest in unique materials ranging from early English book illustrations to textile design patterns, city maps, fashion and nature drawings, and all kinds of other fascinating things. Participating institutions included the [New York Public Library](https://www.nypl.org/blog/2016/02/01/color-our-collections), the [DPLA](https://dp.la/info/2016/02/01/color-our-collections/), the [Bodleian Library](https://www.bodleian.ox.ac.uk) (as of 03/30/2021 the coloring books in this institution are not available), and the [Smithsonian Libraries](https://library.si.edu/event/colorourcollections-coloring-event), among others. More libraries and institutions have continued to create their own coloring pages as the year has gone on, and the [#ColorOurCollections](https://twitter.com/search?q=%23colorourcollections&src=typd) hashtag continues to grow on Twitter. One of the things I enjoy most in my current position as a Cataloging and Metadata Librarian is interacting with digital archival materials for the Houston Area Digital Archives website. My primary goal is always to find new and exciting ways to make history and digitized archival materials interesting to people. It often is not enough to simply upload items into content management systems and expect them to be discoverable or even for people to understand their significance. 
Coloring books are a great promotional tool because they encourage interaction with collections and promote the value of preservation. They also help make materials more accessible. Specifically, the Houston Area Digital Archives promotes our coloring books to students, educators, and other librarians online and at conferences like Digital Frontiers and the Texas Library Association as a fun way to encourage interaction with local history.  I also enjoy creating coloring books because it is an opportunity to expand archival representation by featuring the records of groups such as women and the LGBT community. @@ -17,18 +17,18 @@ One of the things I enjoy most in my current position as a Cataloging and Metada
-[Figure: An example of a coloring book from http://digital.houstonlibrary.org/cdm/ref/collection/curriculum-project/id/10 [Ed. note: this link is no longer live] Color the Big Top: Houston Area Digital Archives Coloring Book, Volume 2.]
+[Figure: An example of a coloring book from https://digital.houstonlibrary.org/cdm/ref/collection/curriculum-project/id/10 [Ed. note: this link is no longer live] Color the Big Top: Houston Area Digital Archives Coloring Book, Volume 2.]

So how can you be a part of #ColorOurCollections and make your own digital archival coloring book? Check out the following resources to get started:

-* [How to Make a Coloring Book](http://www.instructables.com/id/How-to-Make-a-Coloring-Book/?ALLSTEPS) via Instructables is a great starting point for working with [Pixlr Editor](https://pixlr.com/editor/), a free online image editor, to transform digitized archival materials into coloring pages.
+* [How to Make a Coloring Book](https://www.instructables.com/id/How-to-Make-a-Coloring-Book/?ALLSTEPS) via Instructables is a great starting point for working with [Pixlr Editor](https://pixlr.com/editor/), a free online image editor, to transform digitized archival materials into coloring pages.

* \#ColorOurCollections:Creating a coloring book with the Houston Area Digital Archives `http://digital.houstonlibrary.org/cdm/singleitem/collection/curriculum-project/id/17/rec/2` [Ed. note: this link is no longer live] provides complete instructions specifically for working with digital archival materials.

-* Other websites such as [Canva](https://www.canva.com/) and [COLOROURlovers](http://www.colourlovers.com/) can help create professional looking coloring book covers and other elements.
+* Other websites such as [Canva](https://www.canva.com/) and [COLOURlovers](https://www.colourlovers.com/) can help create professional-looking coloring book covers and other elements.

-* Find inspiration from some of the amazing examples created by other libraries listed on [Open Culture](http://www.openculture.com/2016/02/download-free-coloring-books-from-world-class-libraries-museums.html).
+* Find inspiration from some of the amazing examples created by other libraries listed on [Open Culture](https://www.openculture.com/2016/02/download-free-coloring-books-from-world-class-libraries-museums.html).

You can also click here `http://digital.houstonlibrary.org/cdm/search/searchterm/colorourcollections/order/nosort` [Ed. note: this link is no longer live] to view and print the Houston Area Digital Archives coloring books. I hope you’ll explore the [#ColorOurCollections](https://twitter.com/search?q=%23colorourcollections&src=typd) hashtag and have fun creating your own coloring book!

diff --git a/_posts/2017-01-21-call-to-action.md b/_posts/2017-01-21-call-to-action.md
index 242f76a5bc..2fee436522 100644
--- a/_posts/2017-01-21-call-to-action.md
+++ b/_posts/2017-01-21-call-to-action.md
@@ -22,7 +22,7 @@ We've got a great lesson on how to [conduct a topic model using MALLET](/lessons

2) **How do you conduct a stylometric analysis (well)?**

-Stylometry, the process of computationally attributing (probable) authorship to an anonymous text, has grown in popularity in recent years, even outing [J.K. Rowling as 'Robert Galbraith'](http://languagelog.ldc.upenn.edu/nll/?p=5315) in 2013. But how do you DO it? And what are the pitfalls you need to beware of? Given the vast amount of machine-readable text out there, we think it's time stylometry came into the mainstream of historical research.
+Stylometry, the process of computationally attributing (probable) authorship to an anonymous text, has grown in popularity in recent years, even outing [J.K. Rowling as 'Robert Galbraith'](https://languagelog.ldc.upenn.edu/nll/?p=5315) in 2013. But how do you DO it? And what are the pitfalls you need to beware of? Given the vast amount of machine-readable text out there, we think it's time stylometry came into the mainstream of historical research.
3) **How do you conduct spatial clustering of geographic data?** diff --git a/_posts/2017-03-02-dh-award-2016.md b/_posts/2017-03-02-dh-award-2016.md index 4b363f98e5..6de5355721 100644 --- a/_posts/2017-03-02-dh-award-2016.md +++ b/_posts/2017-03-02-dh-award-2016.md @@ -9,7 +9,7 @@ categories: posts

Programming Historian is DH 2016 Award Winner - Best Series of Posts.

-Congratulations to our 2016 authors for winning the [2016 Digital Humanities Award for best series of posts](http://dhawards.org/dhawards2016/results/). +Congratulations to our 2016 authors for winning the [2016 Digital Humanities Award for best series of posts](https://dhawards.org/dhawards2016/results/). A full recap of their contributions can be found on our [2016 Roundup](/posts/twenty-sixteen-review). diff --git a/_posts/2017-03-02-launch-PH-Spanish.md b/_posts/2017-03-02-launch-PH-Spanish.md index 4ef569e02f..ef8d8d5a02 100644 --- a/_posts/2017-03-02-launch-PH-Spanish.md +++ b/_posts/2017-03-02-launch-PH-Spanish.md @@ -10,7 +10,7 @@ categories: posts Programming Historian launches its Spanish site.

We are very excited to introduce *The Programming Historian en español*! *The Programming Historian* offers novice-friendly, peer-reviewed tutorials that help humanists learn a wide range of digital tools, techniques, and workflows to facilitate their research.

-Starting today, the website of *The Programming Historian en español* will be available in the following link: [http://programminghistorian.org/es](/es) We launch the site with complete translations of the Python tutorial series. Python is a high level programming language which can be used to write programs to manipulate text files, download web pages, reorganize its contents into useful chunks of information for analysis, and count word frequencies.
+Starting today, the website of *The Programming Historian en español* will be available at the following link: [https://programminghistorian.org/es](/es). We launch the site with complete translations of the Python tutorial series. Python is a high-level programming language which can be used to write programs to manipulate text files, download web pages, reorganize their contents into useful chunks of information for analysis, and count word frequencies.

In the following months we will continue to translate and publish the more than fifty tutorials that are already available in the English PH site. Among them, there are tutorials for learning [how to make Omeka exhibits](/lessons/creating-an-omeka-exhibit), [create network visualizations](/lessons/creating-network-diagrams-from-historical-sources) and [edit plain text using Markdown](/lessons/getting-started-with-markdown). We will also have translations of tutorials on visualization of historical data using Geographic Information Systems, data mining and data set analysis using R programming language, among many others.

diff --git a/_posts/2017-03-05-lanzamiento-PH-espanol.md b/_posts/2017-03-05-lanzamiento-PH-espanol.md
index 7f975ad413..2de860180d 100644
--- a/_posts/2017-03-05-lanzamiento-PH-espanol.md
+++ b/_posts/2017-03-05-lanzamiento-PH-espanol.md
@@ -10,7 +10,7 @@ redirect_from: /posts/lanzamiento-PH-español

Programming Historian lanza su sitio en español.

-¡Estamos muy emocionados de presentar *The Programming Historian en español*! *The Programming Historian* ofrece tutoriales que ayudan en el aprendizaje de una amplia gama de herramientas digitales, técnicas y flujos de trabajo para facilitar investigación en las humanidades. El sitio de The Programming Historian en español estará disponible a partir de hoy en el siguiente enlace: [http://programminghistorian.org/es](/es) El sitio en español se inaugura con las traducciones completas de la serie de tutoriales de Python. Python es un lenguaje de programación de alto nivel con el cual se pueden escribir programas que permiten manipular archivos de texto, descargar páginas web, reorganizar los contenidos en fragmentos de información útiles para el análisis y contar frecuencias de palabras, entre otros. +¡Estamos muy emocionados de presentar *The Programming Historian en español*! *The Programming Historian* ofrece tutoriales que ayudan en el aprendizaje de una amplia gama de herramientas digitales, técnicas y flujos de trabajo para facilitar investigación en las humanidades. El sitio de The Programming Historian en español estará disponible a partir de hoy en el siguiente enlace: [https://programminghistorian.org/es](/es) El sitio en español se inaugura con las traducciones completas de la serie de tutoriales de Python. Python es un lenguaje de programación de alto nivel con el cual se pueden escribir programas que permiten manipular archivos de texto, descargar páginas web, reorganizar los contenidos en fragmentos de información útiles para el análisis y contar frecuencias de palabras, entre otros. En los próximos meses continuaremos traduciendo y publicando los más de cincuenta tutoriales que ya se encuentran disponibles en el sitio de PH en inglés. Entre ellos se encuentran tutoriales para aprender a [hacer exhibiciones en Omeka](/lessons/creating-an-omeka-exhibit), [crear visualizaciones de redes](/lessons/creating-network-diagrams-from-historical-sources) y [editar texto plano usando Markdown](/lessons/getting-started-with-markdown). También contaremos con traducciones de tutoriales para aprender a visualizar datos históricos en sistemas de información geográfica, hacer minería de datos y utilizar el lenguaje de programación R para analizar series de datos, entre muchos más. diff --git a/_posts/2017-03-31-history-of-protest.md b/_posts/2017-03-31-history-of-protest.md index 1e70a40441..b60c078b4c 100644 --- a/_posts/2017-03-31-history-of-protest.md +++ b/_posts/2017-03-31-history-of-protest.md @@ -11,10 +11,10 @@ categories: posts The Northern Star newspaper, 9 February 1839.

-In the 1830s, the British grass-roots protest movement that came to be known as the 'Chartists' gathered in pubs around Britain to rally for an expansion of voting rights. Katrina Navickas is a historian of political movements and has become increasingly interested in the study of spaces, which was the focus of her recent monograph, [*Protest and the politics of space and place, 1789-1848*](http://www.manchesteruniversitypress.co.uk/9781526116703/).
+In the 1830s, the British grass-roots protest movement that came to be known as the 'Chartists' gathered in pubs around Britain to rally for an expansion of voting rights. Katrina Navickas is a historian of political movements and has become increasingly interested in the study of spaces, which was the focus of her recent monograph, [*Protest and the politics of space and place, 1789-1848*](https://www.manchesteruniversitypress.co.uk/9781526116703/).

-We're pleased to announce that the *Programming Historian* has been able to contribute to her most recent project to map the distribution of Chartist meetings in nineteenth century London. In 2015, Navickas was one of the winners of the [British Library Labs award](http://labs.bl.uk/British+Library+Labs+Competition), which gave her access to British Library digital collections and expertise. Building on her interest of the history of protest, she extracted details of Chartist meetings from nineteenth century digitised newspapers, to produce the '[Political Meetings Mapper](http://politicalmeetingsmapper.co.uk)' project. Part of Navickas' workflow involved adapting Adam Crymble's lesson on '[Using Gazetteers to Extract Sets of Keywords from Free-Flowing Texts](/lessons/extracting-keywords)' to isolate text related to meetings within her wider newspaper collection.
+We're pleased to announce that the *Programming Historian* has been able to contribute to her most recent project to map the distribution of Chartist meetings in nineteenth century London. In 2015, Navickas was one of the winners of the [British Library Labs award](https://labs.bl.uk/British+Library+Labs+Competition), which gave her access to British Library digital collections and expertise. Building on her interest in the history of protest, she extracted details of Chartist meetings from nineteenth century digitised newspapers, to produce the '[Political Meetings Mapper](https://politicalmeetingsmapper.co.uk)' project. Part of Navickas' workflow involved adapting Adam Crymble's lesson on '[Using Gazetteers to Extract Sets of Keywords from Free-Flowing Texts](/lessons/extracting-keywords)' to isolate text related to meetings within her wider newspaper collection.

Navickas and Crymble have since co-authored a full description of the project and of the workflow used to build this project, which has been published open access in the *Journal of Victorian Culture*. We're very pleased to see this clear impact of the work of *Programming Historian* lessons in academic research, and we'd be thrilled to hear from other scholars with similar stories.

-Read '[From Chartist Newspaper to Digital Map of Grass-roots Meetings, 1841-44: Documenting Workflows](http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179)' (open access).
+Read '[From Chartist Newspaper to Digital Map of Grass-roots Meetings, 1841-44: Documenting Workflows](https://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179)' (open access).
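The gazetteer technique Navickas adapted comes down to matching a prepared list of keywords against free-flowing text. A minimal sketch of that matching step in Ruby, where `gazetteer.txt` (one term per line) and `northern_star_1841.txt` are hypothetical files standing in for her keyword list and newspaper text:

```ruby
# Flag every line of a text that mentions a term from the gazetteer.
terms = File.readlines("gazetteer.txt", chomp: true).map(&:strip).reject(&:empty?)
pattern = Regexp.union(terms.map { |t| /\b#{Regexp.escape(t)}\b/i })

File.readlines("northern_star_1841.txt", chomp: true).each_with_index do |line, i|
  puts "#{i + 1}: #{line}" if line.match?(pattern)
end
```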
diff --git a/_posts/2017-06-18-sonic-word-clouds.md b/_posts/2017-06-18-sonic-word-clouds.md index 4d629ee874..3a14169115 100644 --- a/_posts/2017-06-18-sonic-word-clouds.md +++ b/_posts/2017-06-18-sonic-word-clouds.md @@ -9,7 +9,7 @@ date: 2017-06-19 My name is Daniel Ruten, and I have just finished my undergraduate studies majoring in History at the University of Saskatchewan. During my last term, I took a course on Digital History (HIST396) with Dr. Jim Clifford. In it, I became familiarized with the various emerging digital tools and methodologies that are becoming increasingly important for historians to learn. The course also required that I create some kind of digital history project myself. For my project, I took inspiration from one lesson in particular featured on the Programming Historian website: [historian Shawn Graham’s lesson on data sonification](/lessons/sonification). Building off of what this lesson taught me, I developed my own method to represent and analyze textual data through sound, which I have termed Sonic Word Clouds. In this post I will briefly explain this method of sonification, while reflecting a bit on the learning process that both inspired the idea for this project and allowed me to make it a reality. -At first, I was somewhat at a loss of what I should do for my digital history project. While scanning the various lessons on the Programming Historian website in order to get some ideas, one lesson in particular caught my eye. It was entitled “The Sound of Data (a gentle introduction to sonification for historians),” by Shawn Graham. I hadn’t really known that data sonification was a thing, much less a method that could be used by historians, and so my curiosity was piqued. In [the lesson](/lessons/sonification), Graham provides an introduction to the theory of data sonification, before detailing some of the sonification methods and resources that are available to historians. In particular, he discusses some methods to convert historical data into MIDI notation that can then be mapped to instrumentation. Some of these tools, such as [Musicalgorithms](http://www.musicalgorithms.org/3.2/) and the [MIDITime package for Python](https://pypi.python.org/pypi/miditime), were particularly designed with time-series/quantitative data in mind. But Graham also provides an example of the potential of MIDITime to analyze historical texts, as he uses it to sonify topic modelling data from John Adams’ diaries. By [mapping the resultant MIDI data to different instruments in Garageband](https://www.youtube.com/watch?v=ikqRXtI3JeA&feature=youtu.be), he offers us a means to hear the relative occurrence of different topics in the diaries over a 50-year period. Being able to listen to the relations between these different concepts over time this way provided a very unique and intriguing representation of a textual narrative. As Graham emphasizes, the choices one makes regarding how to represent data sonically in this fashion reveal the ways in which we privilege, condense and transform information as historians. The lesson gave me an idea: what if there was a way to sonify this kind of textual data in a more readily intelligible way? This set the stage for my own project. +At first, I was somewhat at a loss of what I should do for my digital history project. While scanning the various lessons on the Programming Historian website in order to get some ideas, one lesson in particular caught my eye. 
It was entitled “The Sound of Data (a gentle introduction to sonification for historians),” by Shawn Graham. I hadn’t really known that data sonification was a thing, much less a method that could be used by historians, and so my curiosity was piqued. In [the lesson](/lessons/sonification), Graham provides an introduction to the theory of data sonification, before detailing some of the sonification methods and resources that are available to historians. In particular, he discusses some methods to convert historical data into MIDI notation that can then be mapped to instrumentation. Some of these tools, such as [Musicalgorithms](https://www.musicalgorithms.org/3.2/) and the [MIDITime package for Python](https://pypi.python.org/pypi/miditime), were particularly designed with time-series/quantitative data in mind. But Graham also provides an example of the potential of MIDITime to analyze historical texts, as he uses it to sonify topic modelling data from John Adams’ diaries. By [mapping the resultant MIDI data to different instruments in Garageband](https://www.youtube.com/watch?v=ikqRXtI3JeA&feature=youtu.be), he offers us a means to hear the relative occurrence of different topics in the diaries over a 50-year period. Being able to listen to the relations between these different concepts over time this way provided a very unique and intriguing representation of a textual narrative. As Graham emphasizes, the choices one makes regarding how to represent data sonically in this fashion reveal the ways in which we privilege, condense and transform information as historians. The lesson gave me an idea: what if there was a way to sonify this kind of textual data in a more readily intelligible way? This set the stage for my own project. The idea for my project was essentially to develop a method to take a given historical text, determine the 25 most frequent words in it, and then use MIDITime to convert each word’s occurrences throughout the text into MIDI data. From there, all 25 resultant MIDI files would be brought into music sequencing software and mapped to instrumentation. Up to this point, the idea is very similar to what Graham demonstrated in the lesson. The key difference is that instead of using conventional instruments, I planned to assign the MIDI data to samplers. (A [sampler](https://goo.gl/1nfuuo) is a kind of digital instrument that plays back any audio file that is loaded into it.) From there, each sampler would be loaded with a Text-to-Speech audio file of its corresponding word. The MIDI notation would then tell each sampler when to trigger its spoken word, corresponding to the occurrences of the word in the text. When all of this is combined, then, we would be able to hear the linear frequency of multiple spoken words over time in a text in a sort of sonic word cloud, thus quickly getting a sense of both shifting patterns of common word usage as well as the relations between the usage of different words over time. diff --git a/_posts/2017-07-28-new-lessons-page.md b/_posts/2017-07-28-new-lessons-page.md index 2949697b9b..dc9c9a7bd5 100644 --- a/_posts/2017-07-28-new-lessons-page.md +++ b/_posts/2017-07-28-new-lessons-page.md @@ -15,7 +15,7 @@ A brief description of our new lessons page functionality: we have tagged each o {% include figure.html caption="Buttons for filtering lessons by phases and topics" filename="images/blog/new-lessons-page/lesson-filter.gif" %} -You can also sort lessons by difficulty and publication date. 
If you want a complete list of all lessons, just hit the reset button. By default, we list lessons in reverse chronological order so that when you visit from time to time, you'll always see what's new at the top of the page (also using any filter you want to apply). Of course we hope all this is totally obvious once it's in front of you. Our super-fast on-the-fly sorting is powered by [list.js](http://listjs.com/), which was a breeze to implement thanks to the skill and ingenuity of our editor Amanda Visconti. As you filter and sort, you'll notice that the URL updates, too, so that you can bookmark or share any particular set of lessons. +You can also sort lessons by difficulty and publication date. If you want a complete list of all lessons, just hit the reset button. By default, we list lessons in reverse chronological order so that when you visit from time to time, you'll always see what's new at the top of the page (also using any filter you want to apply). Of course we hope all this is totally obvious once it's in front of you. Our super-fast on-the-fly sorting is powered by [list.js](https://listjs.com/), which was a breeze to implement thanks to the skill and ingenuity of our editor Amanda Visconti. As you filter and sort, you'll notice that the URL updates, too, so that you can bookmark or share any particular set of lessons. {% include figure.html caption="Sort lessons by publication date or difficulty" filename="images/blog/new-lessons-page/lesson-sorting.gif" %} diff --git a/_posts/2017-07-31-infrastructure-at-ph.md b/_posts/2017-07-31-infrastructure-at-ph.md index cacd5cb205..fc00dae97d 100644 --- a/_posts/2017-07-31-infrastructure-at-ph.md +++ b/_posts/2017-07-31-infrastructure-at-ph.md @@ -17,7 +17,7 @@ This post will highlight three behind-the-scenes, technical changes to the way t ## Searching for Link Rot -We have built _PH_ on the [Jekyll](http://jekyllrb.com/) site generation platform in part because it creates simple HTML files without needing a database server to run at all times in order to keep the site live. +We have built _PH_ on the [Jekyll](https://jekyllrb.com/) site generation platform in part because it creates simple HTML files without needing a database server to run at all times in order to keep the site live. However, no content management system is safe from the ravages of "link rot": when published links to other web pages go dead because their owners moved the content, deleted it, or otherwise shut down their website. This is particularly troublesome for _PH_, since so many of our lessons link to external references, tutorials, and examples. While we strive to make sure all the links in a lesson are operating when we first publish it, it's all but impossible to manually check old lessons on a regular basis to make sure the links are _still_ working. @@ -33,7 +33,7 @@ Running this check on _PH_ [revealed several dozen links](https://github.com/pro Once we identified these links, we tried to find the new location to which the linked content had been moved. When that was not possible, we instead pointed to a version of the content archived in the [Wayback Machine]. 
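The check described above is close to a one-liner with the html-proofer gem the post mentions. A minimal sketch of invoking it from Ruby, assuming the site has already been built into `./_site`:

```ruby
require 'html-proofer'

# Walk the built site, checking internal links and requesting each
# external URL; the run raises an error (failing a CI build) when
# it finds dead links.
HTMLProofer.check_directory("./_site").run
```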
-[Wayback Machine]: http://web.archive.org/ +[Wayback Machine]: https://web.archive.org/ [buildsh]: https://github.com/programminghistorian/jekyll/blob/gh-pages/_build/build.sh#L15-L40 @@ -59,7 +59,7 @@ As we've expanded the capabilities of the site, the metadata has had to expand t If an editor forgets to include some of these YAML fields, it can result in a site build error, a missing lesson, or blank spots where we might expect to find the name of a lesson's editors or reviewers. This makes the life of an editor more and more difficult, and we frequently found ourselves needing to go back in to published lessons to tweak metadata so everything appeared correctly on the site. -Using Jekyll's [custom plugin](http://jekyllrb.com/docs/plugins/) capabilities, we are able to specify the metadata schema needed for lessons, and cause Jekyll to throw informative errors when it finds a lesson file that is missing a required field. +Using Jekyll's [custom plugin](https://jekyllrb.com/docs/plugins/) capabilities, we are able to specify the metadata schema needed for lessons, and cause Jekyll to throw informative errors when it finds a lesson file that is missing a required field. Unlike htmlproofer, this code does not come as a fully-fledged package - we had to compose it ourselves. However [you can see our commented source code here](https://github.com/programminghistorian/jekyll/blob/gh-pages/_plugins/validate_yaml.rb) to understand how we specify and evaluate required metadata fields. diff --git a/_posts/2017-09-21-corpus-linguistics-in-action.md b/_posts/2017-09-21-corpus-linguistics-in-action.md index 8af4fd1254..3718d16322 100644 --- a/_posts/2017-09-21-corpus-linguistics-in-action.md +++ b/_posts/2017-09-21-corpus-linguistics-in-action.md @@ -12,15 +12,15 @@ categories: posts Here at the Programming Historian, we have a number of lessons focused on "[distant reading](/lessons/?topic=distant-reading)." These lessons pull from a variety of fields to demonstrate different ways to computationally surface patterns across a large collection of digital objects. But how do you build on those patterns as part of a research project? That question of what to do next is what the authors of this post have set out to answer. -In this blog post, authors Viola Wiegand, Michaela Mahlberg, and Peter Stockwell offer a sample of their research analyzing the language used in 19th century English novels. Using [CLiC](http://clic.bham.ac.uk/), a corpus analysis application that the authors are developing in a joint project between the University of Birmingham and the University of Nottingham, they explore the 'fireplace pose' in Dickens's novels. Their goal is to "find textual patterns that are shared across novels and point to socially and culturally relevant behaviours and conventions in the real world." +In this blog post, authors Viola Wiegand, Michaela Mahlberg, and Peter Stockwell offer a sample of their research analyzing the language used in 19th century English novels. Using [CLiC](https://clic.bham.ac.uk/), a corpus analysis application that the authors are developing in a joint project between the University of Birmingham and the University of Nottingham, they explore the 'fireplace pose' in Dickens's novels. Their goal is to "find textual patterns that are shared across novels and point to socially and culturally relevant behaviours and conventions in the real world." 
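Returning to the metadata validation described in the infrastructure post above: Jekyll's hook API is one way to sketch that behaviour. This is an illustrative stand-in rather than the project's actual plugin (the real, commented source is linked in that post), and the path prefix and field names are assumptions:

```ruby
# _plugins/validate_yaml.rb (illustrative sketch, not PH's actual plugin)
Jekyll::Hooks.register :pages, :post_init do |page|
  # Only validate lesson pages; the path prefix is an assumption.
  next unless page.relative_path.start_with?("lessons/")

  required = %w[title authors editors reviewers] # illustrative field names
  missing = required.reject { |field| page.data.key?(field) }
  unless missing.empty?
    raise Jekyll::Errors::FatalException,
          "#{page.relative_path} is missing metadata: #{missing.join(', ')}"
  end
end
```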
-You can find out more about the CLiC Dickens research project on the [project's website](http://www.birmingham.ac.uk/schools/edacs/departments/englishlanguage/research/projects/clic/index.aspx). +You can find out more about the CLiC Dickens research project on the [project's website](https://www.birmingham.ac.uk/schools/edacs/departments/englishlanguage/research/projects/clic/index.aspx). If you are interested in learning how to use collocations and keywords in your own research, we recommend starting with [Corpus Analysis with AntConc](/lessons/corpus-analysis-with-antconc) by Heather Froehlich. In this lesson, Froehlich introduces techniques from corpus linguistics, showing how to identify significant patterns of language use within and between sets of texts. And, as always, if you have an idea for a lesson or want to get involved with the *Programming Historian,* please visit our [contribute page](/contribute) for more information. --- -[CLiC](http://clic.bham.ac.uk) (Corpus Linguistics in Context) is a web app specifically designed for the corpus linguistic study of literary texts. While CLiC shares much of its functionality with other corpus tools — similarly to what is described in the [Programming Historian’s lesson ‘Corpus Analysis with AntConc’](/lessons/corpus-analysis-with-antconc) — it also contains additional features that are particularly relevant to literary analysis. These include the ability to search subsets of the text – such as character speech – and a sorting function that goes beyond alphabetic sorting: the ‘KWICGrouper’, which this post focuses on. The CLiC web app has been developed as part of the [CLiC Dickens project](http://www.birmingham.ac.uk/schools/edacs/departments/englishlanguage/research/projects/clic/index.aspx) for the analysis of patterns in 19th century fiction, particularly novels by Charles Dickens. CLiC currently contains 15 Dickens novels and 29 novels by other 19th century authors and a corpus of 19th century children's literature will soon be added. +[CLiC](https://clic.bham.ac.uk) (Corpus Linguistics in Context) is a web app specifically designed for the corpus linguistic study of literary texts. While CLiC shares much of its functionality with other corpus tools — similarly to what is described in the [Programming Historian’s lesson ‘Corpus Analysis with AntConc’](/lessons/corpus-analysis-with-antconc) — it also contains additional features that are particularly relevant to literary analysis. These include the ability to search subsets of the text – such as character speech – and a sorting function that goes beyond alphabetic sorting: the ‘KWICGrouper’, which this post focuses on. The CLiC web app has been developed as part of the [CLiC Dickens project](https://www.birmingham.ac.uk/schools/edacs/departments/englishlanguage/research/projects/clic/index.aspx) for the analysis of patterns in 19th century fiction, particularly novels by Charles Dickens. CLiC currently contains 15 Dickens novels and 29 novels by other 19th century authors and a corpus of 19th century children's literature will soon be added. Apart from aiding literary study, the corpus stylistic analysis of historical fiction can reveal insights into the social context of the texts more widely. 
In this post, we’ll discuss the so-called ‘fireplace pose’ in 19th century fiction that has been identified in literature and other cultural material from the time (for example paintings; see [Korte 1997: 212](https://books.google.co.uk/books?id=o9o4gLzrRPEC&lpg=PP1&pg=PA212#v=onepage&q&f=false)). In CLiC it is possible, for example, to 1) trace textual patterns which describe how fictional characters sit or stand in front of the fire or look at it and 2) compare the patterns found in Dickens with those of other authors. @@ -45,7 +45,7 @@ Looking at the characters represented by this pattern, it is striking that they
-[Figure: Left image from David Copperfield, Chapter 63; right image from Dombey and Son, Chapter 51]
+[Figure: Left image from David Copperfield, Chapter 63; right image from Dombey and Son, Chapter 51]
@@ -81,4 +81,4 @@ In this post, we have introduced corpus linguistic techniques for interrogating · [Mahlberg, M. (2013). *Corpus Stylistics and Dickens’s Fiction*. New York & London: Routledge.](https://books.google.co.uk/books?id=v98rcxoYUbYC&lpg=PP1&pg=PP1#v=onepage&q&f=false) -· [Mahlberg, M., Stockwell, P., de Joode, J., Smith, C., O’Donnell, M. Brook, (2016) CLiC Dickens – Novel uses of concordances for the integration of corpus stylistics and cognitive poetics, Corpora, 11 (3), 433-463.](http://www.euppublishing.com/doi/full/10.3366/cor.2016.0102) \ No newline at end of file +· [Mahlberg, M., Stockwell, P., de Joode, J., Smith, C., O’Donnell, M. Brook, (2016) CLiC Dickens – Novel uses of concordances for the integration of corpus stylistics and cognitive poetics, Corpora, 11 (3), 433-463.](https://www.euppublishing.com/doi/full/10.3366/cor.2016.0102) \ No newline at end of file diff --git a/_posts/2018-03-04-DH-Award-2017.md b/_posts/2018-03-04-DH-Award-2017.md index eacc0fe1b6..6fbf3aa5fc 100644 --- a/_posts/2018-03-04-DH-Award-2017.md +++ b/_posts/2018-03-04-DH-Award-2017.md @@ -9,7 +9,7 @@ categories: posts

Programming Historian en español is DH 2017 Award Winner - Best Series of Posts.

-For the [second year in a row](/posts/dh-award-2016), our team is proud to report that the project has won a [Digital Humanities award](http://dhawards.org/dhawards2017/results/). +For the [second year in a row](/posts/dh-award-2016), our team is proud to report that the project has won a [Digital Humanities award](https://dhawards.org/dhawards2017/results/). This time, the award recognised the hard work of our Spanish Team in creating a translation of the project, and building a Spanish-language community of scholars interested in digital methods. diff --git a/_posts/2018-05-22-Uses-Of-The-Programming-Historian.md b/_posts/2018-05-22-Uses-Of-The-Programming-Historian.md index 0036d318c6..79e1411aa9 100644 --- a/_posts/2018-05-22-Uses-Of-The-Programming-Historian.md +++ b/_posts/2018-05-22-Uses-Of-The-Programming-Historian.md @@ -19,57 +19,57 @@ If you are using the _Programming Historian_ in interesting ways, we'd love to h * S Fox Lee, "Digital methods for the history of psychology", _History of Psychology_ (2016). * Nicholas Terpstra, Colin Rose, 'Mapping Space, Sense, and Movement in Florence: Historical GIS and the Early Modern City' (Routledge, 2016). * Gary Osmond and Murray G. Phillips, 'Sport History in the Digital Era' (University of Illinois Press, 2015). -* Tim Sherratt, '[Unremembering the Forgotten](http://discontents.com.au/unremembering-the-forgotten)', Keynote at DH2015, University of Western Australia (July 3, 2015). +* Tim Sherratt, '[Unremembering the Forgotten](https://discontents.com.au/unremembering-the-forgotten)', Keynote at DH2015, University of Western Australia (July 3, 2015). * Shawn Graham, Ian Milligan, and Scott Weingart, 'Exploring Big Historical Data: The Historian's Macroscope' (Imperial College Press, 2015). -* Cheryl LaGuardia, '[Connecting Researchers to New Digital Tools. Not Dead Yet](http://lj.libraryjournal.com/2014/09/opinion/not-dead-yet/connecting-researchers-to-new-digital-tools-not-dead-yet/#_ )', _Library Journal_ (18 September 2014). -* William J. Turkel, Shenzan Muhammedi, Mary Beth Start, '[Grounding Digital History in the History of Computing](http://muse.jhu.edu/login?auth=0&type=summary&url=/journals/ieee_annals_of_the_history_of_computing/v036/36.2.turkel.html)', _IEEE Annals of the History of Computing_, Vol. 36, No. 2 (2014), pp. 72-75. -* Elijah Meeks and Scott Weingart, '[The Digital Humanities Contribution to Topic Modeling](http://journalofdigitalhumanities.org/2-1/dh-contribution-to-topic-modeling/)', _Journal of Digital Humanities_, Vol. 2, No. 1 (2012). -* Ted Underwood, '[What can topic models of PMLA teach us about the history of literary scholarship?](http://tedunderwood.com/2012/12/14/what-can-topic-models-of-pmla-teach-us-about-the-history-of-literary-scholarship/)' _The Stone and the Shell_ (2012). -* Wingyan Chung, Edward A. Fox, Steven D. Sheetz, Seungwon Yang, '[LIKES: Educating the Next Generation of Knowledge Society Builders](http://aisel.aisnet.org/cgi/viewcontent.cgi?article=1072&context=amcis2009)', _Association for Information Systems: AMCIS Proceedings_ (2009). +* Cheryl LaGuardia, '[Connecting Researchers to New Digital Tools. Not Dead Yet](https://lj.libraryjournal.com/2014/09/opinion/not-dead-yet/connecting-researchers-to-new-digital-tools-not-dead-yet/#_ )', _Library Journal_ (18 September 2014). +* William J. 
Turkel, Shenzan Muhammedi, Mary Beth Start, '[Grounding Digital History in the History of Computing](https://muse.jhu.edu/login?auth=0&type=summary&url=/journals/ieee_annals_of_the_history_of_computing/v036/36.2.turkel.html)', _IEEE Annals of the History of Computing_, Vol. 36, No. 2 (2014), pp. 72-75. +* Elijah Meeks and Scott Weingart, '[The Digital Humanities Contribution to Topic Modeling](https://journalofdigitalhumanities.org/2-1/dh-contribution-to-topic-modeling/)', _Journal of Digital Humanities_, Vol. 2, No. 1 (2012). +* Ted Underwood, '[What can topic models of PMLA teach us about the history of literary scholarship?](https://tedunderwood.com/2012/12/14/what-can-topic-models-of-pmla-teach-us-about-the-history-of-literary-scholarship/)' _The Stone and the Shell_ (2012). +* Wingyan Chung, Edward A. Fox, Steven D. Sheetz, Seungwon Yang, '[LIKES: Educating the Next Generation of Knowledge Society Builders](https://aisel.aisnet.org/cgi/viewcontent.cgi?article=1072&context=amcis2009)', _Association for Information Systems: AMCIS Proceedings_ (2009). ## University Syllabi (2011-2016 only) ### 2016 -* John Garrigus, 'Transatlantic Revolutions and Transformations', University of Texas at Arlington (http://johngarrigus.com/syllabi/5360_f2016/) +* John Garrigus, 'Transatlantic Revolutions and Transformations', University of Texas at Arlington (https://johngarrigus.com/syllabi/5360_f2016/) * Simon Dixon, University of Leicester -* Aurélien Berra, 'Classiques et numériquesLes humanités numériques dans un master d’antiquisants', Université Paris Ouest (http://classnum.hypotheses.org/) -* Shawn Graham, '[Digital History Methods as Public History Performance](http://grad.craftingdigitalhistory.ca/weekly.html)', Carleton University, Canada (Spring 2016). +* Aurélien Berra, 'Classiques et numériquesLes humanités numériques dans un master d’antiquisants', Université Paris Ouest (https://classnum.hypotheses.org/) +* Shawn Graham, '[Digital History Methods as Public History Performance](https://grad.craftingdigitalhistory.ca/weekly.html)', Carleton University, Canada (Spring 2016). * Adam Crymble, 'Intro to Digital History', University of Hertfordshire, UK (Spring 2016). * Adam Crymble, 'Digital History Workshop', University of Hertfordshire, UK (Spring 2016). ### 2015 -* Shawn Graham, 'Digital Humanities (formerly available from: http://dhcu.ca/2015/what-is-a-tool-tutorial)', Carleton University, Canada (Autumn 2015). -* Manan Ahmed, 'Borderlands: Towards a Spatial History of Empire (formerly available from: http://mananahmed.github.io/borderlands.html)', Columbia University, USA (Spring 2015). +* Shawn Graham, 'Digital Humanities (formerly available from: https://dhcu.ca/2015/what-is-a-tool-tutorial)', Carleton University, Canada (Autumn 2015). +* Manan Ahmed, 'Borderlands: Towards a Spatial History of Empire (formerly available from: https://mananahmed.github.io/borderlands.html)', Columbia University, USA (Spring 2015). * Andrew Ross, '[Exploring Digital Humanities (HIST 4170)](https://www.uoguelph.ca/history/sites/uoguelph.ca.history/files/syllabus/4170%20W15.pdf)', University of Guelph, Canada (Winter 2015). -* Adam Crymble, '[Intro to Digital History](http://adamcrymble.org/intro-to-digital-history-2015/)', University of Hertfordshire, UK (Spring 2015). -* John Russell, '[Digital Scholarship Methods](http://web.archive.org/web/20150905233647/https://library.uoregon.edu/node/4570)', University of Oregon, USA (Winter 2015). 
+* Adam Crymble, '[Intro to Digital History](https://adamcrymble.org/intro-to-digital-history-2015/)', University of Hertfordshire, UK (Spring 2015). +* John Russell, '[Digital Scholarship Methods](https://web.archive.org/web/20150905233647/https://library.uoregon.edu/node/4570)', University of Oregon, USA (Winter 2015). ### 2014 -* Christopher Church, 'Introduction to Digital Humanities' (University of Nevada, Reno) (http://www.christophermchurch.com/draft-for-new-course-digital-toolbox-for-historians-unr/). -* Lincoln Mullen, '[Clio 3: Programming for Historians (HIST 698)](http://lincolnmullen.com/files/clio3.syllabus.hist698.2014f.pdf)', George Mason University, USA (Autumn 2014). -* Wilko Graf von Hardenberg, '[Digital History (History 795)](http://www.wilkohardenberg.net/content/Hardenberg_DigitalHistory_Hist795.pdf)', University of Wisconsin-Madison, USA (Spring 2014). -* Christopher Church, '[Introduction to the Digital Humanities](http://www.christophermchurch.com/draft-for-new-course-digital-toolbox-for-historians-unr/)', University of Nevada - Reno, USA (2014). -* Jason A. Heppler, 'Digital History: Concepts, Methods, Problems (History 205F)' (formerly available from: http://stanford.edu/~jheppler/stanford.syllabus.hist205f.2014f.pdf), Stanford University, USA (Autumn 2014). -* Elisha E. Besherho-Bondar, '[Digital Humanities / Digital Studies](https://web.archive.org/web/20170309170558/http://www.pitt.edu/~ebb8/DHDS/)', University of Pittsburg, USA (Autumn 2014). -* Andrew M Shocket, '[Intro to DH (ACS 6820)](http://intro-dh-2014.andyschocket.net/syllabus/)', Bowling Green State University, USA (Spring 2014). -* Jeff McClurken, '[Adventures in Digital History (HIST 428)](http://dh2014.umwblogs.org/syllabus/)', University of Mary Washington, USA (Spring 2014). -* Jennifer Guiliano, '[Making/Building Digital History](http://devdh.org/files/downloads/Guiliano_Digital_History_Syllabus_Fall2014_IUPUI.pdf)', Indiana University-Purdue University Indianapolis, USA (Autumn 2014). -* Andrew J. Torget, '[Introduction to Digital Scholarship (HIST 5100)](https://web.archive.org/web/20200919093522/http://torget.us/HIST5100/syllabus/)', University of North Texas, USA (Spring 2014). -* Anne Mitchell Whisnant, 'Introduction to Public History (History 671) (formerly available from: http://publichistory.web.unc.edu/syllabus/)', UNC-Chapel Hill, USA (Autumn 2014). +* Christopher Church, 'Introduction to Digital Humanities' (University of Nevada, Reno) (https://www.christophermchurch.com/draft-for-new-course-digital-toolbox-for-historians-unr/). +* Lincoln Mullen, '[Clio 3: Programming for Historians (HIST 698)](https://lincolnmullen.com/files/clio3.syllabus.hist698.2014f.pdf)', George Mason University, USA (Autumn 2014). +* Wilko Graf von Hardenberg, '[Digital History (History 795)](https://www.wilkohardenberg.net/content/Hardenberg_DigitalHistory_Hist795.pdf)', University of Wisconsin-Madison, USA (Spring 2014). +* Christopher Church, '[Introduction to the Digital Humanities](https://www.christophermchurch.com/draft-for-new-course-digital-toolbox-for-historians-unr/)', University of Nevada - Reno, USA (2014). +* Jason A. Heppler, 'Digital History: Concepts, Methods, Problems (History 205F)' (formerly available from: https://stanford.edu/~jheppler/stanford.syllabus.hist205f.2014f.pdf), Stanford University, USA (Autumn 2014). +* Elisha E. 
Besherho-Bondar, '[Digital Humanities / Digital Studies](https://web.archive.org/web/20170309170558/https://www.pitt.edu/~ebb8/DHDS/)', University of Pittsburg, USA (Autumn 2014). +* Andrew M Shocket, '[Intro to DH (ACS 6820)](https://intro-dh-2014.andyschocket.net/syllabus/)', Bowling Green State University, USA (Spring 2014). +* Jeff McClurken, '[Adventures in Digital History (HIST 428)](https://dh2014.umwblogs.org/syllabus/)', University of Mary Washington, USA (Spring 2014). +* Jennifer Guiliano, '[Making/Building Digital History](https://devdh.org/files/downloads/Guiliano_Digital_History_Syllabus_Fall2014_IUPUI.pdf)', Indiana University-Purdue University Indianapolis, USA (Autumn 2014). +* Andrew J. Torget, '[Introduction to Digital Scholarship (HIST 5100)](https://web.archive.org/web/20200919093522/https://torget.us/HIST5100/syllabus/)', University of North Texas, USA (Spring 2014). +* Anne Mitchell Whisnant, 'Introduction to Public History (History 671) (formerly available from: https://publichistory.web.unc.edu/syllabus/)', UNC-Chapel Hill, USA (Autumn 2014). * Ian Milligan, '[Digital History (HIST 303)](https://ianmilli.files.wordpress.com/2014/01/w2014-hist-303.pdf)', University of Waterloo, Canada (Winter 2014). ### 2013 -* Jim English, '[Empirical Method in Literary Studies](http://web.archive.org/web/20180127231436/http://www.english.upenn.edu/~jenglish/Courses/Fall2014/505Syllabus.pdf)', University of Pennsylvania, USA (Autumn 2013). -* Melissa Bailar and Lisa Spiro, '[Introduction to Digital Humanities](http://digitalhumanities.rice.edu/fall-2013-syllabus/)', Rice University, USA (Autumn 2013). -* Devon Elliott, 'Digital History and American Popular Culture (HIST2897F)'(formerly available from: http://www.huronuc.on.ca/Assets/website/Document/FASS/HIS/HIS2897FDElliott2013.pdf), Huron College, Canada (Autumn 2013). -* Chad Black, '[Theory and Practice of Digital History](http://dh.chadblack.net/info/syllabus/)', University of Tennessee, USA (Autumn 2013). -* Aaron Shapiro, '[Seminar in Digital History and New Media (History 7970)](https://web.archive.org/web/20160121064807/http://wp.auburn.edu/dighist/?page_id=127)', Auburn University, USA (2013). +* Jim English, '[Empirical Method in Literary Studies](https://web.archive.org/web/20180127231436/https://www.english.upenn.edu/~jenglish/Courses/Fall2014/505Syllabus.pdf)', University of Pennsylvania, USA (Autumn 2013). +* Melissa Bailar and Lisa Spiro, '[Introduction to Digital Humanities](https://digitalhumanities.rice.edu/fall-2013-syllabus/)', Rice University, USA (Autumn 2013). +* Devon Elliott, 'Digital History and American Popular Culture (HIST2897F)'(formerly available from: https://www.huronuc.on.ca/Assets/website/Document/FASS/HIS/HIS2897FDElliott2013.pdf), Huron College, Canada (Autumn 2013). +* Chad Black, '[Theory and Practice of Digital History](https://dh.chadblack.net/info/syllabus/)', University of Tennessee, USA (Autumn 2013). +* Aaron Shapiro, '[Seminar in Digital History and New Media (History 7970)](https://web.archive.org/web/20160121064807/https://wp.auburn.edu/dighist/?page_id=127)', Auburn University, USA (2013). ### 2012 -* Matthew Wilkens, '[Digital Humanities (English 90127)](http://www.scottbot.net/HIAL/wp-content/uploads/2012/09/Wilkens_DH_Syllabus_Init.pdf)', Notre Dame, USA (Autumn 2012). +* Matthew Wilkens, '[Digital Humanities (English 90127)](https://www.scottbot.net/HIAL/wp-content/uploads/2012/09/Wilkens_DH_Syllabus_Init.pdf)', Notre Dame, USA (Autumn 2012). 
### 2011

@@ -87,4 +87,4 @@ ___Using [the intro Jekyll lesson](/lessons/building-static-sites-with-jekyll-gi

* [Stewart Varner](https://twitter.com/StewartVarner/status/722520696606298112)
* [Eric Loy](https://twitter.com/eric_loy/status/758039397539409921)
* [Jamie Howe](https://twitter.com/Gaymerbrarian/status/721490542366994432)
-* [Will Hanley](https://twitter.com/HanleyWill/status/725880236315934720) for [prosop.org](http://prosop.org)
+* [Will Hanley](https://twitter.com/HanleyWill/status/725880236315934720) for [prosop.org](https://prosop.org)

diff --git a/_posts/2018-05-24-anna-maria-sichani.md b/_posts/2018-05-24-anna-maria-sichani.md
index 2763a1f0c5..6d09db71c5 100644
--- a/_posts/2018-05-24-anna-maria-sichani.md
+++ b/_posts/2018-05-24-anna-maria-sichani.md
@@ -11,7 +11,7 @@ categories: posts

We are pleased to announce that Anna-Maria Sichani has joined the *Programming Historian* project team. She is a literary and cultural historian, with a specialisation in Modern Greek studies, and a Digital Humanist. Anna-Maria is currently a Research Fellow in Media History and Historical Data Modelling, working on the AHRC-funded project "BBC Connected Histories", affiliated with the Department of Media, Film and Music at University of Sussex and Sussex Humanities Lab.

-She has been a Marie Skłodowska-Curie Research Fellow (DiXiT ITN) at Huygens-ING and a PhD Research Fellow at King's Digital Lab and she has collaborated with a number of Digital Humanities projects (COST Distant Reading for European Literary History, Transcribe Bentham, DARIAH etc). Her skills include modelling, encoding and digital publication of textual and cross-domain materials, sustainable project design and management, data architecture and ​analysis. She is currently also serving as Communications Fellow for the [Alliance of Digital Humanities Organizations (ADHO)](http://adho.org/).
+She has been a Marie Skłodowska-Curie Research Fellow (DiXiT ITN) at Huygens-ING and a PhD Research Fellow at King's Digital Lab, and she has collaborated with a number of Digital Humanities projects (COST Distant Reading for European Literary History, Transcribe Bentham, DARIAH, etc.). Her skills include modelling, encoding and digital publication of textual and cross-domain materials, sustainable project design and management, data architecture and analysis. She is currently also serving as Communications Fellow for the [Alliance of Digital Humanities Organizations (ADHO)](https://adho.org/).

She will work as an editor and contribute to our internationalization and outreach strategy.

diff --git a/_posts/2018-07-13-FR-team.md b/_posts/2018-07-13-FR-team.md
index 710a0bb4a0..923db3e685 100644
--- a/_posts/2018-07-13-FR-team.md
+++ b/_posts/2018-07-13-FR-team.md
@@ -12,12 +12,12 @@ Marie Puren and Sofia Papastamkou have joined the Programming Historian

diff --git a/_posts/2018-11-09-welcome-zoe-leblanc.md b/_posts/2018-11-09-welcome-zoe-leblanc.md
index 547cf9b47f..6aeaf81f1d 100644
--- a/_posts/2018-11-09-welcome-zoe-leblanc.md
+++ b/_posts/2018-11-09-welcome-zoe-leblanc.md
@@ -11,7 +11,7 @@ categories: posts

We are pleased to share that Zoe LeBlanc has joined the editorial board of _The Programming Historian_.

-Zoe is a digital humanities developer at the [Scholars’ Lab](http://scholarslab.org) at the University of Virginia.
In this position she regularly works on building large data pipelines for humanities text data, web apps for mapping and data visualization, and statistical modeling for text and image analysis projects in addition to teaching and mentoring students and faculty. +Zoe is a digital humanities developer at the [Scholars’ Lab](https://scholarslab.org) at the University of Virginia. In this position she regularly works on building large data pipelines for humanities text data, web apps for mapping and data visualization, and statistical modeling for text and image analysis projects in addition to teaching and mentoring students and faculty. She is also entering her final year as a doctoral candidate in history at Vanderbilt University. Her dissertation, “Circulating Anti-Colonial Cairo”, is about Cairo’s role as a hub for international anti-colonial movements and anti-colonial media production. She explores how the establishment of the Middle East News Agency in Cairo in 1955 was a vehicle for the UAR to present a counter narrative to the perceived biased coverage from Western press agencies. Ultimately, she argues that anti-colonialism was both locally constructed and deeply enmeshed in international debates over the future of decolonization. Zoe works primarily in print newspapers and periodicals from Cairo and other Third World capitals, as well as diplomatic cables from Western embassies in Cairo and Western newspapers. Under the hood of the project, Zoe relies on a web app she built to manage her data, and extract text and images from her archival research. She also utilizes statistical models, machine learning, natural language processing and computer vision algorithms to understand how discourses and symbolism in these sources changed over time. diff --git a/_posts/2019-04-08-bienvenue-ph-fr.md b/_posts/2019-04-08-bienvenue-ph-fr.md index e3bf262d43..26907cddd1 100644 --- a/_posts/2019-04-08-bienvenue-ph-fr.md +++ b/_posts/2019-04-08-bienvenue-ph-fr.md @@ -8,14 +8,14 @@ categories: posts

Schéma de Paul Otlet: documentation et télécommunication, Mundaneum, Centre d'archives de la Fédération Wallonie-Bruxelles, Mons, Belgique. © Mundaneum

-Le _[Programming Historian en français](http://programminghistorian.org/fr)_ est en ligne! Lancée deux ans après l'inauguration d'une édition hispanophone, qui rencontre un franc succès, la version francophone confirme ainsi [l'internationalisation en cours](https://programminghistorian.org/fr/apropos#histoire-du-projet) d'un projet initialement anglophone. Nous proposons des tutoriels ouvertement évalués par les pairs qui peuvent aider les historien(ne)s, mais aussi d'autres chercheurs et chercheuses en sciences humaines et sociales, à s'approprier des méthodes interdisciplinaires, des outils numériques et des flux de travail leur permettant de conduire des recherches et de mener des enseignements avec utilisation de données numériques. La diversité étant au coeur du projet global, toutes les variations de la langue française sont acceptées. Par ailleurs, le _Programming Historian en français_ s'engage en faveur de l'utilisation de l'écriture inclusive suivant les principes arrêtés par l'Office québécois de la langue française.
+Le _[Programming Historian en français](https://programminghistorian.org/fr)_ est en ligne! Lancée deux ans après l'inauguration d'une édition hispanophone, qui rencontre un franc succès, la version francophone confirme ainsi [l'internationalisation en cours](https://programminghistorian.org/fr/apropos#histoire-du-projet) d'un projet initialement anglophone. Nous proposons des tutoriels ouvertement évalués par les pairs qui peuvent aider les historien(ne)s, mais aussi d'autres chercheurs et chercheuses en sciences humaines et sociales, à s'approprier des méthodes interdisciplinaires, des outils numériques et des flux de travail leur permettant de conduire des recherches et de mener des enseignements avec utilisation de données numériques. La diversité étant au coeur du projet global, toutes les variations de la langue française sont acceptées. Par ailleurs, le _Programming Historian en français_ s'engage en faveur de l'utilisation de l'écriture inclusive suivant les principes arrêtés par l'Office québécois de la langue française.

A l'heure actuelle, nous nous employons principalement à traduire les tutoriels déjà publiés. Nous vous invitons à proposer une traduction, si celle-ci peut par exemple vous être utile dans le cadre de vos enseignements, ou de vous manifester, si vous souhaitez contribuer soit en traduisant soit en évaluant des traductions de tutoriels. Vous pouvez en outre proposer une leçon originale, en espérant que vous serez de plus en plus nombreux et nombreuses à le faire. N'hésitez pas à faire un tour sur [notre page dédiée](https://programminghistorian.org/fr/contribuer) pour vous renseigner sur toutes les manières possibles qui s'offrent à ceux et à celles qui souhaitent contribuer au projet. Nous avons hâte de recevoir vos propositions!

Depuis ses débuts, l'initiative francophone a pu compter sur l'appui d'une communauté. Nous tenons ainsi à remercier chaleureusement [ceux et celles qui ont contribué avec des traductions et/ou des relectures](https://github.com/programminghistorian/ph-submissions/issues?q=is%3Aissue+is%3Aopen+label%3AFrench), dont le master 2 "Technologies numériques appliquées à l'histoire" de l'École nationale des chartes qui a fourni un corpus de traductions préparées par les étudiant(e)s dans le cadre de leurs cours d'anglais.
Nous remercions aussi tous les amis qui ont apporté leur petite pierre lors des [échanges sur le titre et le logo de la version francophone](https://github.com/programminghistorian/jekyll/issues/850).

-Intégrer une nouvelle langue dans le _Programming Historian_, outre l'anglais et l'espagnol qui en faisaient déjà partie, a nécessité un effort considérable de la part de l'équipe technique du projet, en particulier de [Matthew Lincoln](https://github.com/mdlincoln). Il a pu consacrer sont temps au développement du projet francophone grâce à une subvention non-financière du centre [dSHARP lab at Carnegie Mellon University](http://dsharp.library.cmu.edu/) et de la [Fondation Andrew W. Mellon](https://mellon.org/), que nous remercions chaleureusement pour leur aide.
+Intégrer une nouvelle langue dans le _Programming Historian_, outre l'anglais et l'espagnol qui en faisaient déjà partie, a nécessité un effort considérable de la part de l'équipe technique du projet, en particulier de [Matthew Lincoln](https://github.com/mdlincoln). Il a pu consacrer son temps au développement du projet francophone grâce à une subvention non-financière du centre [dSHARP lab at Carnegie Mellon University](https://dsharp.library.cmu.edu/) et de la [Fondation Andrew W. Mellon](https://mellon.org/), que nous remercions chaleureusement pour leur aide.

Un dernier mot pour souligner à quel point le lancement du _Programming Historian en français_ a été une réalisation véritablement collective au sein du projet global. Ainsi, le travail intensif de l'équipe francophone a bénéficié grandement de l'expérience de son homologue hispanophone, mais aussi du soutien sans faille des membres de longue date du projet anglophone. Leurs compétences techniques et pédagogiques ont facilité l'intégration d'une nouvelle équipe et l'acculturation aux pratiques de la gestion d'un projet à accès ouvert sur Github. Un grand merci à toutes et à tous pour ce beau travail d'équipe. Nous sommes ravi(e)s de faire partie de ce projet global et nous avons hâte pour la suite!

-_Merci au Mundaneum, Centre d'archives de la Fédération Wallonie-Bruxelles, en Belgique, d'avoir permis l'utilisation de [l'image du schéma de Paul Otlet: documentation et télécommunication](http://archives.mundaneum.org/fr/versions-digitalisees/schema-de-paul-otlet-documentation-et-telecommunication). © Mundaneum_
+_Merci au Mundaneum, Centre d'archives de la Fédération Wallonie-Bruxelles, en Belgique, d'avoir permis l'utilisation de [l'image du schéma de Paul Otlet: documentation et télécommunication](https://archives.mundaneum.org/fr/versions-digitalisees/schema-de-paul-otlet-documentation-et-telecommunication). © Mundaneum_

diff --git a/_posts/2019-04-08-welcome-ph-fr.md b/_posts/2019-04-08-welcome-ph-fr.md
index 706c3aeb54..782d43518b 100644
--- a/_posts/2019-04-08-welcome-ph-fr.md
+++ b/_posts/2019-04-08-welcome-ph-fr.md
@@ -8,14 +8,14 @@ categories: posts

Paul Otlet's scheme on information and distant communication, Mundaneum, Archive Centre of the French Community of Wallonia-Brussels, Mons, Belgium. © Mundaneum

-_The Programming Historian en français_ is on air! You can find us on . The launch of a francophone version comes two years after the inauguration of a successful Spanish-speaking edition, and confirms the [ongoing internationalisation](https://programminghistorian.org/en/about#history-of-the-project) of the original English-speaking project. We propose open peer-reviewed tutorials that aim at helping historians and other humanists to gain familiarity with interdisciplinary methods, digital tools, and workflows that enable them to conduct research projects and teaching duties with use of digital data. As diversity is at the core of the global project, all variations of the French language are accepted. Moreover, _The Programming Historian en français_ is committed to the use of the gender-inclusive writing following the principles set by the Office québécois de la langue française.
+_The Programming Historian en français_ is on air! You can find us on <https://programminghistorian.org/fr>. The launch of a francophone version comes two years after the inauguration of a successful Spanish-speaking edition, and confirms the [ongoing internationalisation](https://programminghistorian.org/en/about#history-of-the-project) of the original English-speaking project. We propose open peer-reviewed tutorials that aim at helping historians and other humanists to gain familiarity with interdisciplinary methods, digital tools, and workflows that enable them to conduct research projects and teaching duties with use of digital data. As diversity is at the core of the global project, all variations of the French language are accepted. Moreover, _The Programming Historian en français_ is committed to the use of gender-inclusive writing following the principles set by the Office québécois de la langue française.

We currently work principally on translations of the existing tutorials. If you wish to suggest an existing lesson to translate, for example for use in your classroom, or if you want to contribute yourself either translating or reviewing an existing translation, or even proposing an original lesson, please let us know. You can find more information on how to [contribute](https://programminghistorian.org/fr/contribuer) in the dedicated page of our web site. We are eager to receive your proposals!

The francophone project has found spontaneous support from the community since its beginnings. We thank warmly [all volunteers who acted either as translators or reviewers](https://github.com/programminghistorian/ph-submissions/issues?q=is%3Aissue+is%3Aopen+label%3AFrench), and the students of the master "Digital Technologies Applied to History" of the École nationale des chartes, Paris, France for offering a corpus of translations prepared as an exercise for their English class. A big thanks also to [all friends who participated in our exchanges regarding the title and the logo of the francophone version](https://github.com/programminghistorian/jekyll/issues/850).

-Preparing _The Programming Historian_ to accommodate languages beyond English and Spanish required extended effort from our technical team, in particular from [Matthew Lincoln](https://github.com/mdlincoln), whose development time was supported through an in-kind grant from the [dSHARP lab at Carnegie Mellon University](http://dsharp.library.cmu.edu/) and the [Andrew W. Mellon Foundation](https://mellon.org/). We express all of our gratitude for their help.
+Preparing _The Programming Historian_ to accommodate languages beyond English and Spanish required extended effort from our technical team, in particular from [Matthew Lincoln](https://github.com/mdlincoln), whose development time was supported through an in-kind grant from the [dSHARP lab at Carnegie Mellon University](https://dsharp.library.cmu.edu/) and the [Andrew W. Mellon Foundation](https://mellon.org/). We express all of our gratitude for their help.

Last but not least: the launch of _The Programming Historian en français_ is a team achievement in the sense that the intensive work of the French-speaking sub-team was greatly helped by exchanges with its Spanish-speaking counterpart, and the continuing support from more experienced members of the global team. Their technical and pedagogical skills counted a lot for a new sub-team to integrate and get familiar with running an open-access project on Github. We are grateful to them for this beautiful team accomplishment. We are thrilled to be part of this global project and look forward to what comes next!

-_We are grateful to Mundaneum, Archive Centre of the French Community of Wallonia-Brussels, Mons, Belgium, for allowing use of [the digitized image of Paul Otlet's scheme on information and distant communication](http://archives.mundaneum.org/fr/versions-digitalisees/schema-de-paul-otlet-documentation-et-telecommunication). © Mundaneum_
+_We are grateful to Mundaneum, Archive Centre of the French Community of Wallonia-Brussels, Mons, Belgium, for allowing use of [the digitized image of Paul Otlet's scheme on information and distant communication](https://archives.mundaneum.org/fr/versions-digitalisees/schema-de-paul-otlet-documentation-et-telecommunication). © Mundaneum_

diff --git a/_posts/2019-06-30-boletin-informativo.md b/_posts/2019-06-30-boletin-informativo.md
index a94fa596ca..d721a737b4 100644
--- a/_posts/2019-06-30-boletin-informativo.md
+++ b/_posts/2019-06-30-boletin-informativo.md
@@ -14,7 +14,7 @@ En este primer boletín informativo nos gustaría destacar los siguientes logros

## Publicación de PH en francés

-Le [*Programming Historian en français*](http://programminghistorian.org/fr) est en ligne! 
+Le [*Programming Historian en français*](https://programminghistorian.org/fr) est en ligne! 

Con el propósito de expandir nuestro proyecto en el ámbito internacional, *PH en français* fue publicado oficialmente en abril de 2019. Añadir un tercer idioma a la revista solo fue posible gracias al arduo trabajo y la disciplina del equipo editorial francés y de nuestro equipo técnico. El equipo francófono trabajó durante más de diez meses en la traducción de la infraestructura de la revista mientras que el equipo técnico reestructuraba la web para ser trilingüe.

@@ -71,4 +71,4 @@ Con el objetivo de poder publicar más lecciones originales en español y contin

## Próximos eventos

Los próximos 8 y 11 de julio Antonio Rojas Castro, Anna-Maria Sichani y Sofia Papastamkou presentarán su poster "Designing Multilingual Digital Pedagogy Initiatives: The Programming Historian for English, Spanish, and French speaking DH Communities" o "El diseño de iniciativas pedagógicas digitales y multilingües: Programming Historian para las comunidades HD hablantes de inglés, español y francés" en la [**conferencia DH 2019**](https://dh2019.adho.org) en Utrecht, Países Bajos.
Además, participarán en el evento "Accelerating DH Education" previo a la conferencia con un taller titulado "Three Challenges in Developing Open Multilingual DH Educational Resources: The Case of The Programming Historian" o "Tres retos en la producción de recursos multilingües en abierto de HD: El caso de Programming Historian".

-Parte del equipo de *Programming Historian en español* estará en la [**conferencia ACH 2019**](http://ach2019.ach.org) el 25 de julio en Pittsburgh, Estados Unidos. Maria José Afanador-Llach, Jennifer Isasi y Antonio Rojas Castro presentarán su trabajo "Retos en la producción de tutoriales de HD en contextos hispanohablantes."
+Parte del equipo de *Programming Historian en español* estará en la [**conferencia ACH 2019**](https://ach2019.ach.org) el 25 de julio en Pittsburgh, Estados Unidos. Maria José Afanador-Llach, Jennifer Isasi y Antonio Rojas Castro presentarán su trabajo "Retos en la producción de tutoriales de HD en contextos hispanohablantes."

diff --git a/_posts/2019-06-30-buletin-de-information.md b/_posts/2019-06-30-buletin-de-information.md
index 2e51698ef4..d4d0f1bd3f 100644
--- a/_posts/2019-06-30-buletin-de-information.md
+++ b/_posts/2019-06-30-buletin-de-information.md
@@ -12,7 +12,7 @@ categories: posts

## Le PH en français est en ligne!

-Le [*Programming Historian en français*](http://programminghistorian.org/fr) est en ligne! 
+Le [*Programming Historian en français*](https://programminghistorian.org/fr) est en ligne! 

Dans le but d'élargir notre projet sur le plan international, le *Programming Historian en français* a été officiellement lancé en avril 2019. Cette troisième version de notre revue n'a été possible que grâce au travail assidu et discipliné du comité de rédaction français et de notre équipe technique. Sofia Papastamkou, Marie Puren et François Dominic Laramée ont travaillé pendant plus de dix mois sur la traduction de l'infrastructure du site, que le travail en parallèle de l'équipe technique a permis de restructurer pour le rendre trilingue.

@@ -73,4 +73,4 @@ Pour atteindre nos objectifs, à savoir la publication à la fois des leçons or

## Évènements à venir

Lors de la [**conférence DH 2019**](https://dh2019.adho.org) qui aura lieu à Utrecht, Pays-Bas, Antonio Rojas Castro, Anna-Maria Sichani et Sofia Papastamkou présenteront le 11 juillet leur poster "Designing Multilingual Digital Pedagogy Initiatives: The Programming Historian for English, Spanish, and French speaking DH Communities" (Concevoir des initiatives de pédagogie numérique multilingue: le *Programming Historian* pour les communautés anglophones, hispanophones et francophones des humanités numériques). Avant la conférence, la même équipe participe également, le 8 juillet, à l'atelier "Accelerating DH Education" (Accélérant l'éducation en humanités numériques) avec une présentation intitulée "Three Challenges in Developing Open Multilingual DH Educational Resources: The Case of The Programming Historian" (Trois défis pour le développement de ressources éducationnelles multilingues en humanités numériques: le cas du Programming Historian).

-En outre, une partie de l'équipe du *Programming Historian en español* participe à la [**conférence ACH 2019**](http://ach2019.ach.org), le 25 juillet, à Pittsburgh, Pennsylvanie, aux Etats-Unis.
Plus particulièrement, Maria José Afanador-Llach, Jennifer Isasi et Antonio Rojas Castro présentent "Retos en la producción de tutoriales de HD en contextos hispanohablantes" ou "Les défis de la production de leçons en humanités numériques dans des contextes hispanophones".
+En outre, une partie de l'équipe du *Programming Historian en español* participe à la [**conférence ACH 2019**](https://ach2019.ach.org), le 25 juillet, à Pittsburgh, Pennsylvanie, aux Etats-Unis. Plus particulièrement, Maria José Afanador-Llach, Jennifer Isasi et Antonio Rojas Castro présentent "Retos en la producción de tutoriales de HD en contextos hispanohablantes" ou "Les défis de la production de leçons en humanités numériques dans des contextes hispanophones".

diff --git a/_posts/2019-06-30-mid-year-newsletter.md b/_posts/2019-06-30-mid-year-newsletter.md
index 82c581ee89..19a14df4c2 100644
--- a/_posts/2019-06-30-mid-year-newsletter.md
+++ b/_posts/2019-06-30-mid-year-newsletter.md
@@ -14,7 +14,7 @@ In this first newsletter we would like to highlight the following achievements,

## Launch of PH in French

-Le [*Programming Historian en français*](http://programminghistorian.org/fr) est en ligne! 
+Le [*Programming Historian en français*](https://programminghistorian.org/fr) est en ligne! 

In order to expand our project internationally, *Programming Historian en français* [officially launched](https://programminghistorian.org/posts/welcome-ph-fr) in April 2019. The addition of a third language was only possible thanks to the hard work of the French editorial board and our technical team. Sofia Papastamkou, Marie Puren and François Dominic Laramée worked for more than ten months on the translation of the journal’s infrastructure before its launch. At the same time, the technical team restructured and made the site trilingual.

@@ -73,4 +73,4 @@ In order to achieve our goals of publishing more original lessons in Spanish and

## Upcoming Events

On July 11, Antonio Rojas Castro, Anna-Maria Sichani and Sofia Papastamkou will present their poster "Designing Multilingual Digital Pedagogy Initiatives: The Programming Historian for English, Spanish, and French speaking DH Communities" at the [**DH 2019 Conference**](https://dh2019.adho.org) in Utrecht, The Netherlands. They also participate in the pre-conference workshop "Accelerating DH Education", on July 8, where they present "Three Challenges in Developing Open Multilingual DH Educational Resources: The Case of The Programming Historian".

-Part of the team of *The Programming Historian en español* will present at the [**ACH 2019 Conference**](http://ach2019.ach.org) on July 25, in Pittsburgh. Maria José Afanador-Llach, Jennifer Isasi and Antonio Rojas Castro are presenting "Retos en la producción de tutoriales de HD en contextos hispanohablantes" or "Challenges in DH lesson production in Spanish speaking contexts."
+Part of the team of *The Programming Historian en español* will present at the [**ACH 2019 Conference**](https://ach2019.ach.org) on July 25, in Pittsburgh. Maria José Afanador-Llach, Jennifer Isasi and Antonio Rojas Castro are presenting "Retos en la producción de tutoriales de HD en contextos hispanohablantes" or "Challenges in DH lesson production in Spanish speaking contexts."

Programming Historian en español recibe el premio a Mejor iniciativa formativa desarrollada durante el año 2018 de la HDH.

-¡Estamos de enhorabuena! El trabajo del equipo de *Programming Historian en español* ha sido reconodio como **"Mejor iniciativa formativa desarrollada durante el año 2018"** en la [I Edición de los Premios HDH 2018](http://humanidadesdigitaleshispanicas.es/resolucion-convocatoria-i-edicion-premios-hdh/), otorgados por la Asociación de Humanidades Digitales Hispánicas.
+¡Estamos de enhorabuena! El trabajo del equipo de *Programming Historian en español* ha sido reconocido como **"Mejor iniciativa formativa desarrollada durante el año 2018"** en la [I Edición de los Premios HDH 2018](https://humanidadesdigitaleshispanicas.es/resolucion-convocatoria-i-edicion-premios-hdh/), otorgados por la Asociación de Humanidades Digitales Hispánicas.

-Con estos premios la [Asociación HDH](http://humanidadesdigitaleshispanicas.es) ha querido reconocer social y académicamente las diferentes iniciativas desarrolladas en el ámbito de las humanidades digitales desde las bases de la contribución al avance del conocimiento, la reflexión crítica, el desarrollo de currículos formativos y la difusión y visibilidad de las HD, entre otras cosas.
+Con estos premios la [Asociación HDH](https://humanidadesdigitaleshispanicas.es) ha querido reconocer social y académicamente las diferentes iniciativas desarrolladas en el ámbito de las humanidades digitales desde las bases de la contribución al avance del conocimiento, la reflexión crítica, el desarrollo de currículos formativos y la difusión y visibilidad de las HD, entre otras cosas.

El premio será entregado el 23 de octubre en el marco del [IV Congreso de la HDH (HDH2019)](https://eventos.uclm.es/24964/detail/iv-congreso-internacional-de-la-asociacion-de-humanidades-digitales-hispanicas.html) en la ciudad de Toledo, España. Nuestro compañero Antonio Rojas Castro, que estará presentando el trabajo "*The Programming Historian en español*: de la traducción a la creación de recursos educativos abiertos" escrito junto con Jennifer Isasi, será el encargado de recibir el galardón.

diff --git a/_posts/2019-09-26-welcome-martin-grandjean.md b/_posts/2019-09-26-welcome-martin-grandjean.md
index 56e29e2490..3a01adec19 100644
--- a/_posts/2019-09-26-welcome-martin-grandjean.md
+++ b/_posts/2019-09-26-welcome-martin-grandjean.md
@@ -9,7 +9,7 @@ categories: posts

We are excited to announce that Martin Grandjean joined the editorial board of the *Programming Historian*!

-Martin is currently a junior lecturer in contemporary history at the University of Lausanne. He also teaches history and digital humanities at the [Ecole polytechnique fédérale de Lausanne](https://people.epfl.ch/303254). He is a member of the board of Humanistica, the French-speaking association for Digital Humanities, and the Steering Committee of the Alliance of Digital Humanities Organizations (ADHO). His main research domain is network analysis in history with a focus on intellectual history and history of international organizations. His recent publications range from the [analysis of the digital humanities community on Twitter](https://hal.archives-ouvertes.fr/hal-01517493/document) to the study of specific [historical networks](http://www.martingrandjean.ch/complex-structures-and-international-organizations/) and theoretical contributions on [the use of network analysis in history](https://halshs.archives-ouvertes.fr/halshs-02179024/document). You can find out more on Martin's work on his personal [blog](http://www.martingrandjean.ch).
+Martin is currently a junior lecturer in contemporary history at the University of Lausanne. He also teaches history and digital humanities at the [Ecole polytechnique fédérale de Lausanne](https://people.epfl.ch/303254). He is a member of the board of Humanistica, the French-speaking association for Digital Humanities, and the Steering Committee of the Alliance of Digital Humanities Organizations (ADHO). His main research domain is network analysis in history with a focus on intellectual history and history of international organizations. His recent publications range from the [analysis of the digital humanities community on Twitter](https://hal.archives-ouvertes.fr/hal-01517493/document) to the study of specific [historical networks](https://www.martingrandjean.ch/complex-structures-and-international-organizations/) and theoretical contributions on [the use of network analysis in history](https://halshs.archives-ouvertes.fr/halshs-02179024/document). You can find out more on Martin's work on his personal [blog](https://www.martingrandjean.ch).

Martin will reinforce the *Programming Historian en français* where he will contribute as an editor and translator. We are all eager to work with him on further expanding the French-speaking project and wish him a very warm welcome. Sois le bienvenu, Martin!

diff --git a/_posts/2019-09-30-bolentin-informativo.md b/_posts/2019-09-30-bolentin-informativo.md
index f8f19372e1..774a4e18f0 100644
--- a/_posts/2019-09-30-bolentin-informativo.md
+++ b/_posts/2019-09-30-bolentin-informativo.md
@@ -56,7 +56,7 @@ Al mismo tiempo, participaron en un taller pre-conferencia titulado "La acelerac

-Unos días más tarde y al otro lado del charco en la [Conferencia ACH 2019](http://ach2019.ach.org) en Pittsburgh, Estados Unidos, Maria José Afanador-Llach y Jennifer Isasi presentaron "Retos en la producción de tutoriales de HD en contextos hispanohablantes." Se enfocaaron en la experienca de llegar a la audiencia de *Programming Historian en español.* Asistir a la conferencia también les dio la oportunidad de conocer a Matthew Lincoln y Brandon Walsh además de presentar el proyecto multilingüe a más personas.
+Unos días más tarde y al otro lado del charco en la [Conferencia ACH 2019](https://ach2019.ach.org) en Pittsburgh, Estados Unidos, Maria José Afanador-Llach y Jennifer Isasi presentaron "Retos en la producción de tutoriales de HD en contextos hispanohablantes." Se enfocaron en la experiencia de llegar a la audiencia de *Programming Historian en español.* Asistir a la conferencia también les dio la oportunidad de conocer a Matthew Lincoln y Brandon Walsh además de presentar el proyecto multilingüe a más personas.

Una foto de Brandon Walsh, Matthew Lincoln, María José Afanador-Llach y Jennifer Isasi en ACH 2019.
Brandon Walsh, Matthew Lincoln, María José Afanador-Llach y Jennifer Isasi en ACH 2019.

@@ -68,7 +68,7 @@ Unos días más tarde y al otro lado del charco en la [Conferencia ACH 2019](htt

## PH en español recibe el Premio HDH 2018

-La [Asociación de Humanidades Digitales Hispánicas ha premiado a *Programming Historian en español*](https://programminghistorian.org/posts/premio-hdh-2018)) como una de la **“Mejor iniciativa formativa desarrollada durante el año 2018”** en su [I Edición de los Premios HDH 2018](http://humanidadesdigitaleshispanicas.es/resolucion-convocatoria-i-edicion-premios-hdh/).
+La [Asociación de Humanidades Digitales Hispánicas ha premiado a *Programming Historian en español*](https://programminghistorian.org/posts/premio-hdh-2018) como la **“Mejor iniciativa formativa desarrollada durante el año 2018”** en su [I Edición de los Premios HDH 2018](https://humanidadesdigitaleshispanicas.es/resolucion-convocatoria-i-edicion-premios-hdh/).

El objetivo de este premio es reconocer social y académicamente las diferentes iniciativas desarrolladas desde las bases de la contribución al avance del conocimiento, la reflexión crítica, el desarrollo de currículos formativos y la difusión y visibilidad de las humanidades digitales en español. Nos gustaría de nuevo agradecer a la Junta Directiva de HDH su decisión y confianza en nosotros.

diff --git a/_posts/2019-09-30-buletin-de-information.md b/_posts/2019-09-30-buletin-de-information.md
index 685eb3ece3..6101c3c5d8 100644
--- a/_posts/2019-09-30-buletin-de-information.md
+++ b/_posts/2019-09-30-buletin-de-information.md
@@ -50,7 +50,7 @@ Antonio Rojas Castro, Anna-Maria Sichani et Sofia Papastamkou ont présenté leu


 Une photo d'Anna-Maria Sichani et Sofia Papastamkou à côté de leur affiche à DH 2019.
Anna-Maria Sichani et Sofia Papastamkou présentent une affiche à DH 2019.

-Quelques jours plus tard, lors de la [conférence ACH 2019](http://ach2019.ach.org) à Pittsburgh, de l'autre côté de l'océan, Maria José Afanador-Llach et Jennifer Isasi ont présenté "Retouches sur les tutoriels en HD et les contextes hispanophones" (en anglais), qui mettaient l'accent sur l'expérience publique de la *Programming Historian en español*. La participation à la conférence leur a permis de rencontrer Matthew Lincoln et Brandon Walsh, ainsi que de familiariser davantage de personnes au projet multilingue. +Quelques jours plus tard, lors de la [conférence ACH 2019](https://ach2019.ach.org) à Pittsburgh, de l'autre côté de l'océan, Maria José Afanador-Llach et Jennifer Isasi ont présenté "Retouches sur les tutoriels en HD et les contextes hispanophones" (en anglais), qui mettaient l'accent sur l'expérience publique de la *Programming Historian en español*. La participation à la conférence leur a permis de rencontrer Matthew Lincoln et Brandon Walsh, ainsi que de familiariser davantage de personnes au projet multilingue.

Une photo de Brandon Walsh, Matthew Lincoln, María José Afanador-Llach et Jennifer Isasi à ACH 2019.
Brandon Walsh, Matthew Lincoln, María José Afanador-Llach et Jennifer Isasi se rencontrent à ACH 2019.

Félicitations à tous ceux qui ont présenté des conférences cet été! diff --git a/_posts/2019-09-30-newsletter.md b/_posts/2019-09-30-newsletter.md index a250b52d72..fe19b21aeb 100644 --- a/_posts/2019-09-30-newsletter.md +++ b/_posts/2019-09-30-newsletter.md @@ -54,7 +54,7 @@ Antonio Rojas Castro, Anna-Maria Sichani and Sofia Papastamkou presented their p

A photo of Anna-Maria Sichani and Sofia Papastamkou next to their poster at DH 2019.
Anna-Maria Sichani and Sofia Papastamkou present a poster at DH 2019.

-A few days later, and on the other side of the pond at the [**ACH 2019 Conference**](http://ach2019.ach.org) in Pittsburgh, Maria José Afanador-Llach and Jennifer Isasi presented "Retos en la producción de tutoriales de HD en contextos hispanohablantes" which focused on the experience of reaching to the audience of *Programming Historian en español*. Attending the conference gave them the opportunity to also meet Matthew Lincoln and Brandon Walsh as well as introducing more people to the multilingual project.
+A few days later, and on the other side of the pond at the [**ACH 2019 Conference**](https://ach2019.ach.org) in Pittsburgh, Maria José Afanador-Llach and Jennifer Isasi presented "Retos en la producción de tutoriales de HD en contextos hispanohablantes", which focused on the experience of reaching the audience of *Programming Historian en español*. Attending the conference gave them the opportunity to meet Matthew Lincoln and Brandon Walsh, as well as to introduce more people to the multilingual project.

A photo of Brandon Walsh, Matthew Lincoln, María José Afanador-Llach and Jennifer Isasi at ACH 2019.
Brandon Walsh, Matthew Lincoln, María José Afanador-Llach and Jennifer Isasi meet at ACH 2019.

diff --git a/_posts/2020-02-26-merci-les-amis.md b/_posts/2020-02-26-merci-les-amis.md
index eaf7d34181..5e3bb8c30a 100644
--- a/_posts/2020-02-26-merci-les-amis.md
+++ b/_posts/2020-02-26-merci-les-amis.md
@@ -15,6 +15,6 @@ Nous étions trois lorsque la version francophone a vu le jour; nous sommes à p

À l'heure actuelle, nous avons mis en ligne sept traductions de tutoriels à l'origine publiés en anglais; une de plus le sera prochainement et d'autres sont en cours ou en attente de relecture. Pour le moment, l'ensemble de ce travail a impliqué pas moins de 17 contributeurs et contributrices externes à l'équipe éditoriale. Nous souhaitons rebondir sur cet aspect car, dès le début, le Programming Historian en français a pu compter sur la bonne volonté d'une communauté qui s'est manifestée spontanément.

-Ainsi, dès sa formation, l'équipe francophone a reçu deux importantes contributions. L'une venait de l'équipe du Master 2 ["Technologies numériques appliquées à l'histoire"](http://www.chartes.psl.eu/fr/rubrique-admissions/master-technologies-numeriques-appliquees-histoire) de l'École nationale des chartes, Paris, France, qui nous a fourni un corpus de traductions de tutoriels préparées par les étudiant(e)s dans le cadre de leur cours d'anglais et nous autorisant à les publier. Merci à Thibault Clérice et Meg Roussel pour cette proposition, ainsi qu'à Sybille Clochet et Alix Chagué - pour l'instant! - pour leur travail supplémentaire. La deuxième contribution émanait de Sylvain Machefert, qui a mis gentiment à notre disposition trois traductions préalablement préparées par ses soins. À l'annonce de la mise en ligne du Programming Historian en français, nous avons reçu davantage de propositions spontanées de traductions de tutoriels qui avaient servi ou intéressaient d'une manière ou d'une autre les instigateurs de ces propositions: Hugo Bonin, Anne-Sophie Besserro-Lagarde, Géraldine Castel, Antoine Gourlay ont toute notre reconaissance. Enfin, nous sommes redevables à nos relecteurs et relectrices d'avoir consacré leur temps et énergie pour peaufiner, à la fois sur la forme et sur le fond, ce travail de traduction: Alix Chagué, Frédéric Clavert, Isabelle Gribomont, Fanny Mezard, Sylvain Machefert, Marie Puren, Antoine Champigny, Antoine Courtin, Thomas Soubiran, Catherine Paulin, Florian Cafiero, et Déborah Dubald.
+Ainsi, dès sa formation, l'équipe francophone a reçu deux importantes contributions. L'une venait de l'équipe du Master 2 ["Technologies numériques appliquées à l'histoire"](https://www.chartes.psl.eu/fr/rubrique-admissions/master-technologies-numeriques-appliquees-histoire) de l'École nationale des chartes, Paris, France, qui nous a fourni un corpus de traductions de tutoriels préparées par les étudiant(e)s dans le cadre de leur cours d'anglais et nous autorisant à les publier. Merci à Thibault Clérice et Meg Roussel pour cette proposition, ainsi qu'à Sybille Clochet et Alix Chagué - pour l'instant! - pour leur travail supplémentaire. La deuxième contribution émanait de Sylvain Machefert, qui a mis gentiment à notre disposition trois traductions préalablement préparées par ses soins. À l'annonce de la mise en ligne du Programming Historian en français, nous avons reçu davantage de propositions spontanées de traductions de tutoriels qui avaient servi ou intéressaient d'une manière ou d'une autre les instigateurs de ces propositions: Hugo Bonin, Anne-Sophie Besserro-Lagarde, Géraldine Castel, Antoine Gourlay ont toute notre reconnaissance.
Enfin, nous sommes redevables à nos relecteurs et relectrices d'avoir consacré leur temps et énergie pour peaufiner, à la fois sur la forme et sur le fond, ce travail de traduction: Alix Chagué, Frédéric Clavert, Isabelle Gribomont, Fanny Mezard, Sylvain Machefert, Marie Puren, Antoine Champigny, Antoine Courtin, Thomas Soubiran, Catherine Paulin, Florian Cafiero, et Déborah Dubald.

Forte de ces soutiens, l'équipe du Programming Historian en français est optimiste pour l'avenir, surtout que produire des leçons originales en français est notre prochain but. Un sincère merci, à nouveau, pour toutes les contributions passées, en cours et à venir!

diff --git a/_posts/2020-05-04-call-for-editors.md b/_posts/2020-05-04-call-for-editors.md
index 063d6eceae..c911dba096 100644
--- a/_posts/2020-05-04-call-for-editors.md
+++ b/_posts/2020-05-04-call-for-editors.md
@@ -13,7 +13,7 @@ Launched in 2012, *The Programming Historian* offers more than 80 novice-friendl

- Lincoln Mullen, '[Review of the Programming Historian](https://academic.oup.com/jah/article-abstract/103/1/299/1751315)', *The Journal of American History*, vol. 103, no. 1 (2016), pp. 299-301.

-- Cameron Blevins, '[Review of the Programming Historian](http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/)', *The Journal of Interactive Technology & Pedagogy*, vol. 8 (2015).
+- Cameron Blevins, '[Review of the Programming Historian](https://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/)', *The Journal of Interactive Technology & Pedagogy*, vol. 8 (2015).

Interested candidates should submit a 1-page expression of interest outlining your interests, experience, and vision for the publication, to Sarah Melton (sarah.melton@bc.edu) by 1 June 2020. Please direct any questions to Sarah in the first instance. Please note that this is a VOLUNTEER ACADEMIC SERVICE POSITION and there is no salary or stipend associated with this role.

diff --git a/_posts/2021-04-01-newsletter-april-21.md b/_posts/2021-04-01-newsletter-april-21.md
index ca037a54f6..0284effd19 100644
--- a/_posts/2021-04-01-newsletter-april-21.md
+++ b/_posts/2021-04-01-newsletter-april-21.md
@@ -16,7 +16,7 @@ As usual, none of these achievements would have been possible without the suppor

These first months of the year have been especially busy with virtual events, as we have presented our work on *Programming Historian* in different venues all over. Nos gustaría en especial felicitar y agradecer a nuestra editora Riva Quiroga su gran esfuerzo en marzo, pues ha participado en casi todos estos eventos.

-Early in January, Brandon Walsh presented our project at the Modern Languages Association Conference 2021. His talk, ‘[The Programming Historian and Editorial Process in Digital Publishing](http://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/)’ focused on the technical side or how we use GitHub for the editorial process to submit and edit new lessons or translations for publication.
+Early in January, Brandon Walsh presented our project at the Modern Languages Association Conference 2021. His talk, ‘[The Programming Historian and Editorial Process in Digital Publishing](https://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/)’, focused on the technical side of how we use GitHub for the editorial process to submit and edit new lessons or translations for publication.
*Programming Historian en español* organizó dos talleres para el 6 de marzo, con motivo del [Día de los Datos Abiertos](https://opendataday.org/th/events/2021/reports/open-data-day-datos-abiertos-y-humanidades-digitales/) y con la financiación de la [Open Knowledge Foundation](https://okfn.org/). Riva Quiroga presentó el proyecto y dio paso a Silvia Gutiérrez, quien dio un taller sobre Voyant-Tools con textos en español. Tras un descanso, Jairo Melo nos enseñó a descargar, procesar y visualizar datos abiertos a través de un cuaderno Jupyter. Ambos talleres quedaron grabados y puedes verlos en [opendataday-2021](https://github.com/programminghistorian/opendataday-2021). También puedes leer un resumen del evento en [este hilo de Twitter](https://twitter.com/ProgHist/status/1368246763962966021).

@@ -24,7 +24,7 @@ A few days later, on March 10th, Riva presented on the "Multilingual Digital Hum

Our global team members Sofia Papastamkou, Jessica Parr and Riva Quiroga presented at NewsEye’s International Conference. Their paper was titled "Challenges for Digital Literacy in the Humanities: The Open, Community-Based and Multilinguistic Approach of *The Programming Historian*" and centered on the community we have formed to make digital methods available to more people in the world.

-El 25 de marzo, nuestras editoras Jennifer Isasi y Riva Quiroga presentaron el proyecto a la Red INTELE (Infraestructura de Tecnologías del Lenguaje) de España. Con "*Programming Historian*: Un proyecto colaborativo para poner la programación al alcance de los humanistas" dieron buena razón de las directrices éticas que guían el proceso editorial en la revista y mostraron un ejemplo de cómo preparar una lección para su publicación. La charla quedó grabada en video y podéis verla en la [colección de webinarios de INTELE](http://ixa2.si.ehu.eus/intele/?q=webinars).
+El 25 de marzo, nuestras editoras Jennifer Isasi y Riva Quiroga presentaron el proyecto a la Red INTELE (Infraestructura de Tecnologías del Lenguaje) de España. Con "*Programming Historian*: Un proyecto colaborativo para poner la programación al alcance de los humanistas" dieron buena razón de las directrices éticas que guían el proceso editorial en la revista y mostraron un ejemplo de cómo preparar una lección para su publicación. La charla quedó grabada en video y podéis verla en la [colección de webinarios de INTELE](https://ixa2.si.ehu.eus/intele/?q=webinars).

On April 12th, the *Programming Historian* team convened in a panel at [Global Digital Humanities Symposium 2021](https://msuglobaldh.org/) to present its achievements and challenges in our inclusive, four-language journal initiative. During this session the presenters focused on the strategies each journal has adopted to bring digital scholarship methods to a global audience that reached 1.5 million in 2020. In turn, it served as an overview of the changes the journal has undergone in its twelve-year history in order to become the flagship journal in DH methods. The audience responded positively to the presentation and asked about the addition of new languages.

@@ -90,7 +90,7 @@ We will address the issues that we can fix ourselves and seek help for those for

## Acknowledgments

-A year ago, and before leaving the team, Matthew Lincoln wrote a blogpost about one of the ways in which one can [produce a complex multilingual static site using Jekyll and GitHub pages](https://matthewlincoln.net/2020/03/01/multilingual-jekyll.html).
The post was nominated for the Best DH Blog Post or Series of Posts category in the Digital Humanities Awards 2020. Unfortunately, it didn't win, but the nomination showed how important this resource is for the DH community. And congratulations to [the winners](http://dhawards.org/dhawards2020/results/)!
+A year ago, and before leaving the team, Matthew Lincoln wrote a blogpost about one of the ways in which one can [produce a complex multilingual static site using Jekyll and GitHub pages](https://matthewlincoln.net/2020/03/01/multilingual-jekyll.html). The post was nominated for the Best DH Blog Post or Series of Posts category in the Digital Humanities Awards 2020. Unfortunately, it didn't win, but the nomination showed how important this resource is for the DH community. And congratulations to [the winners](https://dhawards.org/dhawards2020/results/)!

Jennifer Isasi, *PH* comms manager and editor, was nominated and elected to be part of the 2021-2025 Executive Council for [The Association for Computers and the Humanities](https://ach.org). ¡Felicidades, Jennifer! And we would also like to congratulate Kim Gallon and Lorena Gauthereau, who will also be part of the Executive Council.

diff --git a/_posts/2021-09-02-llano-gribomont-vaughan.md b/_posts/2021-09-02-llano-gribomont-vaughan.md
index e1412e1b06..9ab176506b 100644
--- a/_posts/2021-09-02-llano-gribomont-vaughan.md
+++ b/_posts/2021-09-02-llano-gribomont-vaughan.md
@@ -16,6 +16,6 @@ Isabelle Gribomont obtuvo su doctorado en estudios hispánicos en la universidad

[Nicolás Llano Linares](https://twitter.com/enetreseles) es doctor en Ciencias de la Comunicación de la Universidad de São Paulo. Actualmente hace parte del programa MBA en Periodismo de Datos del IDP (Brasilia). Es profesor de escritura en la Escola da Cidade, y editor de la [Revista Rosa](https://revistarosa.com). Sus intereses de investigación transitan entre las mediaciones sociotécnicas en el siglo XX, las ciencias sociales computacionales y las narrativas multimedia. Quiere continuar traduciendo tutoriales, ensayos y géneros híbridos al español y portugués, con el objetivo de fomentar encuentros entre los dos universos lingüísticos y sus culturas investigativas.

-Nicolás Vaughan es doctor en filosofía medieval de la Universidad de Oxford (Reino Unido). Es profesor asociado del Departamento de Humanidades y Literatura, de la Universidad de los Andes (Bogotá, Colombia), donde también enseña en la [Maestría en Humanidades Digitales](https://posgradosfacartes.uniandes.edu.co/programas/humanidades-digitales/). Sus intereses de investigación son las ediciones digitales en línea, especialmente de autores de la Edad Media. Es editor adjunto de la edición crítica de la [Ordinatio](https://scta.lombardpress.org/text?resourceid=http://scta.info/resource/wodehamordinatio) del filósofo medieval Adam de Wodeham, un proyecto de código y acceso abierto del [Scholastic Commentaries and Texts Archive (SCTA).](https://scta.info/)
+Nicolás Vaughan es doctor en filosofía medieval de la Universidad de Oxford (Reino Unido). Es profesor asociado del Departamento de Humanidades y Literatura, de la Universidad de los Andes (Bogotá, Colombia), donde también enseña en la [Maestría en Humanidades Digitales](https://posgradosfacartes.uniandes.edu.co/programas/humanidades-digitales/). Sus intereses de investigación son las ediciones digitales en línea, especialmente de autores de la Edad Media.
Es editor adjunto de la edición crítica de la [Ordinatio](https://scta.lombardpress.org/text?resourceid=https://scta.info/resource/wodehamordinatio) del filósofo medieval Adam de Wodeham, un proyecto de código y acceso abierto del [Scholastic Commentaries and Texts Archive (SCTA)](https://scta.info/).

De parte de todo el equipo queremos agradecer su interés en este proyecto, ¡bienvenidos!

diff --git a/_posts/2021-09-08-cfp-jisc-ph.md b/_posts/2021-09-08-cfp-jisc-ph.md
index c05c259077..14b3833e08 100644
--- a/_posts/2021-09-08-cfp-jisc-ph.md
+++ b/_posts/2021-09-08-cfp-jisc-ph.md
@@ -33,7 +33,7 @@ As a result of this partnership we are delighted to invite authors to submit pro

- Show how a computational methodology or technique can be applied to a digital collection in order to generate initial findings as a precursor to in-depth research;
- Demystify ‘big data’ analysis techniques for a humanities audience;
- Describe methods that advance humanities research questions through the analysis of large-scale digital collections;
-- Demonstrate ‘[Minimal Computing](http://go-dh.github.io/mincomp/about/)’ approaches to the analysis of large-scale digital collections and thereby meet the needs of scholars working ‘under some set of significant constraints of hardware, software, education, network capacity, power, or other factors’.
+- Demonstrate ‘[Minimal Computing](https://go-dh.github.io/mincomp/about/)’ approaches to the analysis of large-scale digital collections and thereby meet the needs of scholars working ‘under some set of significant constraints of hardware, software, education, network capacity, power, or other factors’.

Examples of the kind of large-scale collections that would be in scope are digitised texts, email archives, social media data, web archives, bibliographic datasets, image collections, and catalogue data. This is not exhaustive, however, and no type of large-scale research collection is *a priori* excluded.

@@ -73,11 +73,11 @@ The project would particularly welcome lessons which engage with web archives or

- the UK Medical Heritage library, on the Jisc Historical Texts platform (https://ukmhl.historicaltexts.jisc.ac.uk/home)
- British Library 19th Century Books, also on the Jisc Historical Texts platform, ([https://historicaltexts.jisc.ac.uk](https://historicaltexts.jisc.ac.uk/))
- The National Archives’ Discovery platform (https://discovery.nationalarchives.gov.uk/). A sandbox API is available at [https://discovery.nationalarchives.gov.uk/API/sandbox/index](https://discovery.nationalarchives.gov.uk/API/sandbox/index).
-- The UK Government Web Archive (http://www.nationalarchives.gov.uk/webarchive/) +- The UK Government Web Archive (https://www.nationalarchives.gov.uk/webarchive/) Other useful sources of data include: -- The UK Web Archive (http://data.webarchive.org.uk/opendata/) +- The UK Web Archive (https://data.webarchive.org.uk/opendata/) - The GeoCities special collection at the Internet Archive (https://archive.org/web/geocities.php) - The Enron email corpus (https://www.cs.cmu.edu/~enron/) - Library of Congress Web Archive datasets (https://labs.loc.gov/work/experiments/webarchive-datasets/) diff --git a/_posts/2021-09-22-call-for-editors-en.md b/_posts/2021-09-22-call-for-editors-en.md index 75fbb3ed76..ac514e5765 100644 --- a/_posts/2021-09-22-call-for-editors-en.md +++ b/_posts/2021-09-22-call-for-editors-en.md @@ -13,7 +13,7 @@ Launched in 2012, *The Programming Historian* offers more than 80 novice-friendl - Lincoln Mullen, '[Review of the Programming Historian](https://academic.oup.com/jah/article-abstract/103/1/299/1751315)', *The Journal of American History*, vol. 103, no. 1 (2016), pp. 299-301. -- Cameron Blevins, '[Review of the Programming Historian](http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/)', *The Journal of Interactive Technology & Pedagogy*, vol. 8 (2015). +- Cameron Blevins, '[Review of the Programming Historian](https://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/)', *The Journal of Interactive Technology & Pedagogy*, vol. 8 (2015). Interested candidates should submit a short expression of interest outlining your interests, experience, and vision for the publication, to Sarah Melton (sarah.melton@bc.edu) by 15 October 2021. Please direct any questions to Sarah in the first instance. Please note that this is a VOLUNTEER ACADEMIC SERVICE POSITION and there is no salary or stipend associated with this role. diff --git a/_posts/2021-09-24-september-newsletter.md b/_posts/2021-09-24-september-newsletter.md index bb3f09b5bf..e4edebf4cb 100644 --- a/_posts/2021-09-24-september-newsletter.md +++ b/_posts/2021-09-24-september-newsletter.md @@ -34,7 +34,7 @@ Colleagues in our Project Team have published new research exploring the challen Daniel Alves wrote an article titled [‘Ensinar Humanidades Digitais sem as Humanidades Digitais: um olhar a partir das licenciaturas em História’](https://novaresearch.unl.pt/files/32228034/Ensinar_Humanidades_Digitais.pdf) for Revista EducaOnline that argues the importance of teaching digital skills across humanities departments at universities. Adam Crymble and Maria José Afanador-Llach contributed a chapter [‘The Globally Unequal Promise of Digital Tools for History: UK and Colombia Case Study’](https://link.springer.com/chapter/10.1007%2F978-981-16-0247-4_7) to the book *Teaching History for the Contemporary World*, edited by Adele Nye. In their text, they consider some of the social, technical and infrastructural barriers that open access, multilingual projects like *Programming Historian* are designed to break down. -Later this September, Jessica Parr and Nabeel Siddiqui will contribute to [Sharing Digitally](https://www.lvivcenter.org/en/conferences/sharing-digitally-2/), an online seminar organised by the Center for Urban History at Lviv, Ukraine, where they'll lead a workshop and discussion about '*Programming Historian* and the Challenges of Globally Distributed Learning'. 
Through October and November, Jennifer Isasi and Brandon Walsh will co-host two special [Programming Historian Book Club](https://ach.org/blog/2021/09/13/fall-2021-programming-historian-book-club/) events lead by [The Association for Computers and the Humanities](http://ach.org/). The Book Clubs are an opportunity to work through our lessons on [R](https://programminghistorian.org/en/lessons/basic-text-processing-in-r) and [Python](https://programminghistorian.org/en/lessons/introduction-and-installation) together. Newcomers are welcome – the sessions will provide a supportive space to learn a new skill and ask questions from experienced participants.
+Later this September, Jessica Parr and Nabeel Siddiqui will contribute to [Sharing Digitally](https://www.lvivcenter.org/en/conferences/sharing-digitally-2/), an online seminar organised by the Center for Urban History at Lviv, Ukraine, where they'll lead a workshop and discussion about '*Programming Historian* and the Challenges of Globally Distributed Learning'. Through October and November, Jennifer Isasi and Brandon Walsh will co-host two special [Programming Historian Book Club](https://ach.org/blog/2021/09/13/fall-2021-programming-historian-book-club/) events led by [The Association for Computers and the Humanities](https://ach.org/). The Book Clubs are an opportunity to work through our lessons on [R](https://programminghistorian.org/en/lessons/basic-text-processing-in-r) and [Python](https://programminghistorian.org/en/lessons/introduction-and-installation) together. Newcomers are welcome – the sessions will provide a supportive space to learn a new skill and ask questions from experienced participants.

Keep an eye on the [research](https://programminghistorian.org/en/research)/[investigación](https://programminghistorian.org/es/investigacion)/[recherche](https://programminghistorian.org/fr/recherche)/[pesquisa](https://programminghistorian.org/pt/pesquisa) pages of our website for further updates on our activities.

diff --git a/_posts/2021-12-22-december-newsletter.md b/_posts/2021-12-22-december-newsletter.md
index bea5a9939f..e0c23dc45a 100644
--- a/_posts/2021-12-22-december-newsletter.md
+++ b/_posts/2021-12-22-december-newsletter.md
@@ -33,7 +33,7 @@ We have had a busy few months in terms of outreach activities too. In November,

Meanwhile, with support from the Institute of Historical Research, Adam Crymble and Anisa Hawes offered a free [workshop](https://www.history.ac.uk/our-century/centenary-events/training-teacher-giving-your-first-digital-history-workshop) aimed at history and humanities educators. They shared three different methods for integrating _Programming Historian_'s practical digital tutorials into university teaching, based on approaches developed at University College London, University of Edinburgh, and Universidad de los Andes, including remote and in-person delivery options.

-Jennifer Isasi and Antonio Rojas Castro contributed to a roundtable discussion, titled "Torre de Babel", on the subject of multi-lingual digital humanities at [the Association of History Literature Science and Technology (AHLiST)'s International Interdisciplinary Conference](http://ahlist.org/wp-content/uploads/2021/11/AHLIST-2021-PROGRAM_Virtual_FINAL.pdf) in Madrid, Spain, and have co-written an article titled "Sin equivalencia: Una reflexión sobre la traducción al español de recursos educativos abiertos" soon to be published in the journal _Hispania_.
+Jennifer Isasi and Antonio Rojas Castro contributed to a roundtable discussion, titled "Torre de Babel", on the subject of multi-lingual digital humanities at [the Association of History Literature Science and Technology (AHLiST)'s International Interdisciplinary Conference](https://ahlist.org/wp-content/uploads/2021/11/AHLIST-2021-PROGRAM_Virtual_FINAL.pdf) in Madrid, Spain, and have co-written an article titled "Sin equivalencia: Una reflexión sobre la traducción al español de recursos educativos abiertos" soon to be published in the journal _Hispania_. ## New Supporters and Partnerships diff --git a/_posts/2023-12-13-bulletin-issue-01.md b/_posts/2023-12-13-bulletin-issue-01.md index 657da512c1..041d6de2c2 100644 --- a/_posts/2023-12-13-bulletin-issue-01.md +++ b/_posts/2023-12-13-bulletin-issue-01.md @@ -16,7 +16,7 @@ categories: posts ## 2023 Milestones - We celebrated the publication of our 200th lesson overall since our launch. This was a moment to recognise our significant growth as a project over the past decade, and to honour a great collective achievement on behalf of all our contributors. -- We won the Best DH Training Materials category of the [Digital Humanities Awards](http://dhawards.org/dhawards2022/results/). Community-nominated and community-voted awards make us particularly proud, because they represent the value of _Programming Historian_ among our peers. +- We won the Best DH Training Materials category of the [Digital Humanities Awards](https://dhawards.org/dhawards2022/results/). Community-nominated and community-voted awards make us particularly proud, because they represent the value of _Programming Historian_ among our peers. - We are delighted to have recruited a brilliant new Publishing Assistant Charlotte Chevrie, who will be working alongside Anisa Hawes, now our Publishing Manager, to provide services to our four journal teams. ## New Lessons diff --git a/_posts/2024-06-28-bulletin-issue-03.md b/_posts/2024-06-28-bulletin-issue-03.md index ff339240b0..7c24edc8bb 100644 --- a/_posts/2024-06-28-bulletin-issue-03.md +++ b/_posts/2024-06-28-bulletin-issue-03.md @@ -67,7 +67,7 @@ JOHN R. LADD, JESSICA OTIS, CHRISTOPHER N. WARREN & SCOTT WEINGART, traduite par ## New Supporters Huge thanks to our generous new [Patreon](https://www.patreon.com/theprogramminghistorian) subscribers who have individually invested in our success: **James Baker** (Patron), **Cory Taylor** (Patron), and **Samuel Salgado Tello** (Educator). -Join our Patreon community: +Join our Patreon community: We're also very grateful to all those who have renewed their membership to our Institutional Partner Programme this quarter: the **School of Advanced Study**, **C2DH**, **University of Florida**, **Bristol University Library**, **Western University Library**, **Universidad de los Andes**, **Princeton University**, **Cambridge Digital Humanities**, and **University of Sussex Library** (Gold Tier). diff --git a/_posts/2024-09-27-bulletin-issue-04.md b/_posts/2024-09-27-bulletin-issue-04.md index e4a7deb523..359b208d73 100644 --- a/_posts/2024-09-27-bulletin-issue-04.md +++ b/_posts/2024-09-27-bulletin-issue-04.md @@ -51,7 +51,7 @@ Our incoming Chair of the Board of Trustees, **James Baker**, would like to shar “We shared in our June Bulletin that **Sofia Papastamkou** had recently stepped down as Chair of the Board of Trustees. 
**Sofia took on the role of Chair at an important moment in our journey** as a publisher: in Spring 2023, we had reorganised our structures and were looking to expand our team by hiring a Digital Humanities Publishing Assistant. Sofia’s first major task as Chair was to guide us through a successful recruitment process, which she did with purpose. -**Under Sofia's leadership, _Programming Historian_ achieved some significant successes**: we celebrated the publication of our 200th and, later, 250th lessons since launch in 2008; we won [best DH Training Materials category of the Digital Humanities Awards](http://dhawards.org/dhawards2022/results/); we grew the support base of our Institutional Partner Programme, including new Gold Tier supporters; our lessons became discoverable through [EBSCO](https://www.ebsco.com/products/ebscohost-research-platform) and [DARIAH Campus](https://campus.dariah.eu/source/programming-historian/page/1); and we expanded our outreach, communication, and community networking activities. +**Under Sofia's leadership, _Programming Historian_ achieved some significant successes**: we celebrated the publication of our 200th and, later, 250th lessons since launch in 2008; we won [best DH Training Materials category of the Digital Humanities Awards](https://dhawards.org/dhawards2022/results/); we grew the support base of our Institutional Partner Programme, including new Gold Tier supporters; our lessons became discoverable through [EBSCO](https://www.ebsco.com/products/ebscohost-research-platform) and [DARIAH Campus](https://campus.dariah.eu/source/programming-historian/page/1); and we expanded our outreach, communication, and community networking activities. From her time as Manager Editor of _Programming Historian en français_, to her recent time as Chair, **Sofia's passion and enthusiasm for our work has been undiminished**. On behalf of the Board of Trustees, I wish to extend my heartfelt thanks to Sofia and **wish her every success** in her future work.” diff --git a/_posts/2025-03-28-bulletin-issue-06.md b/_posts/2025-03-28-bulletin-issue-06.md index 8f44d2c029..284c4b4f41 100644 --- a/_posts/2025-03-28-bulletin-issue-06.md +++ b/_posts/2025-03-28-bulletin-issue-06.md @@ -55,7 +55,7 @@ Institutional Partnerships empower us to **sustain our model of Diamond Open Acc If you know that our work aligns with your organisation’s priorities and is valuable to your community, we’d love your help to connect with the right person. -[Email our Publishing Manager](mailto:admin@programminghistorian.org) or visit [http://tinyurl.com/support-PH](http://tinyurl.com/support-PH) to learn more about joining our Institutional Partnership Programme. +[Email our Publishing Manager](mailto:admin@programminghistorian.org) or visit [https://tinyurl.com/support-PH](https://tinyurl.com/support-PH) to learn more about joining our Institutional Partnership Programme. Grateful thanks to our valued partners who have renewed their membership this quarter: **Exeter University**, **Universiteit Gent**, the **College of the Liberal Arts at Penn State University**, **KU Leuven Bibliotheken**, **University of York**, **C²DH**,the **National Archives**, the **School of Advanced Study (UoL)** and **Universität Bern Bibliothek**. 
diff --git a/assets/corpus-analysis-with-spacy/corpus-analysis-with-spacy-16.html b/assets/corpus-analysis-with-spacy/corpus-analysis-with-spacy-16.html index e24bdd7bc5..dbc4b7c394 100644 --- a/assets/corpus-analysis-with-spacy/corpus-analysis-with-spacy-16.html +++ b/assets/corpus-analysis-with-spacy/corpus-analysis-with-spacy-16.html @@ -1,4 +1,4 @@ - + There PRON diff --git a/assets/corpus-analysis-with-spacy/corpus-analysis-with-spacy-17.html b/assets/corpus-analysis-with-spacy/corpus-analysis-with-spacy-17.html index ef96bba98e..6872e41369 100644 --- a/assets/corpus-analysis-with-spacy/corpus-analysis-with-spacy-17.html +++ b/assets/corpus-analysis-with-spacy/corpus-analysis-with-spacy-17.html @@ -1,4 +1,4 @@ - + There PRON diff --git a/assets/fetch-and-parse-data-with-openrefine/pg1105.html b/assets/fetch-and-parse-data-with-openrefine/pg1105.html index 794f32c9af..ed80eadbb6 100644 --- a/assets/fetch-and-parse-data-with-openrefine/pg1105.html +++ b/assets/fetch-and-parse-data-with-openrefine/pg1105.html @@ -1,9 +1,9 @@ - - - + + + - + diff --git a/assets/from-html-to-list-of-words-1/obo-t17800628-33.html b/assets/from-html-to-list-of-words-1/obo-t17800628-33.html index b24ffcff1d..d4aed0a70a 100644 --- a/assets/from-html-to-list-of-words-1/obo-t17800628-33.html +++ b/assets/from-html-to-list-of-words-1/obo-t17800628-33.html @@ -1,19 +1,19 @@ - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + Browse - Central Criminal Court @@ -64,7 +64,7 @@

London's Central Criminal Court, 1674 to 1913

>
  • Historical Background
  • The Project
  • Contact

  • @@ -74,15 +74,15 @@

    London's Central Criminal Court, 1674 to 1913

    - - -
    - -
    BENJAMIN BOWSEY, Breaking Peace > riot, 28th June 1780.

    Reference Number: t17800628-33
    Offence: Breaking Peace > riot
    Verdict: Guilty
    Punishment: Death
    Navigation: < Previous text (trial account) | Next text (trial account) >

    324. BENJAMIN BOWSEY (a blackmoor ) was indicted for that he together with five hundred other persons and more, did, unlawfully, riotously, and tumultuously assemble on the 6th of June to the disturbance of the public peace and did begin to demolish and pull down the dwelling house of Richard Akerman , against the form of the statute, &c.

    ROSE JENNINGS , Esq. sworn.

    Had you any occasion to be in this part of the town, on the 6th of June in the evening? - I dined with my brother who lives opposite Mr. Akerman's house. They attacked Mr. Akerman's house precisely at seven o'clock; they were preceded by a man better dressed than the rest, who went up to Mr. Akerman's door; he rapped three times, and I believe pulled the bell as often. Mr. Akerman had barrocadoed his house. When the man found that no one came, he went down the steps, made his obeisance to the mob, and pointed to the door, and then retired.

    Have you any recollection how that man who you say had a better appearance than the rest was dressed? - I think he had on a dark brown coat and a round ha, but I cannot be particular as to that; the mob immediately following in that formidable manner made such an impression upon me, that I did not take notice. The mob approached about thirty in number, three a-breast, some with paving mattocks, others with iron crows and chissels; and then followed an innumerable company with bludgeons; they seemed to be the spokes of coach-wheels; they divided, some went to Mr. Akerman's door with the mattocks, some to the felons door, and some to the debtor's door. I was struck with the formidable appearance and order in which they divided and proceeded to destroy the place, the men threw their sticks up at the windows, which they broke and demolished, yet notwith standing these sticks were coming down in showers, two men with a bar, such as brewers servants carry on their shoulders,

    attacked the parlour window to force it open. The window-shutters were exceedingly tough; they at last forced them partly open, but not quite. I then saw a man in a sailor's jacket helped up, he forced himself neck and heels into the window. They found the house-door still difficult to get open; before it was got open the other parlour window was opened and the mob were throwing the goods out at the window; at last the house-door gave way; about the same time some of the goods and furniture having been thrown out into the street, a fire was kindled.

    They proceeded immediately to throw the goods out of the house? - Immediately. An equal degree of activity seemed to exhibit itself on the outside as within, one party to burn, the other to throw out the goods of Mr. Akerman. When the conflagration took place I applied my mind to the mob.

    Was Mr. Akerman's house on fire then? - No. I was situated in the one-pair-of-stairs room, and could see what happened. I endeavoured to form a distinction between the active and inactive people. I thought I did so; the inactive people seemed to form a circle. I observed a person better dressed than the rest among those within the circle, who did not meddle, but seemed to be exciting and encouraging others. I saw several genteel looking men, and amongst them a black; there was one genteel man in particular, whose conduct I confess excited my indignation, and I took particular notice of him. I went down amongst the mob; I spoke to him; I made myself master of his voice; I believe if I was out of his sight I could swear to his voice; I have never seen that man since. When I first saw the black I turned to a lady and said, this is a motley crew, and of every colour. Mr. Akerman's house had then catched fire; the house in which I was was in extreme danger; my self with some others went down to desire the mob to prevent the houses of innocent people catching fire; and the mob were as active in saving those as in destroying Mr. Akerman's. I had no opportunity of making any remarks till I went to my station again, then I believe it was near nine o'clock; I heard a cry and a gingling of keys in the hands of some person; there were three or four genteel persons, but who had the keys I cannot say. Amongst them was the prisoner at the bar; he was without his hat, and his hands were down. I thought he might have his hat in his hand. The house I think was at that time destroyed; the roof was fallen in. Then those persons of the genteeler description moved off towards Smithfield, and amongst them was the prisoner.

    You had observed the black in the mob before you went down? - I had.

    Are you able to say who that black was? - No. Seeing this man afterwards I took it for granted it was him; I was certain to him the second time; he had his hat off in the middle of the mob.

    Jury. You said his hands were down, did you see any thing in his hands? - No, I did not; I took it for granted he had his hat in his hand, not having it on his head.

    Cross Examination.

    There were I believe other blacks in the mob? - I never saw but one; I saw a black at first, but did not remark him so as to swear to him.

    You could not swear to him I suppose from the difficulty every man has in his mind to swear to any black? - Yes.

    There is more difficulty to swear to a black than to a white man? - No. The second time I made my remark too judiciously to err.

    When was it you first saw the black? - After the goods were first set on fire, which was about a quarter after seven o'clock.

    What dress had the black on? - Something of a dark colour, but my remark was on his face.

    What w as remarkable in that man's face more than another black? - The make of his hair was one thing; the curls were out if he had had any; and his hair smooth on his head. His face was so exposed to my view the second time, that I could not be better situated to make any remark on his face.

    His hair was the thing by which you knew him? - His hair and his face.

    What was particular in his face? - I cannot distinguish it any other than from the weight of the impression it made on me.

    Counsel for the Crown. Have you any doubt about him? - No.

    ANN WOOD sworn.

    I live at Mr. Jennings's, opposite Mr. Akerman's house.

    Was you at home on the Tuesday evening when Mr. Akerman's house was attacked? - I was.

    Did you in the course of that evening see the prisoner? - I did. It was a little after seven o'clock; I saw him in Mr. Akerman's two-pair-of-stairs room, he stood against the window with something in his hand and looked at me for some time before I observed particularly what he was doing. I looked at him then, and he took up something off the ground and held it up to me; when he held it up, I went down from the window into the dining-room; I came up again, and he was there still. He seemed to be looking in a drawer upon the floor, and seemed to be doing some thing up into a bundle.

    You was in the two-pair-of-stairs room opposite him? - No, I was in the three-pair-of-stairs room.

    Did you afterwards see him do any thing else? - He got up and looked at me and nodded his head at me; then I went down stairs.

    You saw him again in the course of the evening? - Yes, I saw him an hour or two afterwards in the mob.

    From the observation you made of his person are you sure that is the man? - That is the man.

    Have you any doubt about it? - No, none at all.

    Cross Examination.

    What makes you so positive that this is the man? - I know his face perfectly again by his standing and looking at me so long.

    You recollect him only by his face? - His face and his hair.

    Did you see any other black there? - Yes, I did; not in the house but in the mob.

    Could you swear to him? - I do not know that I could. I took more notice of this man than I did of any other.

    Court. What were the other people doing when the prisoner was in the two-pair-of-stairs room? - Some of the mob were pulling the house down, and some were running in with the fire to set the parlour on fire.

    Jury. How many times did you see this prisoner? - Two or three times.

    Had he his hat on when you saw him? - Yes.

    ANN LESSAR sworn.

    Do you know the prisoner? - Yes.

    Where do you live? - I lodge in the same lodging, in which the prisoner lodged; I took the lodging of him and the landlady.

    Do you remember his coming to you and bringing you any stockings? - He gave me three pair of stockings to mark.

    What mark did he bid you put upon them? - Any kind of mark to distinguish them at the washerwoman's. I put BB, the initials of his name upon them.

    Had he left a trunk in the room? - Yes, the trunk was found there by the constable when he came; it was locked, he had the key of it.

    Who had the key of the room? - I had; nobody could get at the box without my knowledge.

    PERCIVAL PHILLIPS sworn.

    I am a constable. I searched the lodging of the prisoner last Tuesday-week.

    Did you find a trunk there? - I did.

    Did you find any thing in that trunk? - Yes; these stockings, this pocket book, and a handkerchief. (producing them.)

    Any thing else? - This key (producing it) was upon the shelf in the lodging.

    Mr. RICHARD AKERMAN sworn.

    This pocket-book, I believe, has been in my possession thirty years; it was, I believe, in one of the drawers belonging to my wife; here are several of my banker's cheques which had my name to them.

    Look at the stockings? - Here is a very remarkable pair which I had made for me, and the maker wove the initials of my

    name in them in open work; the prisoner has put the initials of his name (B B) over it; they were in the drawers in a one-pair of stairs room. Here are several others that were marked by my sister, they are mine; I believe the handkerchiefs to be mine, but there are no particular marks on them; there are a pair of stockings that were taken off the prisoner's legs, which has the name cut out.

    To Phillips. Did you take them off the prisoner's legs? - I did.

    To Mr. Akerman. Is the place that is cut out the place where the name was wove? - Yes. This is a remarkable key; it is a key of the Park, it has a crown and my name at length upon it.

    To Lessar. Do you know any thing of the key that was found in the lodging? - No, it was on the shelf when he had the lodging?

    Was it there when he left the lodging? - I believe it was there; I saw it once or twice; I never knew the meaning of the key.

    Prisoner. My Lord, please to ask that woman if she did not wash the handkerchief the things were tied up in? - I washed a blue-and-white silk handkerchief, I cannot swear it was this, it was all over mud. I washed it on the Thursday, the first week that I was in the house.

    Was that after the burning of Newgate? - Yes. I was not in town till it was burnt.

    Prisoner. I leave my defence to my counsel and my witnesses.

    For the prisoner.

    Dr. SANDIMAN sworn.

    Do you know the prisoner? - Yes, I knew him five years ago, he lived with a relation of mine; he bore an exceeding good character; he used to come backwards and forwards to my house.

    ROBERT GATES sworn.

    I am footman to Mr. Goodhousen in Golden Square.

    Do you know the prisoner? - I do; I have known him perfectly well from the second day after he came to England, which is six years ago; he lived with a person I knew in America, that person gave him an excellent character, and he has always borne a good character since I knew him.

    GRACE ROBERTS sworn.

    The prisoner lay at our house the night that the prison was burnt.

    What time did you see him that night? - I am not positive to the hour he came in, it was from nine to eleven o'clock.

    What time did he come home? - I am not positive to the hour, it was a little after nine.

    Are you positive of that? - Yes.

    Where do you live? - At No. 3, in Berner's-street.

    He came home a little after nine? - Yes, I am certain of it; he continued there all that night till six in the morning, and was never out of the house.

    What day was that? - The 6th of June.

    What day of the week? - I am not certain.

    Are you sure it was the night the prison was burnt? - I am.

    What prison? - I am not certain what prison, I heard it mentioned in the family that the prison was burnt down.

    Cross Examination.

    Who bid you to remember the 6th of June? - I remember it by the people being taken up.

    When did you talk of its being the 6th of June? - I know he lay at our house on the 6th of June.

    Did you take notice of any other night when he lay there? - No.

    Did not he lie there on the 7th and 8th of June? - No, only that night.

    You are an acquaintance of his? - Yes.

    Is he a married man? - I cannot say.

    Did he bring any body with him? - No.

    Did he lie by himself? - Yes, I gave him a candle to light him to bed.

    Did you know he was to lie there that night? - Yes, he told my fellow servant so.

    You are a servant, are you? - Yes.

    Did your master know that this man was to lie in the house? - I cannot tell.

    Do you let such persons lie in the house without your master's knowledge? - He was an old servant, he lay in the servants hall.

    Other servants lie there? - Yes, there was a black lay there.

    JOHN NORTHINGTON (a Black) sworn.

    I am servant to Mr. Wood.

    Did the prisoner lie at your house? - Yes, on the night that Holbourn was on fire.

    When the house of Mr. Langdale was on fire? - Yes, the man that lives in Holbourn.

    Counsel for the Crown. That was on Wednesday night, the 7th?

    To Roberts. Where did the prisoner use to sleep at other times? - In the same bed.

    That was when he was a servant there? - Yes.

    When he was not a servant there where did he sleep? - He never lay at our house when he was not a servant but that night; I cannot be positive to the night nor the day of the week; I say nothing but the truth.

    Prisoner to Ann Wood . What dress had I on that night? - A light brownish coat, a round hat, and a red waistcoat.

    GUILTY ( Death .)

    Tried by the Second London Jury before Mr. Justice NARES.

    -
    -

    View as XML

    - -


    @@ -195,8 +195,8 @@

    Footer

    @@ -204,5 +204,5 @@

    Footer

    - - + + diff --git a/assets/mapping-with-python-leaflet/exercises/exercise00 - original/mymap.html b/assets/mapping-with-python-leaflet/exercises/exercise00 - original/mymap.html index d4d396c05e..98297c9332 100644 --- a/assets/mapping-with-python-leaflet/exercises/exercise00 - original/mymap.html +++ b/assets/mapping-with-python-leaflet/exercises/exercise00 - original/mymap.html @@ -1,7 +1,7 @@ - - + + diff --git a/assets/mapping-with-python-leaflet/exercises/exercise01/mymap.html b/assets/mapping-with-python-leaflet/exercises/exercise01/mymap.html index 97e0847002..5f6eedcf3b 100644 --- a/assets/mapping-with-python-leaflet/exercises/exercise01/mymap.html +++ b/assets/mapping-with-python-leaflet/exercises/exercise01/mymap.html @@ -1,7 +1,7 @@ - - + + diff --git a/assets/mapping-with-python-leaflet/exercises/exercise02/mymap.html b/assets/mapping-with-python-leaflet/exercises/exercise02/mymap.html index d4d396c05e..98297c9332 100644 --- a/assets/mapping-with-python-leaflet/exercises/exercise02/mymap.html +++ b/assets/mapping-with-python-leaflet/exercises/exercise02/mymap.html @@ -1,7 +1,7 @@ - - + + diff --git a/assets/mapping-with-python-leaflet/exercises/exercise03/mymap.html b/assets/mapping-with-python-leaflet/exercises/exercise03/mymap.html index d4d396c05e..98297c9332 100644 --- a/assets/mapping-with-python-leaflet/exercises/exercise03/mymap.html +++ b/assets/mapping-with-python-leaflet/exercises/exercise03/mymap.html @@ -1,7 +1,7 @@ - - + + diff --git a/assets/mapping-with-python-leaflet/exercises/exercise04/mymap.html b/assets/mapping-with-python-leaflet/exercises/exercise04/mymap.html index 9afce75f7b..e6df973e18 100644 --- a/assets/mapping-with-python-leaflet/exercises/exercise04/mymap.html +++ b/assets/mapping-with-python-leaflet/exercises/exercise04/mymap.html @@ -1,7 +1,7 @@ - - + + diff --git a/assets/mapping-with-python-leaflet/exercises/exercise05/mymap.html b/assets/mapping-with-python-leaflet/exercises/exercise05/mymap.html index 86c3b598cd..dd2ba7580e 100644 --- a/assets/mapping-with-python-leaflet/exercises/exercise05/mymap.html +++ b/assets/mapping-with-python-leaflet/exercises/exercise05/mymap.html @@ -1,7 +1,7 @@ - - + + diff --git a/assets/mapping-with-python-leaflet/map/mymap-onepage.html b/assets/mapping-with-python-leaflet/map/mymap-onepage.html index 335a699363..af5a7fa346 100644 --- a/assets/mapping-with-python-leaflet/map/mymap-onepage.html +++ b/assets/mapping-with-python-leaflet/map/mymap-onepage.html @@ -1,7 +1,7 @@ - - + + diff --git a/assets/mapping-with-python-leaflet/map/mymap.html b/assets/mapping-with-python-leaflet/map/mymap.html index d4d396c05e..98297c9332 100644 --- a/assets/mapping-with-python-leaflet/map/mymap.html +++ b/assets/mapping-with-python-leaflet/map/mymap.html @@ -1,7 +1,7 @@ - - + + diff --git a/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html b/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html index 2b0144b95d..d4aed0a70a 100644 --- a/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html +++ b/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html @@ -12,8 +12,8 @@ - - + + Browse - Central Criminal Court @@ -64,7 +64,7 @@

    London's Central Criminal Court, 1674 to 1913

    >
  • Historical Background
  • The Project
  • Contact

  • @@ -195,8 +195,8 @@

    Footer

    diff --git a/en/about.md b/en/about.md index b79ec22635..9a93d1be36 100755 --- a/en/about.md +++ b/en/about.md @@ -14,7 +14,7 @@ The review process is an integral component of a collaborative, productive, and ## Open Source -The _Programming Historian_ team is committed to open source values. All contributed lessons make use of open source programming languages and software whenever possible. This policy is meant to minimize costs for all parties, and to allow the greatest possible level of participation. We believe everyone should be able to benefit from these tutorials, not just those with large research budgets for expensive proprietary software. Since 2016, a citable version of the _Programming Historian_ project has been deposited on [Zenodo](https://zenodo.org/). The 2022 deposit is available at [doi.org/10.5281/zenodo.7313045](https://doi.org/10.5281/zenodo.7313045). Since 2018, the [UK Web Archive](https://www.webarchive.org.uk/) has made regular crawls of the _Programming Historian_. These are archived and made publicly available [via their website](https://www.webarchive.org.uk/wayback/en/archive/*/http://programminghistorian.org/). +The _Programming Historian_ team is committed to open source values. All contributed lessons make use of open source programming languages and software whenever possible. This policy is meant to minimize costs for all parties, and to allow the greatest possible level of participation. We believe everyone should be able to benefit from these tutorials, not just those with large research budgets for expensive proprietary software. Since 2016, a citable version of the _Programming Historian_ project has been deposited on [Zenodo](https://zenodo.org/). The 2022 deposit is available at [doi.org/10.5281/zenodo.7313045](https://doi.org/10.5281/zenodo.7313045). Since 2018, the [UK Web Archive](https://www.webarchive.org.uk/) has made regular crawls of the _Programming Historian_. These are archived and made publicly available [via their website](https://www.webarchive.org.uk/wayback/en/archive/*/https://programminghistorian.org/). ## Diamond Open Access @@ -25,7 +25,7 @@ We do not charge Article Processing Charges (APCs), nor do we charge library sub The _Programming Historian_ (ISSN {{ site.data.snippets.issn[page.lang] }}) is indexed by the [Directory of Open Access Journals](https://doaj.org/toc/2397-2068). ## Awards -The _Programming Historian_ has won multiple awards which recognise and celebrate our achievements in the spheres of open access publishing and digital scholarship. In 2016 our English-language journal was the winner of the [Digital Humanities Awards](http://dhawards.org/dhawards2016/results/) in the Best Series of Posts category, then in the following year, 2017, _Programming Historian en español_ [won that very same accolade](http://dhawards.org/dhawards2017/results/). In 2018, The _Programming Historian en español_, was the winner of 'Mejor iniciativa formativa desarrollada durante el año 2018', [Humanidades Digitales Hispánicas Association](http://humanidadesdigitaleshispanicas.es/). We won the [Canadian Social Knowledge Institute's Open Scholarship Award](https://etcl.uvic.ca/events-activities/open-scholarship-awards/) 2020 and in 2021 we were awarded [Coko Foundation's Open Publishing Award](https://openpublishingawards.org/results/2021/index.html) in their Open Content category. In 2022, we won the Best DH Training Materials category of the [Digital Humanities Awards](http://dhawards.org/dhawards2022/results/). 
+The _Programming Historian_ has won multiple awards which recognise and celebrate our achievements in the spheres of open access publishing and digital scholarship. In 2016 our English-language journal was the winner of the [Digital Humanities Awards](https://dhawards.org/dhawards2016/results/) in the Best Series of Posts category, then in the following year, 2017, _Programming Historian en español_ [won that very same accolade](https://dhawards.org/dhawards2017/results/). In 2018, the _Programming Historian en español_ was the winner of 'Mejor iniciativa formativa desarrollada durante el año 2018', [Humanidades Digitales Hispánicas Association](https://humanidadesdigitaleshispanicas.es/). We won the [Canadian Social Knowledge Institute's Open Scholarship Award](https://etcl.uvic.ca/events-activities/open-scholarship-awards/) 2020 and in 2021 we were awarded [Coko Foundation's Open Publishing Award](https://openpublishingawards.org/results/2021/index.html) in their Open Content category. In 2022, we won the Best DH Training Materials category of the [Digital Humanities Awards](https://dhawards.org/dhawards2022/results/).
## Diversity Policy
@@ -40,4 +40,4 @@ For a list of our funders and supports, see the ['Support Us']({{site.baseurl}}/
## History of the Project
-The _Programming Historian_ was founded in 2008 by William J. Turkel and Alan MacEachern. Turkel published [a blog post](http://digitalhistoryhacks.blogspot.com/2008/01/programming-historian.html) at the time, setting out their intentions for the project. Initially it focused heavily on the Python programming language and was published open access as a *Network in Canadian History & Environment* (NiCHE) ‘Digital Infrastructure’ project. In 2012, _Programming Historian_ expanded its editorial team and launched as an open access peer reviewed scholarly journal of methodology for digital historians. In 2016 we added a Spanish Language publication to the initial English-language publication and in 2017 started publishing translated lessons under the title *[Programming Historian en español]({{site.baseurl}}/es)*. In 2018 we [hosted our first Spanish-language writing workshop](/posts/bogota-workshop-report) and issued a call for [new lessons in Spanish](/posts/convocatoria-de-tutoriales). In the same year we added a French language publication and launched *[Programming Historian en français]({{site.baseurl}}/fr)* in 2019. A year later, we were joined by a Portuguese-speaking team and launched *[Programming Historian em português]({{site.baseurl}}/pt)* in early 2021.
+The _Programming Historian_ was founded in 2008 by William J. Turkel and Alan MacEachern. Turkel published [a blog post](https://digitalhistoryhacks.blogspot.com/2008/01/programming-historian.html) at the time, setting out their intentions for the project. Initially it focused heavily on the Python programming language and was published open access as a *Network in Canadian History & Environment* (NiCHE) ‘Digital Infrastructure’ project. In 2012, _Programming Historian_ expanded its editorial team and launched as an open access peer reviewed scholarly journal of methodology for digital historians. In 2016 we added a Spanish-language publication to the initial English-language publication and in 2017 started publishing translated lessons under the title *[Programming Historian en español]({{site.baseurl}}/es)*.
In 2018 we [hosted our first Spanish-language writing workshop](/posts/bogota-workshop-report) and issued a call for [new lessons in Spanish](/posts/convocatoria-de-tutoriales). In the same year we added a French language publication and launched *[Programming Historian en français]({{site.baseurl}}/fr)* in 2019. A year later, we were joined by a Portuguese-speaking team and launched *[Programming Historian em português]({{site.baseurl}}/pt)* in early 2021. diff --git a/en/contribute.md b/en/contribute.md index 1e3cc5431d..95a21b18a4 100755 --- a/en/contribute.md +++ b/en/contribute.md @@ -59,9 +59,9 @@ We are especially grateful for tips about lessons that seem to be broken. As URL This project is our attempt to demonstrate what open access academic publishing can and should be. Please help us spreading the message and providing the widest possible access to this resource by asking your librarian to include the project in your library catalogue. -The _Programming Historian_ has listings in WorldCat ([English](http://www.worldcat.org/title/programming-historian/oclc/951537099), [Spanish](https://www.worldcat.org/title/programming-historian-en-espanol/oclc/1061292935&referer=brief_results), [French](https://uva.worldcat.org/title/programming-historian-en-franais/oclc/1104391842) and [Portuguese](https://search.worldcat.org/title/1332987197)). +The _Programming Historian_ has listings in WorldCat ([English](https://www.worldcat.org/title/programming-historian/oclc/951537099), [Spanish](https://www.worldcat.org/title/programming-historian-en-espanol/oclc/1061292935&referer=brief_results), [French](https://uva.worldcat.org/title/programming-historian-en-franais/oclc/1104391842) and [Portuguese](https://search.worldcat.org/title/1332987197)). -With thanks to the [University of Purdue library](http://purdue-primo-prod.hosted.exlibrisgroup.com/primo_library/libweb/action/dlDisplay.do?vid=PURDUE&search_scope=everything&docId=PURDUE_ALMA51671812890001081&fn=permalink) and Amanda Visconti, and to the University of Virginia. +With thanks to the [University of Purdue library](https://purdue-primo-prod.hosted.exlibrisgroup.com/primo_library/libweb/action/dlDisplay.do?vid=PURDUE&search_scope=everything&docId=PURDUE_ALMA51671812890001081&fn=permalink) and Amanda Visconti, and to the University of Virginia. The English edition of the project is indexed by the [Directory of Open Access Journals](https://doaj.org/toc/2397-2068). diff --git a/en/editor-guidelines.md b/en/editor-guidelines.md index 46051ecfde..b9330f4c01 100755 --- a/en/editor-guidelines.md +++ b/en/editor-guidelines.md @@ -291,7 +291,7 @@ Here are a few places to look for lesson images: - The [British Library](https://www.flickr.com/photos/britishlibrary) - The [Internet Archive Book Images](https://archive.org/details/bookimages) - The [Virtual Manuscript Library of Switzerland](https://www.flickr.com/photos/e-codices) - - The [Library of Congress Maps](http://www.loc.gov/maps/collections) + - The [Library of Congress Maps](https://www.loc.gov/maps/collections) Ensure that the image matches the style of the other images (it should be a book image, not a photograph), is at least 200 pixels in both dimensions, and is not copyright restricted. Make sure the image is not offensive, and keeping with our [Commitment to Diversity](/posts/PH-commitment-to-diversity) try to find something that does not perpetuate stereotypes or send a subtle message about maleness and whiteness. 
diff --git a/en/individual.md b/en/individual.md index 49edca3e6f..d3ea5ab3a1 100644 --- a/en/individual.md +++ b/en/individual.md @@ -19,7 +19,7 @@ Your support directly enables the infrastructure that keeps our publications tog
    - + diff --git a/en/ipp.md b/en/ipp.md index 7da840d8de..0dc420a2d5 100644 --- a/en/ipp.md +++ b/en/ipp.md @@ -49,7 +49,7 @@ By joining the Institutional Partner Programme you will receive the following be
- + diff --git a/en/lessons/analyzing-documents-with-tfidf.md b/en/lessons/analyzing-documents-with-tfidf.md
index 07f85f7022..adfa739b0e 100644
--- a/en/lessons/analyzing-documents-with-tfidf.md
+++ b/en/lessons/analyzing-documents-with-tfidf.md
@@ -326,7 +326,7 @@ As I have described, __tf-idf__ has its origins in information retrieval, and th
### 1. As an Exploratory Tool or Visualization Technique
-As I've already demonstrated, terms lists with __tf-idf__ scores for each document in a corpus can be a strong interpretive aid in themselves, they can help generate hypotheses or research questions. Word lists can also be the building bocks for more sophisticated browsing and visualization strategies. ["A full-text visualization of the Iraq War Logs"](http://jonathanstray.com/a-full-text-visualization-of-the-iraq-war-logs), by Jonathan Stray and Julian Burgess, is a good example of this use case.[^11] Using __tf-idf__-transformed features, Stray and Burgess build a network visualization that positions Iraq War logs in relation to their most distinctive keywords. This way of visualizing textual information led Stray to develop [the Overview Project](https://www.overviewdocs.com), which provides a dashboard for users to visualize and search thousands of documents at a time. We could use this kind of approach to graph our obituaries corpus and see if there are keyword communities.
+As I've already demonstrated, terms lists with __tf-idf__ scores for each document in a corpus can be a strong interpretive aid in themselves; they can help generate hypotheses or research questions. Word lists can also be the building blocks for more sophisticated browsing and visualization strategies. ["A full-text visualization of the Iraq War Logs"](https://jonathanstray.com/a-full-text-visualization-of-the-iraq-war-logs), by Jonathan Stray and Julian Burgess, is a good example of this use case.[^11] Using __tf-idf__-transformed features, Stray and Burgess build a network visualization that positions Iraq War logs in relation to their most distinctive keywords. This way of visualizing textual information led Stray to develop [the Overview Project](https://www.overviewdocs.com), which provides a dashboard for users to visualize and search thousands of documents at a time. We could use this kind of approach to graph our obituaries corpus and see if there are keyword communities.
### 2. Textual Similarity and Feature Sets
@@ -408,13 +408,13 @@ Text summarization is yet another way to explore a corpus. Rada Mihalcea and Pau
- Salton, G. and M.J. McGill, _Introduction to Modern Information Retrieval_. New York: McGraw-Hill, 1983.
-- Schmidt, Ben. "Do Digital Humanists Need to Understand Algorithms?" _Debates in the Digital Humanities 2016_. Online edition. Minneapois: University of Minnesota Press. http://dhdebates.gc.cuny.edu/debates/text/99
+- Schmidt, Ben. "Do Digital Humanists Need to Understand Algorithms?" _Debates in the Digital Humanities 2016_. Online edition. Minneapolis: University of Minnesota Press. https://dhdebates.gc.cuny.edu/debates/text/99
-- --. "Words Alone: Dismantling Topic Models in the Humanities," _Journal of Digital Humanities_. Vol. 2, No. 1 (2012): n.p. http://journalofdigitalhumanities.org/2-1/words-alone-by-benjamin-m-schmidt/
+- --. "Words Alone: Dismantling Topic Models in the Humanities," _Journal of Digital Humanities_. Vol. 2, No. 1 (2012): n.p. https://journalofdigitalhumanities.org/2-1/words-alone-by-benjamin-m-schmidt/
- Spärck Jones, Karen.
"A Statistical Interpretation of Term Specificity and Its Application in Retrieval." Journal of Documentation 28, no. 1 (1972): 11–21. -- Stray, Jonathan, and Julian Burgess. "A Full-text Visualization of the Iraq War Logs," December 10, 2010 (Update April 2012). http://jonathanstray.com/a-full-text-visualization-of-the-iraq-war-logs +- Stray, Jonathan, and Julian Burgess. "A Full-text Visualization of the Iraq War Logs," December 10, 2010 (Update April 2012). https://jonathanstray.com/a-full-text-visualization-of-the-iraq-war-logs - Underwood, Ted. "Identifying diction that characterizes an author or genre: why Dunning's may not be the best method," _The Stone and the Shell_, November 9, 2011. https://tedunderwood.com/2011/11/09/identifying-the-terms-that-characterize-an-author-or-genre-why-dunnings-may-not-be-the-best-method/ @@ -435,7 +435,7 @@ If you are not using Anaconda, you will need to cover the following dependencies 1. Install Python 2 or 3 (preferably Python 3.6 or later) 2. Recommended: install and run a virtual environment -3. Install the Scikit-Learn library and its dependencies (see [http://scikit-learn.org/stable/install.html](http://scikit-learn.org/stable/install.html)). +3. Install the Scikit-Learn library and its dependencies (see [https://scikit-learn.org/stable/install.html](https://scikit-learn.org/stable/install.html)). 4. Install Jupyter Notebook and its dependencies # Endnotes @@ -454,13 +454,13 @@ If you are not using Anaconda, you will need to cover the following dependencies [^7]: Documentation for TfidfVectorizer. -[^8]: Schmidt, Ben. "Do Digital Humanists Need to Understand Algorithms?" _Debates in the Digital Humanities 2016_. Online edition. (Minneapois: University of Minnesota Press): n.p. +[^8]: Schmidt, Ben. "Do Digital Humanists Need to Understand Algorithms?" _Debates in the Digital Humanities 2016_. Online edition. (Minneapois: University of Minnesota Press): n.p. [^9]: van Rossum, Guido, Barry Warsaw, and Nick Coghlan. "PEP 8 -- Style Guide for Python Code." July 5, 2001. Updated July 2013. [^10]: "Ida M. Tarbell, 86, Dies in Bridgeport" _The New York Times_, January 7, 1944, 17. ; "Nellie Bly, Journalist, Dies of Pneumonia" _The New York Times_, January 28, 1922, 11. ; "W. E. B. DuBois Dies in Ghana; Negro Leader and Author, 95" _The New York Times_, August 28, 1963, 27. ; Whitman, Alden. "Upton Sinclair, Author, Dead; Crusader for Social Justice, 90" _The New York Times_, November 26, 1968, 1, 34. ; "Willa Cather Dies; Noted Novelist, 70" _The New York Times_, April 25, 1947, 21. -[^11]: Stray, Jonathan, and Julian Burgess. "A Full-text Visualization of the Iraq War Logs," December 10, 2010 (Update April 2012). +[^11]: Stray, Jonathan, and Julian Burgess. "A Full-text Visualization of the Iraq War Logs," December 10, 2010 (Update April 2012). [^12]: Manning, C.D., P. Raghavan, and H. Schütze, _Introduction to Information Retrieval_. (Cambridge: Cambridge University Press, 2008): 118-120. @@ -470,6 +470,6 @@ If you are not using Anaconda, you will need to cover the following dependencies [^15]: __Tf-idf__ is not typically a recommended pre-processing step when generating topic models. See -[^16]: Schmidt, Ben. "Words Alone: Dismantling Topic Models in the Humanities," _Journal of Digital Humanities_. Vol. 2, No. 1 (2012): n.p. +[^16]: Schmidt, Ben. "Words Alone: Dismantling Topic Models in the Humanities," _Journal of Digital Humanities_. Vol. 2, No. 1 (2012): n.p. [^17]: Mihalcea, Rada, and Paul Tarau. "Textrank: Bringing order into text." 
In _Proceedings of the 2004 conference on empirical methods in natural language processing_. 2004. diff --git a/en/lessons/applied-archival-downloading-with-wget.md b/en/lessons/applied-archival-downloading-with-wget.md index b52a51ed0f..8cb452df87 100755 --- a/en/lessons/applied-archival-downloading-with-wget.md +++ b/en/lessons/applied-archival-downloading-with-wget.md @@ -72,8 +72,8 @@ identify the beginning URL in the series of documents that you want to download. Because of its smaller size we're going to use the online war diary for [No. 14 Canadian General Hospital][] as our example. The entire war diary is 80 pages long. The URL for page 1 is - and the URL for page -80 is '. Note that + and the URL for page +80 is '. Note that they are in sequential order. We want to download the .jpeg images for *all* of the pages in the diary. To do this, we need to design a script to generate all of the URLs for the pages in between (and including) the @@ -197,9 +197,9 @@ Mutineers, 1789" which provides an account of the mutiny aboard the HMS (pages) to the notebook. This is somewhat misleading. Click on the first thumbnail in the top right to view the whole page. Now, *right-click -\> view image*. The URL should be -''. If you browse through +''. If you browse through the thumbnails, the last one is 'Part 127', which is located at -''. The discrepancy +''. The discrepancy between the range of URLs and the total number of files means that you may miss a page or two in the automated download – in this case there are a few URLs that include a letter in the name of the .jpeg @@ -258,7 +258,7 @@ complicate matters and do not permit us to easily generate URLs with the first script we used. Here's a workaround. Click on this link: - + The page you just opened is a sub-directory of the website that lists the .jpeg files for a selection of the Jefferson Papers. This means that @@ -270,15 +270,15 @@ URLs you do not actually need to write a script (although you could using my final example, which discusses the problem of leading zeros). Instead, simply manipulate the URLs in a .txt file as follows: - + - + - + ... all the way up to - + This is the last sub-directory on the Library of Congress site for these dates in Series 1. This last URL contains images 1400-1487. @@ -319,12 +319,12 @@ Archives example, to get the simplified URL you must *right-click -\> view image* using your web-browser. The URL for the first poster should be: - + Follow the same steps for the last poster in the gallery – the URL should be: -. +. The script we used to download from LAC will not work because the range function cannot comprehend leading zeros. The script below provides an @@ -383,13 +383,13 @@ toolkit. As new methods for scraping online repositories become available, we will continue to update this lesson with additional examples of Wget's power and potential. - [ActiveHistory.ca]: http://www.activehistory.ca - [curl]: http://chronicle.com/blogs/profhacker/download-a-sequential-range-of-urls-with-curl/41055 + [ActiveHistory.ca]: https://www.activehistory.ca + [curl]: https://chronicle.com/blogs/profhacker/download-a-sequential-range-of-urls-with-curl/41055 [Indian Affairs Annual Reports database]: https://recherche-collection-search.bac-lac.gc.ca/eng/Home/Search?q=%20Indian%20Affairs%20Annual%20Reports%20database [View a scanned page of original Report]: https://recherche-collection-search.bac-lac.gc.ca/eng/home/record?app=fonandcol&IdNumber=2061374&q=Indian%20Affairs%20Annual%20Reports [No. 
14 Canadian General Hospital]: https://recherche-collection-search.bac-lac.gc.ca/eng/Home/Record?app=fonandcol&IdNumber=2005110&new=-8585971893141232328 - [http://data2.archives.ca/e/e061/e001518109.jpg]: http://data2.archives.ca/e/e061/e001518029.jpg - [leading zeros]: http://en.wikipedia.org/wiki/Leading_zero - [On the viewer page]: http://www.nla.gov.au/apps/cdview/?pi=nla.ms-ms5393-1 - [Series 1: General Correspondence. 1651-1827]: http://memory.loc.gov/cgi-bin/ampage?collId=mtj1&fileName=mtj1page001.db&recNum=1&itemLink=/ammem/collections/jefferson_papers/mtjser1.html&linkText=6 - [Historical Medical Poster Collection]: http://cushing.med.yale.edu/gsdl/collect/mdposter/ + [https://data2.archives.ca/e/e061/e001518109.jpg]: https://data2.archives.ca/e/e061/e001518029.jpg + [leading zeros]: https://en.wikipedia.org/wiki/Leading_zero + [On the viewer page]: https://www.nla.gov.au/apps/cdview/?pi=nla.ms-ms5393-1 + [Series 1: General Correspondence. 1651-1827]: https://memory.loc.gov/cgi-bin/ampage?collId=mtj1&fileName=mtj1page001.db&recNum=1&itemLink=/ammem/collections/jefferson_papers/mtjser1.html&linkText=6 + [Historical Medical Poster Collection]: https://cushing.med.yale.edu/gsdl/collect/mdposter/ diff --git a/en/lessons/automated-downloading-with-wget.md b/en/lessons/automated-downloading-with-wget.md index 1b3ec73cca..97d6a8af4d 100755 --- a/en/lessons/automated-downloading-with-wget.md +++ b/en/lessons/automated-downloading-with-wget.md @@ -277,10 +277,10 @@ manual][] page. Let's take an example dataset. Say you wanted to download all of the papers hosted on the website ActiveHistory.ca. They are all located at: -; in the sense that they are all +; in the sense that they are all contained within the `/papers/` directory: for example, the 9th paper published on the website -is . Think of this +is . Think of this structure in the same way as directories on your own computer: if you have a folder labeled `/History/`, it likely contains several files within it. The same structure holds true for websites, and we are using @@ -331,7 +331,7 @@ Saving to: `index.html.1' ``` What you have done is downloaded just the first page of -, the index page for the papers to your +, the index page for the papers to your new directory. If you open it, you'll see the main text on the home page of ActiveHistory.ca. So at a glance, we have already quickly downloaded something. @@ -357,10 +357,10 @@ options. So let's learn a few commands now: Recursive retrieval is the most important part of wget. What this means is that the program begins following links from the website and downloading them too. So for example, the - has a link to -, so it will download + has a link to +, so it will download that too if we use recursive retrieval. However, it will also follow any -other links: if there was a link to somewhere on that +other links: if there was a link to somewhere on that page, it would follow that and download it as well. By default, -r sends wget to a depth of five sites after the first one. This is following links, to a limit of five clicks after the first website. At this point, @@ -376,8 +376,8 @@ have a short version, this could be initiated using -np). This is an important one. What this means is that wget should follow links, but not beyond the last parent directory. In our case, that means that it won't go anywhere that is not part of the -http://activehistory.ca/papers/ hierarchy. 
If it was a long path such as -http://niche-canada.org/projects/events/new-events/not-yet-happened-events/, +https://activehistory.ca/papers/ hierarchy. If it was a long path such as +https://niche-canada.org/projects/events/new-events/not-yet-happened-events/, it would only find files in the `/not-yet-happened-events/` folder. It is a critical command for delineating your search. @@ -493,12 +493,12 @@ files, backups, etc. I've only given a snapshot of some of wget's functionalities. For more, please visit the [wget manual][GNU wget manual]. - [Command Line Bootcamp]: http://praxis.scholarslab.org/scratchpad/bash/ + [Command Line Bootcamp]: https://praxis.scholarslab.org/scratchpad/bash/ [download XCode via this link]: https://itunes.apple.com/us/app/xcode/id497799835?mt=12 [Apple Developer website]: https://developer.apple.com/xcode/ [View Downloads]: https://developer.apple.com/downloads/ - [GNU website]: http://www.gnu.org/software/wget/ - [HTTP]: http://ftp.gnu.org/gnu/wget/ + [GNU website]: https://www.gnu.org/software/wget/ + [HTTP]: https://ftp.gnu.org/gnu/wget/ [FTP]: ftp://ftp.gnu.org/gnu/wget/ - [ugent website]: http://users.ugent.be/~bpuype/wget/ - [GNU wget manual]: http://www.gnu.org/software/wget/manual/wget.html + [ugent website]: https://users.ugent.be/~bpuype/wget/ + [GNU wget manual]: https://www.gnu.org/software/wget/manual/wget.html diff --git a/en/lessons/basic-text-processing-in-r.md b/en/lessons/basic-text-processing-in-r.md index 25f9adaeb2..2052d28348 100755 --- a/en/lessons/basic-text-processing-in-r.md +++ b/en/lessons/basic-text-processing-in-r.md @@ -644,9 +644,9 @@ Many generic tutorials exist for all three of these, as well as extensive packag [^2]: Our corpus has 236 State of the Union addresses. Depending on exactly what is counted, this number can be slightly higher or lower. -[^3]: All Presidential State of the Union Addresses were downloaded from The American Presidency Project at the University of California Santa Barbara. (Accessed 2016-11-11) [http://www.presidency.ucsb.edu/sou.php](http://www.presidency.ucsb.edu/sou.php). +[^3]: All Presidential State of the Union Addresses were downloaded from The American Presidency Project at the University of California Santa Barbara. (Accessed 2016-11-11) [https://www.presidency.ucsb.edu/sou.php](https://www.presidency.ucsb.edu/sou.php). -[^4]: Peter Norvig. "Google Web Trillion Word Corpus". (Accessed 2016-11-11) [http://norvig.com/ngrams/](http://norvig.com/ngrams/). +[^4]: Peter Norvig. "Google Web Trillion Word Corpus". (Accessed 2016-11-11) [https://norvig.com/ngrams/](https://norvig.com/ngrams/). [^5]: This does happen for a few written State of the Union addresses, where a long bulleted list gets parsed into one very long sentence. diff --git a/en/lessons/beginners-guide-to-twitter-data.md b/en/lessons/beginners-guide-to-twitter-data.md index 17d4621cba..2f65d7741d 100644 --- a/en/lessons/beginners-guide-to-twitter-data.md +++ b/en/lessons/beginners-guide-to-twitter-data.md @@ -16,7 +16,7 @@ reviewers: - Frédéric Clavert - Telmo Menezes - Ed Summers -review-ticket: http://programminghistorian.github.io/ph-submissions/lessons/beginners-guide-to-twitter-data +review-ticket: https://programminghistorian.github.io/ph-submissions/lessons/beginners-guide-to-twitter-data difficulty: 1 activity: acquiring topics: [data-manipulation, api] @@ -140,7 +140,7 @@ TweetSets provides additional files from the Hurricane Irma dataset. These data Download and extract the files. 
On a Windows computer, you can use an application such as [7-Zip](https://www.7-zip.org/) to uncompress files with a .gz extension. -If you are unfamiliar with social network analysis, it might be worthwhile to check out one of Scott Weingart’s ["Demystifying Networks"](http://journalofdigitalhumanities.org/1-1/demystifying-networks-by-scott-weingart/) series to familiarize yourself with the basic linguistic and visual vocabularies. If you have done so, you will recognize that the TweetSets outputs show us some basic information that can be used to reconstruct a social network. The edges file shows us who is tweeting to whom; the nodes files associates user names with ID numbers; and the top mentions and users files do the same, but for the most actively mentioned and most actively tweeting users. +If you are unfamiliar with social network analysis, it might be worthwhile to check out one of Scott Weingart’s ["Demystifying Networks"](https://journalofdigitalhumanities.org/1-1/demystifying-networks-by-scott-weingart/) series to familiarize yourself with the basic linguistic and visual vocabularies. If you have done so, you will recognize that the TweetSets outputs show us some basic information that can be used to reconstruct a social network. The edges file shows us who is tweeting to whom; the nodes file associates user names with ID numbers; and the top mentions and users files do the same, but for the most actively mentioned and most actively tweeting users. The edges file is 13,856,080 lines, so too large to work with in Excel. For this lesson, we will work with only the first 1,000 lines of data in the file. The [Introduction to the Bash Command Line](/en/lessons/intro-to-bash) lesson describes how you can use a command-line interface to read parts of a file using commands such as `head`. We can read the first 1,001 lines (1,000 lines of data plus a header) of the file into a new file using the following command: @@ -208,7 +208,7 @@ Once you've done this, you can see the returned value is now in the formula fiel {% include figure.html filename="vlookup-final.png" caption="Done with data formatting!" %} ## Further Applications -After repeating this process on the second column, this spreadsheet is ready to be used in a variety of social network visualizations. It will drop right in to a SNA tool like [Palladio](http://hdlab.stanford.edu/palladio/), or, with some light reformatting, into software like [Gephi](https://gephi.org/) or [Cytoscape](https://cytoscape.org/). The VLOOKUP we did makes it so you can do the visualizations with human-legible user names, rather than rather meaningless user IDs. +After repeating this process on the second column, this spreadsheet is ready to be used in a variety of social network visualizations. It will drop right into a SNA tool like [Palladio](https://hdlab.stanford.edu/palladio/), or, with some light reformatting, into software like [Gephi](https://gephi.org/) or [Cytoscape](https://cytoscape.org/). The VLOOKUP we did makes it so you can do the visualizations with human-legible user names, rather than rather meaningless user IDs. {% include figure.html filename="palladio.png" caption="A very quick social network sketch showing the users who most often mentioned @realDonaldTrump in their hurricane tweets. Done in Palladio."
%} diff --git a/en/lessons/building-static-sites-with-jekyll-github-pages.md b/en/lessons/building-static-sites-with-jekyll-github-pages.md index ddbb1613ed..34b12e2f16 100755 --- a/en/lessons/building-static-sites-with-jekyll-github-pages.md +++ b/en/lessons/building-static-sites-with-jekyll-github-pages.md @@ -23,7 +23,7 @@ doi: 10.46430/phen0048 **This lesson is for you if** you'd like an entirely free, easy-to-maintain, preservation-friendly, secure website over which you have full control, such as a scholarly blog, project website, or online portfolio. -**At the end of this lesson**, you'll have a basic live website where you can publish content that other people can visit—it will look like [this](http://amandavisconti.github.io/JekyllDemo/)!—and you'll also have some resources to explore if you want to further customize the site. +**At the end of this lesson**, you'll have a basic live website where you can publish content that other people can visit—it will look like [this](https://amandavisconti.github.io/JekyllDemo/)!—and you'll also have some resources to explore if you want to further customize the site. **Requirements:** A computer (Mac/Windows/Linux are all okay, but this lesson doesn't cover some aspects of Linux use, and you may encounter some issues if you are using a Mac with an M-series (silicon) chip), the ability to download and install software on the computer, an internet connection that can support downloading software. Users have reported needing between 1-3 hours to complete the entire lesson. @@ -35,7 +35,7 @@ doi: 10.46430/phen0048 ## What are static sites, Jekyll, etc. & why might I care? -*This tutorial is built on the [official Jekyll Documentation](http://jekyllrb.com/docs/home/) written by the Jekyll community. See the ["Read more"](#section9-3) section below if you'd like to know even more about these terms!* +*This tutorial is built on the [official Jekyll Documentation](https://jekyllrb.com/docs/home/) written by the Jekyll community. See the ["Read more"](#section9-3) section below if you'd like to know even more about these terms!* ### Dynamic websites, static websites, & Jekyll @@ -49,7 +49,7 @@ Note that when someone refers to a "Jekyll website", they really mean a static ( Because static sites are really just text files (no database to complicate matters), you can easily *version* a static site—that is, use a tool to keep track of the different versions of the site over time by tracking how the text files that compose the site have been altered. Versioning is especially helpful when you need to merge two files (e.g. two students are writing a blog post together, and you want to combine their two versions), or when you want to compare files to look for differences among them (e.g. "How did the original About page describe this project?"). Versioning is great when working with a team (e.g. helps you combine and track different people's work), but it's also useful when writing or running a website on your own. -Read more about [Jekyll here](http://jekyllrb.com/docs/home/) or [static site generators here](https://davidwalsh.name/introduction-static-site-generators). +Read more about [Jekyll here](https://jekyllrb.com/docs/home/) or [static site generators here](https://davidwalsh.name/introduction-static-site-generators).
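To make the idea above concrete, here is a minimal sketch of what a static site generator does at its core. It is written in Python rather than Jekyll's Ruby, and it assumes an invented `pages/` folder of Markdown files plus the third-party `markdown` package (`pip install markdown`); Jekyll itself adds much more (layouts, Liquid templating, blog post handling), so treat this only as an illustration of the build step.

```python
# A toy static site build: convert every Markdown file in pages/ into a
# standalone HTML file in _site/, once, ahead of time. No database is
# involved; the output is plain files any web server (or GitHub Pages)
# can serve as-is.
import pathlib

import markdown  # third-party package: pip install markdown

TEMPLATE = "<!DOCTYPE html><html><body>{body}</body></html>"

site = pathlib.Path("_site")
site.mkdir(exist_ok=True)

for page in sorted(pathlib.Path("pages").glob("*.md")):
    html = markdown.markdown(page.read_text(encoding="utf-8"))
    out = site / (page.stem + ".html")
    out.write_text(TEMPLATE.format(body=html), encoding="utf-8")
    print("built", out)
```

Versioning then amounts to tracking changes to those plain source files, which is exactly the job Git and GitHub handle.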
### GitHub & GitHub Pages @@ -97,7 +97,7 @@ Jekyll isn't officially supported for Windows, which means none of the official *A GitHub user account will let you host your website (make it available for others to visit) for free on GitHub (we'll cover how in a later step). As a bonus, it will also let you keep track of versions of the website and its writing as it grows or changes over time.* -1\. Visit [GitHub.com](https://github.com/) and click on the "Sign up" button on the upper right. Write your desired username. This will be visible to others, identify you on GitHub, and also be part of your site's URL; for example, the author's GitHub username is amandavisconti and her demo Jekyll site's URL is http://amandavisconti.github.io/JekyllDemo/. (*Note you can also purchase your own domain name and use it for this site, but that won't be covered in this tutorial*). Also write your desired email address and password, then click "Create an account". +1\. Visit [GitHub.com](https://github.com/) and click on the "Sign up" button on the upper right. Write your desired username. This will be visible to others, identify you on GitHub, and also be part of your site's URL; for example, the author's GitHub username is amandavisconti and her demo Jekyll site's URL is https://amandavisconti.github.io/JekyllDemo/. (*Note you can also purchase your own domain name and use it for this site, but that won't be covered in this tutorial*). Also write your desired email address and password, then click "Create an account". 2\. On the next page, click the "Choose" button next to the "Free" plan option, ignore the "Help me set up an organization next" checkbox, and click "Finish sign up". 3\. *Optional*: Visit https://github.com/settings/profile to add a full name (can be your real name, GitHub user name, or something else) and other public profile information, if desired. @@ -115,7 +115,7 @@ Jekyll isn't officially supported for Windows, which means none of the official ### Text editor -You'll need to download and install a "text editor" program on your computer for making small customizations to your Jekyll site's code. Good free options include [TextWrangler](http://www.barebones.com/products/textwrangler/download.html) (Mac) or [Notepad++](https://notepad-plus-plus.org/) (Windows). Software aimed at word processing, like Microsoft Word or Word Pad, isn't a good choice because it's easy to forget how to format and save the file, accidentally adding in extra and/or invisible formatting and characters that will break your site. You'll want something that specifically can save what you write as plaintext (e.g. HTML, Markdown). +You'll need to download and install a "text editor" program on your computer for making small customizations to your Jekyll site's code. Good free options include [TextWrangler](https://www.barebones.com/products/textwrangler/download.html) (Mac) or [Notepad++](https://notepad-plus-plus.org/) (Windows). Software aimed at word processing, like Microsoft Word or Word Pad, isn't a good choice because it's easy to forget how to format and save the file, accidentally adding in extra and/or invisible formatting and characters that will break your site. You'll want something that specifically can save what you write as plaintext (e.g. HTML, Markdown). *Optional:* See [the "Authoring in Markdown" section below](#section5-2) for notes on a Markdown-specific editing program, which you may also wish to install when you get to the point of authoring webpages and/or blog posts. 
@@ -162,7 +162,7 @@ Open a command line window (Applications > Utilities > Terminal) and enter the c ### Command line tools suite -You'll need to first install the Mac "command line tools" suite to be able to use [Homebrew](http://brew.sh/) (which we'll install next). Homebrew lets you download and install open-source software on Macs from the command line (it's a "package manager"), which will make installing Ruby (the language Jekyll is built on) easier. +You'll need to first install the Mac "command line tools" suite to be able to use [Homebrew](https://brew.sh/) (which we'll install next). Homebrew lets you download and install open-source software on Macs from the command line (it's a "package manager"), which will make installing Ruby (the language Jekyll is built on) easier. In Terminal, paste the following code then press enter: @@ -182,7 +182,7 @@ You'll see a message that "The software was installed" when the installation is ### Homebrew -After the command line tools suite has completed installation, return to your command line window and enter the following to install [Homebrew](http://brew.sh/): +After the command line tools suite has completed installation, return to your command line window and enter the following to install [Homebrew](https://brew.sh/): ``` /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" @@ -282,7 +282,7 @@ If you get a permissions error at this point, entering `usr/local/bin/gem instal Don't forget to wait until the command prompt appears again to move to the next step. -4\. Your site's public URL will take the form http://amandavisconti.github.io/JekyllDemo/, with *amandavisconti* being the author's GitHub username and *JekyllDemo* the name of the site I entered at this step (*an option to purchase and use your own [custom URL](#section7-2) is possible, but not covered in this lesson*). **Lowercase and uppercase website names do *not* point to the same website automatically**, so unlike my *JekyllDemo* example you might wish to pick an all-lowercase name to make sure people who hear about the site tend to type its URL correctly. +4\. Your site's public URL will take the form https://amandavisconti.github.io/JekyllDemo/, with *amandavisconti* being the author's GitHub username and *JekyllDemo* the name of the site I entered at this step (*an option to purchase and use your own [custom URL](#section7-2) is possible, but not covered in this lesson*). **Lowercase and uppercase website names do *not* point to the same website automatically**, so unlike my *JekyllDemo* example you might wish to pick an all-lowercase name to make sure people who hear about the site tend to type its URL correctly. At the command line, type in the following (but replace *JekyllDemo* with whatever you want your site to be called): @@ -370,7 +370,7 @@ You'll notice that generating and running your site in the previous section adde - **email**: Your email address. - **description**: A description of your website that will be used in search engine results and the site's RSS feed. - **baseurl**: Fill in the quotation marks with a forward slash followed by the name of your website folder (e.g. "/JekyllDemo/") to help locate the site at the correct URL. Make sure that your folder is the same as the GitHub repository name and ends with a forward slash (`/`). It will be required for publishing it on GitHub Pages. - - **url**: Replace "http://yourdomain.com" with "localhost:4000" to help locate your local version of the site at the correct URL.
+ - **url**: Replace "https://yourdomain.com" with "localhost:4000" to help locate your local version of the site at the correct URL. - **twitter_username**: Your Twitter username (do not include @ symbol). - **github_username**: Your GitHub username. @@ -412,11 +412,11 @@ To get a sense of how your site works and what files you'd experiment with to do Markdown is a way of formatting your writing for reading on the web: it's a set of easy-to-remember symbols that show where text formatting should be added (e.g. a # in front of text means to format it as a heading, while a * in front of text means to format it as a bulleted list item). For Jekyll in particular, Markdown means you can write webpages and blog posts in a way that's comfortable to authors (e.g. no need to look up/add in HTML tags while trying to write an essay), but have that writing show up formatted nicely on the web (i.e. a text-to-HTML convertor). -We won't cover Markdown in this lesson; if you're not familiar with it, for now you can just create posts and pages with no formatting (i.e. no bold/italic, no headers, no bulleted lists). But these are easy to learn how to add: there's a handy markdown [reference](http://kramdown.gettalong.org/quickref.html), as well as [a Programming Historian lesson by Sarah Simpkin on the hows and whys of writing with Markdown](/lessons/getting-started-with-markdown). Check out these links if you'd like to format text (italics, bold, headings, bullet/numbered lists) or add hyperlinks or embedded images and other files. +We won't cover Markdown in this lesson; if you're not familiar with it, for now you can just create posts and pages with no formatting (i.e. no bold/italic, no headers, no bulleted lists). But these are easy to learn how to add: there's a handy markdown [reference](https://kramdown.gettalong.org/quickref.html), as well as [a Programming Historian lesson by Sarah Simpkin on the hows and whys of writing with Markdown](/lessons/getting-started-with-markdown). Check out these links if you'd like to format text (italics, bold, headings, bullet/numbered lists) or add hyperlinks or embedded images and other files. -Make sure any Markdown cheatsheets you look at are for the "[kramdown](http://kramdown.gettalong.org/quickref.html)" flavor of Markdown, which is what GitHub Pages (where we'll be hosting our website) supports. (*There are [various "flavors" of Markdown](https://github.com/jgm/CommonMark/wiki/Markdown-Flavors) that have subtle differences in what various symbols do, but for the most part frequently used symbols like those that create heading formatting are the same—so you're actually probably okay using a markdown cheatsheet that doesn't specify it's kramdown, but if you're getting errors on your site using symbols that aren't included in kramdown might be why*). +Make sure any Markdown cheatsheets you look at are for the "[kramdown](https://kramdown.gettalong.org/quickref.html)" flavor of Markdown, which is what GitHub Pages (where we'll be hosting our website) supports. (*There are [various "flavors" of Markdown](https://github.com/jgm/CommonMark/wiki/Markdown-Flavors) that have subtle differences in what various symbols do, but for the most part frequently used symbols like those that create heading formatting are the same—so you're actually probably okay using a markdown cheatsheet that doesn't specify it's kramdown, but if you're getting errors on your site using symbols that aren't included in kramdown might be why*).
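If you want to preview what a snippet of Markdown will turn into, one quick option is to run it through a converter locally. The sketch below uses Python's third-party `markdown` package (`pip install markdown`) purely as a rough preview: GitHub Pages renders with kramdown (Ruby), so the frequently used symbols behave the same, but edge cases may differ.

```python
# Preview how common Markdown symbols come out as HTML.
import markdown  # third-party package: pip install markdown

source = """# My research post

Some *italic* text, some **bold** text, and a list:

- first point
- second point
"""

print(markdown.markdown(source))
# Prints <h1>, <em>, <strong>, and <ul>/<li> tags wrapped around the text.
```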
-You might be interested in "markdown editor" software such as [Typora](http://www.typora.io/) (OS X and Windows; free during current beta period), which will let you use popular keyboard shortcuts to write Markdown (e.g. highlight text and press command-B to make it bold) and/or type in Markdown but have it show as it will look on the web (see headings styled like headings, instead of like normal text with a # in front of them). +You might be interested in "markdown editor" software such as [Typora](https://www.typora.io/) (OS X and Windows; free during current beta period), which will let you use popular keyboard shortcuts to write Markdown (e.g. highlight text and press command-B to make it bold) and/or type in Markdown but have it show as it will look on the web (see headings styled like headings, instead of like normal text with a # in front of them). ### Authoring pages @@ -436,7 +436,7 @@ You might be interested in "markdown editor" software such as [Typora](http://ww {% include figure.html filename="building-static-sites-with-jekyll-github-pages-22.png" caption="After adding a new page file to the website folder, the new page appears in the website's header menu" %} -For reference, you can check out [an example of a page](http://amandavisconti.github.io/JekyllDemo/resume/) on my demo site, or see [the file that's behind that page](https://raw.githubusercontent.com/amandavisconti/JekyllDemo/gh-pages/resume.md). +For reference, you can check out [an example of a page](https://amandavisconti.github.io/JekyllDemo/resume/) on my demo site, or see [the file that's behind that page](https://raw.githubusercontent.com/amandavisconti/JekyllDemo/gh-pages/resume.md). ### Authoring posts @@ -469,7 +469,7 @@ Notice that **the URL of the post** is your local website URL (e.g. *localhost:4 **To create further posts**, duplicate an existing file, then remember to change not just the front matter and content inside the post as described above, but also the file name (date and title) of the new file. -For reference, you can check out [an example of a post](https://amandavisconti.github.io/JekyllDemo/2016/11/12/a-post-about-my-research.html) on my demo site, or see [the code running that post](http://raw.githubusercontent.com/amandavisconti/JekyllDemo/gh-pages/_posts/2016-02-29-a-post-about-my-research.markdown). +For reference, you can check out [an example of a post](https://amandavisconti.github.io/JekyllDemo/2016/11/12/a-post-about-my-research.html) on my demo site, or see [the code running that post](https://raw.githubusercontent.com/amandavisconti/JekyllDemo/gh-pages/_posts/2016-02-29-a-post-about-my-research.markdown). ## Hosting on GitHub Pages @@ -502,7 +502,7 @@ For reference, you can check out [an example of a post](https://amandavisconti.g 9\. Click the "Sync" button in the upper-right. {% include figure.html filename="building-static-sites-with-jekyll-github-pages-28.png" caption="Click the 'Sync' button in the upper-right" %} -10\. You can now visit (and share the link to!) your live website. The URL will follow the pattern of *your GitHub username DOT github.io SLASH name of your website SLASH*. (For example, the author's URL is [amandavisconti.github.io/JekyllDemo/](http://amandavisconti.github.io/JekyllDemo/).) +10\. You can now visit (and share the link to!) your live website. The URL will follow the pattern of *your GitHub username DOT github.io SLASH name of your website SLASH*. 
(For example, the author's URL is [amandavisconti.github.io/JekyllDemo/](https://amandavisconti.github.io/JekyllDemo/).) ### Mini cheatsheet @@ -527,16 +527,16 @@ Or, you can add in (and further customize, if desired) a theme already created b - [Alex Gil's "Ed" theme for minimal digital editions](https://github.com/minicomp/ed/) and [its documentation](https://github.com/minicomp/ed/blob/main/documentation.md) (free) - [Rebecca Sutton Koeser's "Digital Edition" theme](https://github.com/emory-libraries-ecds/digitaledition-jekylltheme) (free) -- The [Jekyll Themes](http://jekyllthemes.org/) directory (free) -- [JekyllThemes.io](http://jekyllthemes.io/) (free and paid) +- The [Jekyll Themes](https://jekyllthemes.org/) directory (free) +- [JekyllThemes.io](https://jekyllthemes.io/) (free and paid) ### Functionality -- [Jekyll plugins](http://jekyllrb.com/docs/plugins/) allow you to add small bits of code that add functionality to your site such as [full-text search](https://github.com/PascalW/jekyll_indextank), [emoji support](https://github.com/yihangho/emoji-for-jekyll), and [tag clouds](https://gist.github.com/ilkka/710577). +- [Jekyll plugins](https://jekyllrb.com/docs/plugins/) allow you to add small bits of code that add functionality to your site such as [full-text search](https://github.com/PascalW/jekyll_indextank), [emoji support](https://github.com/yihangho/emoji-for-jekyll), and [tag clouds](https://gist.github.com/ilkka/710577). - If you want to host your site on GitHub Pages as we did in this lesson, you can only use the Jekyll plugins already included in the GitHub Pages gem we installed (here's [a full list of what you installed](https://pages.github.com/versions/) when adding the GitHub Pages gem to your Gemfile earlier). - - If you choose to host your Jekyll website elsewhere than GitHub Pages, you can use any Jekyll plugin (instructions to self-host vary by web host and won't be covered here, but [this](http://jekyllrb.com/docs/plugins/) is a page about how to install plugins once you've set up your self-hosted Jekyll site). You can search for "Jekyll plugin" plus the functionality you need to see if one is available, or check out the "Available plugins" section near the bottom of [this page](http://jekyllrb.com/docs/plugins/) for a list of plugins. + - If you choose to host your Jekyll website elsewhere than GitHub Pages, you can use any Jekyll plugin (instructions to self-host vary by web host and won't be covered here, but [this](https://jekyllrb.com/docs/plugins/) is a page about how to install plugins once you've set up your self-hosted Jekyll site). You can search for "Jekyll plugin" plus the functionality you need to see if one is available, or check out the "Available plugins" section near the bottom of [this page](https://jekyllrb.com/docs/plugins/) for a list of plugins. - You can keep GitHub Pages' free hosting of your Jekyll website, but give the site a **custom domain name** (domain names are purchased for a reasonable yearly fee—usually around $10—from a "domain name registrar" such as [NearlyFreeSpeech.net](https://www.nearlyfreespeech.net/services/domains)). For example, the author's LiteratureGeek.com blog is built with Jekyll and hosted on GitHub Pages just like the site you built with this lesson, but it uses a custom domain name I purchased and configured to point to my site. Instructions on setting up a custom domain name can be found [here](https://help.github.com/articles/using-a-custom-domain-with-github-pages/).
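As a small illustration of the URL pattern from step 10, here is a hypothetical helper (the function name is mine, not part of the lesson) that assembles a GitHub Pages project-site address:

```python
def github_pages_url(username: str, repo: str) -> str:
    # Project sites follow username.github.io/repo-name/. The lesson notes
    # that upper- and lowercase names do not automatically point to the
    # same site, which is why it recommends all-lowercase site names.
    return f"https://{username}.github.io/{repo}/"

print(github_pages_url("amandavisconti", "JekyllDemo"))
# https://amandavisconti.github.io/JekyllDemo/
```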
    @@ -566,7 +566,7 @@ If you set up a custom domain with your GitHub Pages-hosted website, to avoid a ### Help -If you run into an issue, [Jekyll has a page on troubleshooting](https://jekyllrb.com/docs/troubleshooting/) that might help. If you're working on the command line and get an error message, don't forget to try searching for that specific error message online. Besides search engines, [the StackExchange site](http://stackexchange.com/) is a good place to find questions and answers from people who have run into the same problem as you in the past. +If you run into an issue, [Jekyll has a page on troubleshooting](https://jekyllrb.com/docs/troubleshooting/) that might help. If you're working on the command line and get an error message, don't forget to try searching for that specific error message online. Besides search engines, [the StackExchange site](https://stackexchange.com/) is a good place to find questions and answers from people who have run into the same problem as you in the past. ### Credits @@ -578,12 +578,12 @@ The Editorial Board would like to thank [spswanz](https://github.com/spswanz) fo Check out the following links for documentation, inspiration, and further reading about Jekyll: -* [Official Jekyll Documentation](http://jekyllrb.com/docs/home/) -* Jekyll "unofficially" links to two Windows + Jekyll resources: [http://jekyll-windows.juthilo.com/](http://jekyll-windows.juthilo.com/) and [https://davidburela.wordpress.com/2015/11/28/easily-install-jekyll-on-windows-with-3-command-prompt-entries-and-chocolatey/](https://davidburela.wordpress.com/2015/11/28/easily-install-jekyll-on-windows-with-3-command-prompt-entries-and-chocolatey/) +* [Official Jekyll Documentation](https://jekyllrb.com/docs/home/) +* Jekyll "unofficially" links to two Windows + Jekyll resources: [https://jekyll-windows.juthilo.com/](https://jekyll-windows.juthilo.com/) and [https://davidburela.wordpress.com/2015/11/28/easily-install-jekyll-on-windows-with-3-command-prompt-entries-and-chocolatey/](https://davidburela.wordpress.com/2015/11/28/easily-install-jekyll-on-windows-with-3-command-prompt-entries-and-chocolatey/) * [https://help.github.com/articles/using-jekyll-with-pages/](https://help.github.com/articles/using-jekyll-with-pages/) -* Amanda Visconti, ["Introducing Static Sites for Digital Humanities Projects (why & what are Jekyll, GitHub, etc.?)"](http://literaturegeek.com/2015/12/08/WhyJekyllGitHub) -* Alex Gil, ["How (and Why) to Generate a Static Website Using Jekyll, Part 1"](http://chronicle.com/blogs/profhacker/jekyll1/60913) +* Amanda Visconti, ["Introducing Static Sites for Digital Humanities Projects (why & what are Jekyll, GitHub, etc.?)"](https://literaturegeek.com/2015/12/08/WhyJekyllGitHub) +* Alex Gil, ["How (and Why) to Generate a Static Website Using Jekyll, Part 1"](https://chronicle.com/blogs/profhacker/jekyll1/60913) * Eduardo Bouças, ["An Introduction to Static Site Generators"](https://davidwalsh.name/introduction-static-site-generators) -* [Jekyll Style Guide](http://ben.balter.com/jekyll-style-guide/) -* The [Prose](http://prose.io/) content editor (built on Jekyll) +* [Jekyll Style Guide](https://ben.balter.com/jekyll-style-guide/) +* The [Prose](https://prose.io/) content editor (built on Jekyll) * [Join the Digital Humanities Slack](https://digitalhumanities.slack.com) (anyone can join, even if you have no DH experience) and check out the #publishing channel for discussions of Jekyll and other DH publishing platforms diff --git 
a/en/lessons/calibrating-radiocarbon-dates-r.md b/en/lessons/calibrating-radiocarbon-dates-r.md index fd280dbe1b..1a61fec5fc 100644 --- a/en/lessons/calibrating-radiocarbon-dates-r.md +++ b/en/lessons/calibrating-radiocarbon-dates-r.md @@ -159,7 +159,7 @@ By now it is clear that these details, if poorly understood, can quickly lead to ## Applications with R -Many tools are now available to calibrate radiocarbon data, like [OxCal](https://c14.arch.ox.ac.uk/oxcal/), [CALIB](http://calib.org) and [ChronoModel](https://chronomodel.com). But these tools are rather intended to deal with [Bayesian](https://perma.cc/R247-RG8E) modeling problems of chronological sequences (which we don't cover in this lesson). R offers an interesting alternative to these tools which suits our needs. R is distributed under an open license, promotes reproducibility and lets you integrate the processing of radiocarbon date into larger projects (spatial analysis, etc.). +Many tools are now available to calibrate radiocarbon data, like [OxCal](https://c14.arch.ox.ac.uk/oxcal/), [CALIB](https://calib.org) and [ChronoModel](https://chronomodel.com). But these tools are rather intended to deal with [Bayesian](https://perma.cc/R247-RG8E) modeling problems of chronological sequences (which we don't cover in this lesson). R offers an interesting alternative to these tools which suits our needs. R is distributed under an open license, promotes reproducibility and lets you integrate the processing of radiocarbon dates into larger projects (spatial analysis, etc.). Several R packages are useful for calibrating radiocarbon dates: for example, packages like [Bchron](https://cran.r-project.org/package=Bchron) and [oxcAAR](https://cran.r-project.org/package=oxcAAR) are often oriented towards modeling (constructing chronologies, age-depth models, etc.). The package you will use in this lesson is called [rcarbon](https://cran.r-project.org/package=rcarbon).[^14] It allows you to easily calibrate and analyze radiocarbon ages. @@ -524,7 +524,7 @@ In this lesson, you learned how to combine conventional dates and check for cons [^7]: See, for example, Calabrisotto, C. S., Amadio, M., Fedi, M. E., Liccioli, L. & Bombardieri, L. 2017. "Strategies for Sampling Difficult Archaeological Contexts and Improving the Quality of Radiocarbon Data: The Case of Erimi Laonin Tou Porakou, Cyprus." *Radiocarbon* 59 (6): 1919–30. . -[^8]: Arnold, J. R., & W. F. Libby. 1949. "Age Determinations by Radiocarbon Content: Checks with Samples of Known Age". *Science* 110 (2869): 678‑80. ; Libby, W. F. "Radiocarbon Dating". *Nobel Lecture*. Stockholm, 12 December 1960. [http://www.nobelprize.org/nobel_prizes/chemistry/laureates/1960/libby-lecture.html](https://perma.cc/HPU7-F8GD). +[^8]: Arnold, J. R., & W. F. Libby. 1949. "Age Determinations by Radiocarbon Content: Checks with Samples of Known Age". *Science* 110 (2869): 678‑80. ; Libby, W. F. "Radiocarbon Dating". *Nobel Lecture*. Stockholm, 12 December 1960. [https://www.nobelprize.org/nobel_prizes/chemistry/laureates/1960/libby-lecture.html](https://perma.cc/HPU7-F8GD). [^9]: There actually exist three series of calibration curves: IntCal for the northern hemisphere, SHCal for the southern hemisphere, and Marine for marine samples.
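The lesson does this in R with rcarbon, but the core idea behind calibration can be sketched from scratch. In the Python sketch below, the three-column curve excerpt is invented toy data (not IntCal20) and all the names are mine: each calendar year on the curve is scored by how well its curve value matches the measured age once laboratory and curve errors are combined.

```python
import numpy as np

# Toy excerpt of a calibration curve: calendar years BP, the curve's
# radiocarbon age at each year, and the curve's own error. Invented values.
cal_bp = np.array([5600, 5590, 5580, 5570, 5560])
curve_c14 = np.array([4870, 4880, 4860, 4850, 4840])
curve_sd = np.array([20, 20, 25, 25, 30])

c14_age, c14_err = 4850, 30  # a conventional date: 4850 +/- 30 BP

# Combine laboratory and curve uncertainty, take a normal density per
# calendar year, then normalise into a probability distribution.
total_var = c14_err**2 + curve_sd**2
density = np.exp(-((c14_age - curve_c14) ** 2) / (2 * total_var))
density = density / np.sqrt(2 * np.pi * total_var)
density /= density.sum()

for year, p in zip(cal_bp, density):
    print(f"{year} cal BP: {p:.3f}")
```

rcarbon's `calibrate()` performs this matching against the real, far denser curves and adds proper reporting of calibrated ranges.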
diff --git a/en/lessons/cleaning-data-with-openrefine.md b/en/lessons/cleaning-data-with-openrefine.md index 764254e38b..ecd26da28a 100755 --- a/en/lessons/cleaning-data-with-openrefine.md +++ b/en/lessons/cleaning-data-with-openrefine.md @@ -395,28 +395,28 @@ cleaning features, as you're performing these steps on a copy of your data set, and *OpenRefine* allows you to trace back all of your steps in the case you have made an error. - [*OpenRefine*]: http://openrefine.org "OpenRefine" + [*OpenRefine*]: https://openrefine.org "OpenRefine" [Powerhouse museum]: https://powerhouse.com.au/ "Powerhouse museum" [*Potter’s Wheel ABC*]: https://perma.cc/Q6QD-E64N "Potter's Wheel ABC " - [*Wrangler*]: http://vis.stanford.edu/papers/wrangler/ "Wrangler" - [data profiling]: http://en.wikipedia.org/wiki/Data_profiling - [named-entity recognition]: http://en.wikipedia.org/wiki/Named-entity_recognition - [Library of Congress]: http://www.loc.gov/index.html + [*Wrangler*]: https://vis.stanford.edu/papers/wrangler/ "Wrangler" + [data profiling]: https://en.wikipedia.org/wiki/Data_profiling + [named-entity recognition]: https://en.wikipedia.org/wiki/Named-entity_recognition + [Library of Congress]: https://www.loc.gov/index.html "Library of Congress" - [OCLC]: http://www.oclc.org/home.en.html "OCLC" + [OCLC]: https://www.oclc.org/home.en.html "OCLC" [website]: https://powerhouse.com.au/ "website" - [Creative Commons Attribution Share Alike (CCASA) license]: http://creativecommons.org/licenses/by-nc/2.5/au/ - [Controlled vocabulary]: http://en.wikipedia.org/wiki/Controlled_vocabulary - [Linked Data]: http://en.wikipedia.org/wiki/Linked_data + [Creative Commons Attribution Share Alike (CCASA) license]: https://creativecommons.org/licenses/by-nc/2.5/au/ + [Controlled vocabulary]: https://en.wikipedia.org/wiki/Controlled_vocabulary + [Linked Data]: https://en.wikipedia.org/wiki/Linked_data [Download OpenRefine]: https://openrefine.org/download [phm-collection]: /assets/cleaning-data-with-openrefine/phm-collection.tsv [Powerhouse Museum Website]: /images/powerhouseScreenshot.png - [facet]: http://en.wikipedia.org/wiki/Faceted_search + [facet]: https://en.wikipedia.org/wiki/Faceted_search [Screenshot of OpenRefine Example]: /images/overviewOfSomeClusters.png [GREL documentation]: https://openrefine.org/docs/manual/grelfunctions - [CSV]: http://en.wikipedia.org/wiki/Comma-separated_values + [CSV]: https://en.wikipedia.org/wiki/Comma-separated_values [RDF Transform extension]: https://github.com/AtesComp/rdf-transform#rdf-transform [NER extension]: https://github.com/stkenny/Refine-NER-Extension diff --git a/en/lessons/cleaning-ocrd-text-with-regular-expressions.md b/en/lessons/cleaning-ocrd-text-with-regular-expressions.md index 5f3f658eec..d18c05eb28 100755 --- a/en/lessons/cleaning-ocrd-text-with-regular-expressions.md +++ b/en/lessons/cleaning-ocrd-text-with-regular-expressions.md @@ -417,27 +417,27 @@ Regular Expressions are powerful. Yes, they are powerful enough to completely destroy your data. So practice on copies and take it one itty bitty step at a time. 
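In that spirit, here is a small illustration of the one-pattern-at-a-time approach using Python's `re` module, which the lesson's link references below point to. The OCR'd line is an invented example rather than text from the lesson's corpus.

```python
import re

# Work on a copy, one conservative substitution at a time, checking the
# result after each step.
ocr_line = "Page  12\nTHE  COURT :  Pro- \nceed, counsel."

step1 = re.sub(r"-\s*\n", "", ocr_line)   # rejoin words hyphenated across line breaks
step2 = re.sub(r"[ \t]{2,}", " ", step1)  # collapse runs of spaces and tabs
step3 = re.sub(r"\s+:", ":", step2)       # remove stray space before colons

print(step3)
# Page 12
# THE COURT: Proceed, counsel.
```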
- [HeinOnline]: http://home.heinonline.org/ + [HeinOnline]: https://home.heinonline.org/ "Source for Legal and Government-based documents" - [pdfminer]: http://www.unixuser.org/~euske/python/pdfminer/index.html + [pdfminer]: https://www.unixuser.org/~euske/python/pdfminer/index.html "PDF Miner Module" [Pythonium’s Pyrexp]: https://pythonium.net/regex "Python Regex Tester" - [Patterns App]: http://krillapps.com/patterns/ + [Patterns App]: https://krillapps.com/patterns/ "Patterns App for RegEx Experimentation" [cheat sheet]: https://cheatography.com/davechild/cheat-sheets/regular-expressions/ "Reg Ex Cheat Sheet" - [documentation]: http://docs.python.org/2/library/re.html + [documentation]: https://docs.python.org/2/library/re.html "Re Module Documentation" - [Regular Expression HOWTO documentation]: http://docs.python.org/2/howto/regex.html#regex-howto + [Regular Expression HOWTO documentation]: https://docs.python.org/2/howto/regex.html#regex-howto "Regular Expressions HOWTO" - [sed]: http://www.gnu.org/software/sed/ "GNU's sed editor" - [grep]: http://www.gnu.org/software/grep/ "GNU's grep editor" - [re.search()]: http://docs.python.org/2/library/re.html#re.search + [sed]: https://www.gnu.org/software/sed/ "GNU's sed editor" + [grep]: https://www.gnu.org/software/grep/ "GNU's grep editor" + [re.search()]: https://docs.python.org/2/library/re.html#re.search "Explanation of re.search() function" - [re.sub()]: http://docs.python.org/2/library/re.html#re.sub + [re.sub()]: https://docs.python.org/2/library/re.html#re.sub "Explanation of re.sub() function" - [verbose mode]: http://docs.python.org/2/library/re.html#re.VERBOSE + [verbose mode]: https://docs.python.org/2/library/re.html#re.VERBOSE "Explanation of re.verbose mode" - [re.compile()]: http://docs.python.org/2/library/re.html#re.compile + [re.compile()]: https://docs.python.org/2/library/re.html#re.compile "Explanation of re.compile() function" diff --git a/en/lessons/collaborative-blog-with-jekyll-github.md b/en/lessons/collaborative-blog-with-jekyll-github.md index b6182e3d25..6e8a946cd9 100644 --- a/en/lessons/collaborative-blog-with-jekyll-github.md +++ b/en/lessons/collaborative-blog-with-jekyll-github.md @@ -297,7 +297,7 @@ Even if you are the only person authoring on your site, using branches and pull Remember to substitute *https://github.com/your-username/your-repo-name* for *https://github.com/scholarslab/CollabDemo* in these instructions. To set up your site for our review process: -Each person in addition to you you who will write on the site should [create a user account on GitHub.com](http://github.com/join), if they do not already have one. They will need to share their username with you (or someone else who owns the repository). +Each person in addition to you who will write on the site should [create a user account on GitHub.com](https://github.com/join), if they do not already have one. They will need to share their username with you (or someone else who owns the repository). Your repository page (for example, https://github.com/scholarslab/CollabDemo) has a horizontal row of links just below the name of the repository. Click on the "settings" link, then click on "Manage Access" in the left menu.
@@ -617,22 +617,22 @@ If you have read the longer explanations above already and just want a checklist - [Scholars' Lab cheatsheet](https://github.com/scholarslab/scholarslab.org/blob/master/docs/authoring-and-editing.md#markdown--formatting) on basic Markdown formatting, limited to the most frequently used formatting for our particular needs ### Troubleshooting -If you run into an a problem, try reading [Jekyll 's troubleshooting page](https://jekyllrb.com/docs/troubleshooting/). Besides search engines, [the StackExchange site](http://stackexchange.com/) is a good place to find questions and answers from people who have run into the same problem as you in the past (and, hopefully, recorded how they solved it). You might also [join the Digital Humanities Slack](http://tinyurl.com/DHslack) (anyone can join, even if you have no DH experience) and ask questions in the #DHanswers channel. +If you run into a problem, try reading [Jekyll's troubleshooting page](https://jekyllrb.com/docs/troubleshooting/). Besides search engines, [the StackExchange site](https://stackexchange.com/) is a good place to find questions and answers from people who have run into the same problem as you in the past (and, hopefully, recorded how they solved it). You might also [join the Digital Humanities Slack](https://tinyurl.com/DHslack) (anyone can join, even if you have no DH experience) and ask questions in the #DHanswers channel. ### Advanced learning The following links are helpful for learning more about documentation, inspiration, and further reading about Jekyll: *Introductions to Jekyll and static sites* -* Amanda Visconti, ["Introducing Static Sites for Digital Humanities Projects (why & what are Jekyll, GitHub, etc.?)"](http://literaturegeek.com/2015/12/08/WhyJekyllGitHub) +* Amanda Visconti, ["Introducing Static Sites for Digital Humanities Projects (why & what are Jekyll, GitHub, etc.?)"](https://literaturegeek.com/2015/12/08/WhyJekyllGitHub) * [Building a static website with Jekyll and GitHub Pages](/en/lessons/building-static-sites-with-jekyll-github-pages) -* Alex Gil, ["How (and Why) to Generate a Static Website Using Jekyll, Part 1"](http://chronicle.com/blogs/profhacker/jekyll1/60913) +* Alex Gil, ["How (and Why) to Generate a Static Website Using Jekyll, Part 1"](https://chronicle.com/blogs/profhacker/jekyll1/60913) * Eduardo Bouças, ["An Introduction to Static Site Generators"](https://davidwalsh.name/introduction-static-site-generators) *Deeper understanding of Jekyll and GitHub Pages* -* [Official Jekyll Documentation](http://jekyllrb.com/docs/home/) -* Jekyll "unofficially" links to two Windows + Jekyll resources: [http://jekyll-windows.juthilo.com/](http://jekyll-windows.juthilo.com/) and [https://davidburela.wordpress.com/2015/11/28/easily-install-jekyll-on-windows-with-3-command-prompt-entries-and-chocolatey/](https://davidburela.wordpress.com/2015/11/28/easily-install-jekyll-on-windows-with-3-command-prompt-entries-and-chocolatey/) +* [Official Jekyll Documentation](https://jekyllrb.com/docs/home/) +* Jekyll "unofficially" links to two Windows + Jekyll resources: [https://jekyll-windows.juthilo.com/](https://jekyll-windows.juthilo.com/) and [https://davidburela.wordpress.com/2015/11/28/easily-install-jekyll-on-windows-with-3-command-prompt-entries-and-chocolatey/](https://davidburela.wordpress.com/2015/11/28/easily-install-jekyll-on-windows-with-3-command-prompt-entries-and-chocolatey/) *
[https://help.github.com/articles/using-jekyll-with-pages/](https://help.github.com/articles/using-jekyll-with-pages/) -* [Jekyll Style Guide](http://ben.balter.com/jekyll-style-guide/) +* [Jekyll Style Guide](https://ben.balter.com/jekyll-style-guide/) * [Using a custom domain with GitHub Pages hosting](https://help.github.com/en/articles/using-a-custom-domain-with-github-pages) You can purchase a domain (such as my-own-domain.com; average costs run around $10-20/year) and switch your website to using that instead of *username.github.io/repo-name* but still use GitHub Pages' free hosting.
    @@ -653,7 +653,7 @@ If you set up a custom domain with your GitHub Pages-hosted website, to avoid a * [Exitwp](https://github.com/thomasf/exitwp), a Python script developed by Thomas Frössman that Scholars' Lab used to migrate our blog from WordPress to Jekyll *Tools* -* Robust plain text editor options: [Atom](https://atom.io), [Sublime Text](https://www.sublimetext.com/), [Prose](http://prose.io/) content editor (built on Jekyll) +* Robust plain text editor options: [Atom](https://atom.io), [Sublime Text](https://www.sublimetext.com/), [Prose](https://prose.io/) content editor (built on Jekyll) * Project management options: [Trello](https://trello.com/en-US), [GitHub's project boards](https://help.github.com/en/articles/about-project-boards) *Case study links* diff --git a/en/lessons/corpus-analysis-with-antconc.md b/en/lessons/corpus-analysis-with-antconc.md index 2813d5dc1a..c3b928b51f 100755 --- a/en/lessons/corpus-analysis-with-antconc.md +++ b/en/lessons/corpus-analysis-with-antconc.md @@ -47,13 +47,13 @@ You have done this sort of thing before, if you have ever... * Used [Voyant Tools][48] for looking at patterns in one text * Followed [Programming Historian][51]’s Introduction to Python tutorials -In many ways [Voyant](http://voyant-tools.org/) is a gateway into conducting more sophisticated, replicable analysis, as the DIY aesthetic of Python or R scripting may not appeal to everyone. [AntConc](http://www.laurenceanthony.net/software/antconc/) fills this void by being a standalone software package for linguistic analysis of texts, freely available for Windows, Mac OS, and Linux and is highly maintained by its creator, [Laurence Anthony](http://www.laurenceanthony.net/). There are other concordance software packages available, but it is freely available across platforms and very well maintained. See the [concordance bibliography][56] for other resources. +In many ways [Voyant](https://voyant-tools.org/) is a gateway into conducting more sophisticated, replicable analysis, as the DIY aesthetic of Python or R scripting may not appeal to everyone. [AntConc](https://www.laurenceanthony.net/software/antconc/) fills this void by being a standalone software package for linguistic analysis of texts, freely available for Windows, Mac OS, and Linux and is highly maintained by its creator, [Laurence Anthony](https://www.laurenceanthony.net/). There are other concordance software packages available, but it is freely available across platforms and very well maintained. See the [concordance bibliography][56] for other resources. This tutorial explores several different ways to approach a corpus of texts. It's important to note that corpus linguistic approaches are rarely, if ever, a one-size-fits-all affair. So, as you go through each step, it's worth thinking about what you're doing and how it can help you answer a specific question with your data. Although I present this tutorial in a building-block approach of 'do this then that to achieve x', it's not always necessary to follow the exact order outlined here. This lesson provides an outline of some of the methods available, rather than a recipe for success. ### Tutorial downloads -1. Software:[AntConc](http://www.laurenceanthony.net/software/antconc/). -Unzip the download if necessary, and launch the application. Screen shots below may vary slightly from the version you have (and by operationg system, of course), but the procedures are more or less the same across platforms and recent versions of AntConc.
This tutorial is written with a (much older) version of AntConc in mind, as I find it easier to use in an introductory context. You are welcome to use the most recent version, but if you wish to follow along with the screenshots provided, you can download the version used here, [version 3.2.4](http://www.laurenceanthony.net/software/antconc/releases/AntConc324/). +1. Software: [AntConc](https://www.laurenceanthony.net/software/antconc/). +Unzip the download if necessary, and launch the application. Screen shots below may vary slightly from the version you have (and by operating system, of course), but the procedures are more or less the same across platforms and recent versions of AntConc. This tutorial is written with a (much older) version of AntConc in mind, as I find it easier to use in an introductory context. You are welcome to use the most recent version, but if you wish to follow along with the screenshots provided, you can download the version used here, [version 3.2.4](https://www.laurenceanthony.net/software/antconc/releases/AntConc324/). 2. Sample Corpus: Download the [zip file of movie reviews](/assets/corpus-analysis-with-antconc/antconc_corpus_files.zip). @@ -86,10 +86,10 @@ Go to your desktop and check to see you can find your text file. Repeating this a lot is how you would build a corpus of plain text files; this process is called _corpus construction_, which very often involves addressing questions of sampling, representativeness and organization. Remember, *each file you want to use in your corpus _must_ be a plain text file for Antconc to use it.* It is customary to name files with the .txt suffix so that you know what kind of file it is. As you might imagine, it can be rather tedious to build up a substantial corpus one file at a time, especially if you intend to process a large set of documents. It is very common, therefore, to use webscraping (using a small program to automatically grab files from the web for you) to construct your corpus. To learn more about the concepts and techniques for webscraping, see the _Programming Historian_ tutorials [scraping with Beautiful Soup][50] and [automatic downloading with wget][51]. -Rather than build a corpus one document at a time, we're going to use a prepared corpus of positive and negative movie reviews, borrowed from the [Natural Language Processing Toolkit](http://www.nltk.org/). The NLTK movie review corpus has 2000 reviews, organized by positive and negative outcomes; today we will be addressing a small subset of them (200 positive, 200 negative). +Rather than build a corpus one document at a time, we're going to use a prepared corpus of positive and negative movie reviews, borrowed from the [Natural Language Processing Toolkit](https://www.nltk.org/). The NLTK movie review corpus has 2000 reviews, organized by positive and negative outcomes; today we will be addressing a small subset of them (200 positive, 200 negative). Corpus construction is a subfield in its own right. Please see [Representativeness in Corpus Design](https://academic.oup.com/dsh/article-abstract/8/4/243/928942)," _Literary and Linguistic Computing_, 8 (4): 243-257 and [_Developing Linguistic Corpora: a Guide to Good Practice_](https://www.amazon.com/Developing-Linguistic-Corpora-Practice-Guides/dp/1842172050/ref=sr_1_1) for more information.
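If you would rather pull the reviews directly from NLTK instead of the prepared zip file, the sketch below shows one way to write out a 200-file subset per category as plain text files. It assumes Python with the `nltk` package installed, and the `movie_corpus` output folder name is my own choice.

```python
import pathlib

import nltk
from nltk.corpus import movie_reviews

nltk.download("movie_reviews")  # fetch the corpus once

print(movie_reviews.categories())  # ['neg', 'pos']

# Write the first 200 reviews in each category out as plain .txt files,
# mirroring the 200 positive / 200 negative subset used in this lesson.
for category in movie_reviews.categories():
    outdir = pathlib.Path("movie_corpus") / category
    outdir.mkdir(parents=True, exist_ok=True)
    for fileid in movie_reviews.fileids(category)[:200]:
        name = fileid.split("/")[-1]  # fileids look like 'neg/cv000_29416.txt'
        (outdir / name).write_text(movie_reviews.raw(fileid), encoding="utf-8")
```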
### Getting Started with AntConc: The AntConc user interface, loading corpora
 
@@ -255,7 +255,7 @@ You can also opt to swap reference corpus & main files (SWAP REF/MAIN FILES)
 
 In Keyword List, just hit Start (with nothing typed in the search box). If you've just swapped the reference corpus and the target files, you may be prompted to create a new word list before AntConc will calculate the keywords. We see a list of Keywords: words that are much more "unusual" – more statistically unexpected – in the corpus we are looking at when compared to the reference corpus.
 
-> Keyness: this is the frequency of a word in the text when compared with its frequency in a reference corpus, "such that the statistical probability as computed by an appropriate procedure is smaller than or equal to a p value specified by the user." – taken from [here][41].) For those interested in the statistical details, see the section on keyness on p7 of Laurence Anthony's [readme file](http://www.laurenceanthony.net/software/antconc/releases/AntConc335/help.pdf).
+> Keyness: this is the frequency of a word in the text when compared with its frequency in a reference corpus, "such that the statistical probability as computed by an appropriate procedure is smaller than or equal to a p value specified by the user." – taken from [here][41]. For those interested in the statistical details, see the section on keyness on p. 7 of Laurence Anthony's [readme file](https://www.laurenceanthony.net/software/antconc/releases/AntConc335/help.pdf).
 
 What are our keywords?
 
@@ -298,17 +298,17 @@ In summary: it's worth thinking about:
 
 ### Further resources for this tutorial
 
 [A short bibliography on corpus linguistics][43].
 
-[A more step-by-step version of this tutorial, assuming no computer knowledge](http://hfroehli.ch/workshops/getting-started-with-antconc/)
+[A more step-by-step version of this tutorial, assuming no computer knowledge](https://hfroehli.ch/workshops/getting-started-with-antconc/)
 
-[41]: http://www.lexically.net/downloads/version6/HTML/index.html?keyness_definition.htm
-[43]: http://hfroehlich.wordpress.com/2014/05/11/intro-bibliography-corpus-linguistics/
-[47]: http://hfroehli.ch/workshops/getting-started-with-antconc/
-[48]: http://voyant-tools.org/
+[41]: https://www.lexically.net/downloads/version6/HTML/index.html?keyness_definition.htm
+[43]: https://hfroehlich.wordpress.com/2014/05/11/intro-bibliography-corpus-linguistics/
+[47]: https://hfroehli.ch/workshops/getting-started-with-antconc/
+[48]: https://voyant-tools.org/
 [50]: /lessons/intro-to-beautiful-soup
 [51]: /lessons/automated-downloading-with-wget
-[52]: http://www.antlab.sci.waseda.ac.jp/
-[53]: http://notepad-plus-plus.org/
-[54]: http://www.barebones.com/products/textwrangler/
-[55]: http://www.wordfrequency.info/free.asp
-[56]: http://hfroehli.ch/2014/05/11/intro-bibliography-corpus-linguistics/
+[52]: https://www.antlab.sci.waseda.ac.jp/
+[53]: https://notepad-plus-plus.org/
+[54]: https://www.barebones.com/products/textwrangler/
+[55]: https://www.wordfrequency.info/free.asp
+[56]: https://hfroehli.ch/2014/05/11/intro-bibliography-corpus-linguistics/
diff --git a/en/lessons/correspondence-analysis-in-R.md b/en/lessons/correspondence-analysis-in-R.md
index 18ef183f7c..165879223a 100755
--- a/en/lessons/correspondence-analysis-in-R.md
+++ b/en/lessons/correspondence-analysis-in-R.md
@@ -61,7 +61,7 @@ With more data, CA can uncover more subtle distinctions among groups within a pa
 
 ## Canadian Parliamentary Committees
 
-In the Canadian Parliamentary
system, citizens elect representatives called Members of Parliament, or MPs, to the House of Commons. MPs are responsible for voting on and proposing changes to legislation in Canada. [Parliamentary Committees (CPCs)](http://www.ourcommons.ca/Committees/en/Home) consist of MPs who inform the House about important details of policy in a topic area. Examples of such committees include the CPCs on Finance, Justice and Health.
+In the Canadian Parliamentary system, citizens elect representatives called Members of Parliament, or MPs, to the House of Commons. MPs are responsible for voting on and proposing changes to legislation in Canada. [Parliamentary Committees (CPCs)](https://www.ourcommons.ca/Committees/en/Home) consist of MPs who inform the House about important details of policy in a topic area. Examples of such committees include the CPCs on Finance, Justice and Health.
 
 We will use abbreviations for the parliamentary committees because the names can get long, making them hard to read on a plot. You can use this table as a reference guide for the abbreviations and their respective committee names:
 
@@ -91,7 +91,7 @@ As a historian, I suspect that MPs are organized according to committee topics d
 
 ## Setting Up R for CA
 
-To do a CA, we will need a library that will do linear algebra for us. For the more mathematics inclined, there is an appendix with some of the details about how this is done. In R, there are a number of options for CA, but we will use the [FactoMineR library](http://factominer.free.fr/)[^factominer] a library focussed on "multivariate exploratory data analysis." FactoMineR can be used to conduct all kinds of different multivariate analysis including hierarchical clusters, factor analysis and so on.
+To do a CA, we will need a library that will do linear algebra for us. For the more mathematically inclined, there is an appendix with some of the details about how this is done. In R, there are a number of options for CA, but we will use the [FactoMineR library](https://factominer.free.fr/)[^factominer], a library focussed on "multivariate exploratory data analysis." FactoMineR can be used to conduct all kinds of different multivariate analyses, including hierarchical clusters, factor analysis and so on.
 
 But first, here is how to install and call the libraries, then pop them into an R object for wrangling.
 
@@ -348,7 +348,7 @@ We also learned how to interpret a CA and how to detect potential analytical pit
 
 In general, the benefit of this analysis is to provide a quick overview of a two-category dataset as a pathfinder to more substantive historical issues. The use of members and meetings or events in all areas of life (business, not-for-profit, municipal meetings, twitter hashtags etc.) is a common approach to such analysis. Social groups and their preferences are another common use for CA. In each case, the visualisation offers a map with which to observe a snapshot of social, cultural and political life.
 
-Next steps may include adding further categorical dimensions to our analysis, such as incorporating political party, age or gender. When you do CA with more than two categories, it is called [Multiple Correspondence Analysis or MCA](http://www.sthda.com/english/wiki/multiple-correspondence-analysis-essentials-interpretation-and-application-to-investigate-the-associations-between-categories-of-multiple-qualitative-variables-r-software-and-data-mining). While the Mathematics for MCA is more complicated, the end results are quite similar to CA.
+Next steps may include adding further categorical dimensions to our analysis, such as incorporating political party, age or gender. When you do CA with more than two categories, it is called [Multiple Correspondence Analysis or MCA](https://www.sthda.com/english/wiki/multiple-correspondence-analysis-essentials-interpretation-and-application-to-investigate-the-associations-between-categories-of-multiple-qualitative-variables-r-software-and-data-mining). While the mathematics for MCA is more complicated, the end results are quite similar to CA.
 
 Hopefully, you can now apply these methods to your own data, helping you to uncover questions and hypotheses that enrich your historical research. Good luck!
 
@@ -398,7 +398,7 @@ JUST 0.408 0.000 0.000
 
 The normalisation process does something interesting. Those who are members of multiple committees and/or who belong to committees with many members will tend to have normalisation scores that are lower, suggesting that they are more central to the network. These members will be put closer to the centre of the matrix. For example, the cell belonging to S Ambler and IWFA has the lowest score of 0.192 because S Ambler is a member of three committees and the IWFA committee has nine members in the graph represented.
 
-The next stage is to find the singular value decomposition of this normalised data. This involves fairly complex linear algebra that will not be covered here, but you can learn more from [these Singular Value Decomposition lecture notes](https://math.mit.edu/classes/18.095/2016IAP/lec2/SVD_Notes.pdf) or in more detail from [this pdf file on SVD](http://davetang.org/file/Singular_Value_Decomposition_Tutorial.pdf). I will try to summarize what happens in lay terms.
+The next stage is to find the singular value decomposition of this normalised data. This involves fairly complex linear algebra that will not be covered here, but you can learn more from [these Singular Value Decomposition lecture notes](https://math.mit.edu/classes/18.095/2016IAP/lec2/SVD_Notes.pdf) or in more detail from [this pdf file on SVD](https://davetang.org/file/Singular_Value_Decomposition_Tutorial.pdf). I will try to summarize what happens in lay terms.
 
 * Two new matrices are created that show "dimension" scores for the rows (committees) and the columns (MPs) based on eigenvectors.
 * The number of dimensions is equal to the size of the columns or rows minus 1, whichever is smaller. In this case, there are five committees compared to the MPs' eleven, so the number of dimensions is 4.
 
@@ -464,7 +464,7 @@ Another important score is visible on the CA graph - the percentage of explanato
 
 [^inertia]: In general, inertia in statistics refers to the variation or "spread" of a dataset. It is analogous to standard deviation in distribution data.
 
-[^pickton]: See Laura Kane (April 3, 2017), "Missing and murdered women's inquiry not reaching out to families, say advocates." *CBC News Indigenous*. 
+[^pickton]: See Laura Kane (April 3, 2017), "Missing and murdered women's inquiry not reaching out to families, say advocates." *CBC News Indigenous*.
 
 [^pvalue]: In statistics, a p-value, short for _probability value_, is an indicator of how likely an outcome would have occurred under random circumstances. A low p-value would suggest a low probability that the result would have occurred at random and thus provides some evidence that a null hypothesis (in this case, that the MPs and CPCs are independent categories) is unlikely.
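To make the normalisation-and-SVD summary above concrete, here is a rough sketch in Python with NumPy. This is an illustration under stated assumptions, not the lesson's code (the lesson works in R with FactoMineR), and the membership matrix is invented.

```python
# Rough illustration of the CA steps summarised above: form the
# correspondence matrix, normalise it into standardised residuals,
# then take the singular value decomposition.
import numpy as np

# invented committees x MPs membership matrix (1 = member)
X = np.array([[1, 0, 1, 1, 0],
              [0, 1, 1, 0, 1],
              [1, 1, 0, 0, 1]], dtype=float)

P = X / X.sum()                    # correspondence matrix
r = P.sum(axis=1, keepdims=True)   # row masses (committees)
c = P.sum(axis=0, keepdims=True)   # column masses (MPs)
S = (P - r @ c) / np.sqrt(r @ c)   # standardised residuals

U, sing, Vt = np.linalg.svd(S, full_matrices=False)
# squared singular values give the inertia of each dimension;
# at most min(rows, columns) - 1 dimensions are meaningful
print(sing ** 2)
```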
diff --git a/en/lessons/counting-frequencies.md b/en/lessons/counting-frequencies.md index 4a7d676f4f..e317c994ba 100755 --- a/en/lessons/counting-frequencies.md +++ b/en/lessons/counting-frequencies.md @@ -434,8 +434,8 @@ file to make sure you have the correct code. - programming-historian-5 ([zip sync][]) - [list comprehension]: http://docs.python.org/tutorial/datastructures.html#list-comprehensions - [computer scientists at Glasgow]: http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words - [Regular Expressions]: https://web.archive.org/web/20180416143856/http://www.diveintopython.net/regular_expressions/index.html + [list comprehension]: https://docs.python.org/tutorial/datastructures.html#list-comprehensions + [computer scientists at Glasgow]: https://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words + [Regular Expressions]: https://web.archive.org/web/20180416143856/https://www.diveintopython.net/regular_expressions/index.html [zip]: /assets/python-lessons4.zip [zip sync]: /assets/python-lessons5.zip diff --git a/en/lessons/creating-and-viewing-html-files-with-python.md b/en/lessons/creating-and-viewing-html-files-with-python.md index e4dce5baab..9edec1d86a 100755 --- a/en/lessons/creating-and-viewing-html-files-with-python.md +++ b/en/lessons/creating-and-viewing-html-files-with-python.md @@ -199,7 +199,7 @@ path to the directory on your own computer. - python-lessons6.zip [zip sync] [zip file from the previous lesson]: /assets/python-lessons5.zip - [Zotero]: http://zotero.org - [W3 Schools HTML tutorial]: http://www.w3schools.com/html/default.asp - [doctype declaration]: http://www.w3schools.com/tags/tag_doctype.asp + [Zotero]: https://zotero.org + [W3 Schools HTML tutorial]: https://www.w3schools.com/html/default.asp + [doctype declaration]: https://www.w3schools.com/tags/tag_doctype.asp [zip sync]: /assets/python-lessons6.zip diff --git a/en/lessons/creating-apis-with-python-and-flask.md b/en/lessons/creating-apis-with-python-and-flask.md index c58d83b2a2..52fe461866 100755 --- a/en/lessons/creating-apis-with-python-and-flask.md +++ b/en/lessons/creating-apis-with-python-and-flask.md @@ -113,13 +113,13 @@ The primary focus of this lesson is on creating an API, not exploring or using a Imagine that our research area is sensationalism and the press: has newspaper coverage of major events in the United States become more or less sensational over time? Narrowing the topic, we might ask whether press coverage of, for example, urban fires has increased or decreased with government reporting on fire-related relief spending. -While we won't be able to explore this question thoroughly, we can begin to approach this research space by collecting historical data on newspaper coverage of fires using an API—in this case, the [Chronicling America Historical Newspaper API](http://chroniclingamerica.loc.gov/about/api/). The Chronicling America API allows access to metadata and text for millions of scanned newspaper pages. In addition, unlike many other APIs, it also does not require an authentication process, allowing us to immediately explore the available data without signing up for an account. +While we won't be able to explore this question thoroughly, we can begin to approach this research space by collecting historical data on newspaper coverage of fires using an API—in this case, the [Chronicling America Historical Newspaper API](https://chroniclingamerica.loc.gov/about/api/). The Chronicling America API allows access to metadata and text for millions of scanned newspaper pages. 
In addition, unlike many other APIs, it does not require an authentication process, allowing us to immediately explore the available data without signing up for an account.
 
-Our initial goal in approaching this research question is to find all newspaper stories in the Chronicling America database that use the term "fire." Typically, use of an API starts with its documentation. On the [Chronicling America API page](http://chroniclingamerica.loc.gov/about/api/), we find two pieces of information critical for getting the data we want from the API: the API's **base URL** and the **path** corresponding to the function we want to perform on the API—in this case, searching the database.
+Our initial goal in approaching this research question is to find all newspaper stories in the Chronicling America database that use the term "fire." Typically, use of an API starts with its documentation. On the [Chronicling America API page](https://chroniclingamerica.loc.gov/about/api/), we find two pieces of information critical for getting the data we want from the API: the API's **base URL** and the **path** corresponding to the function we want to perform on the API—in this case, searching the database.
 
 Our base URL is:
 
-    http://chroniclingamerica.loc.gov
+    https://chroniclingamerica.loc.gov
 
 All requests we make to the API must begin with this portion of the URL. All APIs have a base URL like this one that is the same across all requests to the API.
 
@@ -129,17 +129,17 @@ Our path is:
 
 If we combine the base URL and the path together into one URL, we'll have created a request to the Chronicling America API that returns all available data in the database:
 
-    http://chroniclingamerica.loc.gov/search/pages/results/
+    https://chroniclingamerica.loc.gov/search/pages/results/
 
-If you [visit the link above](http://chroniclingamerica.loc.gov/search/pages/results/), you'll see all items available in Chronicling America (12,243,633 at the time of writing), , not just the entries related to our search term, "fire." This request also returns a formatted HTML view, rather than the structured view we want to use to collect data.
+If you [visit the link above](https://chroniclingamerica.loc.gov/search/pages/results/), you'll see all items available in Chronicling America (12,243,633 at the time of writing), not just the entries related to our search term, "fire." This request also returns a formatted HTML view, rather than the structured view we want to use to collect data.
 
 According to the Chronicling America documentation, in order to get structured data specifically relating to fire, we need to pass one more kind of data in our request: **query parameters**.
 
-    http://chroniclingamerica.loc.gov/search/pages/results/?format=json&proxtext=fire
+    https://chroniclingamerica.loc.gov/search/pages/results/?format=json&proxtext=fire
 
 The query parameters follow the `?` in the request, and are separated from one another by the `&` symbol. The first query parameter, `format=json`, changes the returned data from HTML to JSON. The second, `proxtext=fire`, narrows the returned entries to those that include our search term.
 
-If you [follow the above link](http://chroniclingamerica.loc.gov/search/pages/results/?format=json&proxtext=fire) in your browser, you'll see a structured list of the items in the database related to the search term "fire."
The format of the returned data is called JSON, and is a structured format that looks like this excerpt from the Chronicling America results: +If you [follow the above link](https://chroniclingamerica.loc.gov/search/pages/results/?format=json&proxtext=fire) in your browser, you'll see a structured list of the items in the database related to the search term "fire." The format of the returned data is called JSON, and is a structured format that looks like this excerpt from the Chronicling America results: ```json "city": [ @@ -172,7 +172,7 @@ We'll begin by using Flask to create a home page for our site. In this step, we' ## Creating a Basic Flask Application -[Flask](http://flask.pocoo.org/) is a web framework for Python, meaning that it provides functionality for building web applications, including managing HTTP requests and rendering templates. In this section, we will create a basic Flask application. In later sections, we'll add to this application to create our API. Don't worry if you don't understand each individual line of code yet—explanations will be forthcoming once you have this initial version of the application working. +[Flask](https://flask.pocoo.org/) is a web framework for Python, meaning that it provides functionality for building web applications, including managing HTTP requests and rendering templates. In this section, we will create a basic Flask application. In later sections, we'll add to this application to create our API. Don't worry if you don't understand each individual line of code yet—explanations will be forthcoming once you have this initial version of the application working.
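For readers skimming the diff, a minimal sketch of the kind of application this section builds may help; the greeting text below is illustrative rather than the lesson's exact code, and it assumes Flask is installed (`pip install flask`).

```python
# Minimal Flask application of the kind described above: one route, '/',
# mapped to one function whose return value is shown in the browser.
from flask import Flask

app = Flask(__name__)

@app.route('/', methods=['GET'])
def home():
    # Flask runs this function for requests to '/' and returns the HTML
    return "<h1>Distant Reading Archive</h1><p>A prototype API site.</p>"

app.run()
```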

    Why Flask?

    @@ -224,9 +224,9 @@ You can check if you're in the correct folder by running the `pwd` command. Once You should see output similar to this: - * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit) + * Running on `http://127.0.0.1:5000/` (Press CTRL+C to quit) -You may also see some lines related to debugging. This message means that Flask is running your application locally (on your computer) at that address. Follow the link above, [http://127.0.0.1:5000/](http://127.0.0.1:5000/), using your web browser to see the running application: +You may also see some lines related to debugging. This message means that Flask is running your application locally (on your computer) at that address. Follow the link above, `http://127.0.0.1:5000/`, using your web browser to see the running application: {% include figure.html filename="welcome.png" caption="The home page when rendered in a browser." %} @@ -236,7 +236,7 @@ Congratulations, you've created a working web application! Now that we have a homepage for our archive, let's talk about how Flask works and what the above code is doing. -Flask maps HTTP requests to Python functions. In this case, we've mapped one URL path ('`/`') to one function, `home`. When we connect to the Flask server at [http://127.0.0.1:5000/](http://127.0.0.1:5000/), Flask checks if there is a match between the path provided and a defined function. Since `/`, or no additional provided path, has been mapped to the `home` function, Flask runs the code in the function and displays the returned result in the browser. In this case, the returned result is HTML markup for a home page welcoming visitors to the site hosting our API. +Flask maps HTTP requests to Python functions. In this case, we've mapped one URL path ('`/`') to one function, `home`. When we connect to the Flask server at `http://127.0.0.1:5000/`, Flask checks if there is a match between the path provided and a defined function. Since `/`, or no additional provided path, has been mapped to the `home` function, Flask runs the code in the function and displays the returned result in the browser. In this case, the returned result is HTML markup for a home page welcoming visitors to the site hosting our API. The process of mapping URLs to functions is called **routing**. The @@ -333,7 +333,7 @@ app.run() Run the code (navigate to your `api` folder in the command line and enter `python api.py`). Once the server is running, visit our route URL to view the data in the catalog: -[http://127.0.0.1:5000/api/v1/resources/books/all](http://127.0.0.1:5000/api/v1/resources/books/all) +`http://127.0.0.1:5000/api/v1/resources/books/all` You should see JSON output for the three entries in our test catalog. Flask provides us with a `jsonify` function that allows us to convert lists and dictionaries to JSON format. In the route we created, our book entries are converted from a list of Python dictionaries to JSON before being returned to a user. 
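A condensed sketch of the route discussed above; the catalog entries here are placeholders rather than the lesson's own test data.

```python
# Sketch of the books/all route: jsonify converts a list of Python
# dictionaries into a JSON response. The entries are placeholders.
from flask import Flask, jsonify

app = Flask(__name__)

books = [{'id': 0, 'title': 'A Fire Upon the Deep', 'published': 1992},
         {'id': 1, 'title': 'Ancillary Justice', 'published': 2013}]

@app.route('/api/v1/resources/books/all', methods=['GET'])
def api_all():
    return jsonify(books)

app.run()
```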
@@ -407,20 +407,20 @@ def api_id(): return jsonify(results) app.run() -`````` +``` Once you've updated your API with the `api_id` function, run your code as before (`python api.py` from your `api` directory) and visit the below URLs to test the new filtering capability: -[127.0.0.1:5000/api/v1/resources/books?id=0](http://127.0.0.1:5000/api/v1/resources/books?id=0) -[127.0.0.1:5000/api/v1/resources/books?id=1](http://127.0.0.1:5000/api/v1/resources/books?id=1) -[127.0.0.1:5000/api/v1/resources/books?id=2](http://127.0.0.1:5000/api/v1/resources/books?id=2) -[127.0.0.1:5000/api/v1/resources/books?id=3](http://127.0.0.1:5000/api/v1/resources/books?id=3) +- `http://127.0.0.1:5000/api/v1/resources/books?id=0` +- `http://127.0.0.1:5000/api/v1/resources/books?id=1` +- `http://127.0.0.1:5000/api/v1/resources/books?id=2` +- `http://127.0.0.1:5000/api/v1/resources/books?id=3` Each of these should return a different entry, except for the last, which should return an empty list: `[]`, since there is no book for which the id value is 3. (Counting in programming typically starts from 0, so id=3 would be a request for a nonexistent fourth item.) In the next section, we'll explore our updated API in more detail. ## Understanding Our Updated API -In this code, we first create a new function, called `api_id`, with the `@app.route` syntax that maps the function to the path `/api/v1/resources/books`. That means that this function will run when we access [http://127.0.0.1:5000/api/v1/resources/books](http://127.0.0.1:5000/api/v1/resources/books). (Note that accessing this link without providing an ID will give the error message we provided in the code: `Error: No id field provided. Please specify an id.`) +In this code, we first create a new function, called `api_id`, with the `@app.route` syntax that maps the function to the path `/api/v1/resources/books`. That means that this function will run when we access `http://127.0.0.1:5000/api/v1/resources/books`. (Note that accessing this link without providing an ID will give the error message we provided in the code: `Error: No id field provided. Please specify an id.`) Inside our function, we do two things: @@ -428,7 +428,7 @@ First, examine the provided URL for an id and select the books that match that i This part of the code determines if there is a query parameter, like `?id=0`, and then assigns the provided ID to a variable. -``` +```python if 'id' in request.args: id = int(request.args['id']) else: @@ -437,7 +437,7 @@ This part of the code determines if there is a query parameter, like `?id=0`, an Then this section moves through our test catalog of books, matches those books that have the provided ID, and appends them to the list that will be returned to the user: -``` +```python for book in books: if book['id'] == id: results.append(book) @@ -459,38 +459,38 @@ The prevailing design philosophy of modern APIs is called REST. For our purposes Because HTTP requests are so integral to using a REST API, many design principles revolve around how requests should be formatted. We've already created one HTTP request, which returns all books provided in our sample data. To understand the considerations that go into formatting this request, let's first consider a weak or poorly-designed example of an API endpoint: - http://api.example.com/getbook/10 + `http://api.example.com/getbook/10` The formatting of this request has a number of issues. 
The first is semantic—in a REST API, our verbs are typically `GET`, `POST`, `PUT`, or `DELETE`, and are determined by the request method rather than in the request URL. That means that the word "get" should not appear in our request, since "get" is implied by the fact that we're using an HTTP GET method. In addition, resource collections such as `books` or `users` should be denoted with plural nouns. This makes it clear when an API is referring to a collection (`books`) or an entry (`book`).
 
 Incorporating these principles, our API would look like this:
 
-    http://api.example.com/books/10
+    `http://api.example.com/books/10`
 
 The above request uses part of the path (`/10`) to provide the ID. While this is not an uncommon approach, it's somewhat inflexible—with URLs constructed in this manner, you can generally only filter by one field at a time. Query parameters allow for filtering by multiple database fields and make more sense when providing "optional" data, such as an output format:
 
-    http://api.example.com/books?author=Ursula+K.+Le Guin&published=1969&output=xml
+    `http://api.example.com/books?author=Ursula+K.+Le+Guin&published=1969&output=xml`
 
 When designing how requests to your API should be structured, it also makes sense to plan for future additions. Even if the current version of your API serves information on only one type of resource—`books`, for example—it makes sense to plan as if you might add other resources or non-resource functionality to your API in the future:
 
-    http://api.example.com/resources/books?id=10
+    `http://api.example.com/resources/books?id=10`
 
 Adding an extra segment on your path such as "resources" or "entries" gives you the option to allow users to search across all resources available, making it easier for you to later support requests such as these:
 
-    https://api.example.com/v1/resources/images?id=10
-    https://api.example.com/v1/resources/all
+    `https://api.example.com/v1/resources/images?id=10`
+    `https://api.example.com/v1/resources/all`
 
 Another way to plan for your API's future is to add a version number to the path. This means that, should you have to redesign your API, you can continue to support the old version of the API under the old version number while releasing, for example, a second version (`v2`) with improved or different functionality. This way, applications and scripts built using the old version of your API won't cease to function after your upgrade.
 
 After incorporating these design improvements, a request to our API might look like this:
 
-    https://api.example.com/v1/resources/books?id=10
+    `https://api.example.com/v1/resources/books?id=10`
 
 ## Documentation and Examples
 
 Without documentation, even the best-designed API will be unusable. Your API should have documentation describing the resources or functionality available through your API that also provides concrete working examples of request URLs or code for your API. You should have a section for each resource that describes which fields, such as `id` or `title`, it accepts. Each section should have an example in the form of a sample HTTP request or block of code.
 
-A fairly common practice in documenting APIs is to provide annotations in your code that are then automatically collated into documentation using a tool such as [Doxygen](http://www.doxygen.org/) or [Sphinx](http://www.sphinx-doc.org/en/stable/). These tools create documentation from **docstrings**—comments you make on your function definitions.
While this kind of documentation is a good idea, you shouldn't consider your job done if you've only documented your API to this level. Instead, try to imagine yourself as a potential user of your API and provide working examples. In an ideal world, you would have three kinds of documentation for your API: a reference that details each route and its behavior, a guide that explains the reference in prose, and at least one or two tutorials that explain every step in detail.
+A fairly common practice in documenting APIs is to provide annotations in your code that are then automatically collated into documentation using a tool such as [Doxygen](https://www.doxygen.org/) or [Sphinx](https://www.sphinx-doc.org/en/stable/). These tools create documentation from **docstrings**—comments you make on your function definitions. While this kind of documentation is a good idea, you shouldn't consider your job done if you've only documented your API to this level. Instead, try to imagine yourself as a potential user of your API and provide working examples. In an ideal world, you would have three kinds of documentation for your API: a reference that details each route and its behavior, a guide that explains the reference in prose, and at least one or two tutorials that explain every step in detail.
 
-For inspiration on how to approach API documentation, see the [New York Public Library Digital Collections API](http://api.repo.nypl.org/), which sets a standard of documentation achievable for many academic projects. For an extensively documented (though sometimes overwhelming) API, see the [MediaWiki Action API](https://www.mediawiki.org/wiki/API:Main_page), which provides documentation to users who pass partial queries to the API. (In our example above, we returned an error on a partial query.) For other professionally maintained API documentation examples, consider the [World Bank API](https://datahelpdesk.worldbank.org/knowledgebase/articles/889392-api-documentation), the various [New York Times APIs](https://developer.nytimes.com/), or the [Europeana Pro API](https://pro.europeana.eu/resources/apis).
+For inspiration on how to approach API documentation, see the [New York Public Library Digital Collections API](https://api.repo.nypl.org/), which sets a standard of documentation achievable for many academic projects. For an extensively documented (though sometimes overwhelming) API, see the [MediaWiki Action API](https://www.mediawiki.org/wiki/API:Main_page), which provides documentation to users who pass partial queries to the API. (In our example above, we returned an error on a partial query.) For other professionally maintained API documentation examples, consider the [World Bank API](https://datahelpdesk.worldbank.org/knowledgebase/articles/889392-api-documentation), the various [New York Times APIs](https://developer.nytimes.com/), or the [Europeana Pro API](https://pro.europeana.eu/resources/apis).
 
 # Connecting Our API to a Database
 
@@ -572,16 +572,19 @@ def api_filter():
 app.run()
 ```
+
 Save the code as `api_final.py` in your `api` folder and run it by navigating to your project folder in the terminal and entering the command:
 
-    python api_final.py
+```bash
+python api_final.py
+```
 
 Note that if a previous version of the code is still running, you will first need to end that process by pressing `Control-C` before executing the new code.
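If you would rather exercise the running API from a script than from a browser, a small client sketch using the third-party `requests` library (an extra dependency, not something the lesson requires) might look like this:

```python
# Hypothetical client-side check of the running API; assumes
# 'pip install requests' and that api_final.py is running locally.
import requests

url = 'http://127.0.0.1:5000/api/v1/resources/books'
response = requests.get(url, params={'author': 'Connie Willis'})
print(response.status_code)  # 200 on success
print(response.json())       # matching entries as Python objects
```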
Once this example is running, try out the filtering functionality with these HTTP requests:
 
-[http://127.0.0.1:5000/api/v1/resources/books/all](http://127.0.0.1:5000/api/v1/resources/books/all)
-[http://127.0.0.1:5000/api/v1/resources/books?author=Connie+Willis](http://127.0.0.1:5000/api/v1/resources/books?author=Connie+Willis)
-[http://127.0.0.1:5000/api/v1/resources/books?author=Connie+Willis&published=1999](http://127.0.0.1:5000/api/v1/resources/books?author=Connie+Willis&published=1993)
-[http://127.0.0.1:5000/api/v1/resources/books?published=2010](http://127.0.0.1:5000/api/v1/resources/books?published=2010)
+- `http://127.0.0.1:5000/api/v1/resources/books/all`
+- `http://127.0.0.1:5000/api/v1/resources/books?author=Connie+Willis`
+- `http://127.0.0.1:5000/api/v1/resources/books?author=Connie+Willis&published=1999`
+- `http://127.0.0.1:5000/api/v1/resources/books?published=2010`
 
 The database downloaded for this lesson has 67 entries, one for each of the winners of the Hugo Award for best science fiction novel between 1953 and 2014 (avoiding the voting controversy of 2015). The data set includes the novel's title, author, year of publication, and first sentence. Our API allows users to filter by three fields: `id`, `published` (year of publication), and `author`.
 
@@ -621,24 +624,32 @@ In HTML responses, the code `200` means "OK"(the expected data transferred), whi
 
 Our `api_filter` function is an improvement on our previous `api_id` function that returns a book based on its ID. This new function allows for filtering by three different fields: `id`, `published`, and `author`. The function first grabs all the query parameters provided in the URL (remember, query parameters are the part of the URL that follows the `?`, like `?id=10`).
 
+```python
 query_parameters = request.args
+```
 
 It then pulls the supported parameters `id`, `published`, and `author` and binds them to appropriate variables:
 
+```python
 id = query_parameters.get('id')
 published = query_parameters.get('published')
 author = query_parameters.get('author')
+```
 
 The next segment begins to build an SQL query that will be used to find the requested information in the database. SQL queries used to find data in a database take this form:
 
+```sql
 SELECT <columns> FROM <table> WHERE <column1>=<value1> AND <column2>=<value2>;
+```
 
 To get the correct data, we need to build both an SQL query that looks like the above and a list with the filters that will be matched. Combined, the query and the filters provided by the user will allow us to pull the correct books from our database. We begin to define both the query and the filter list:
 
+```python
 query = "SELECT * FROM books WHERE"
 to_filter = []
+```
 
 Then, if `id`, `published`, or `author` were provided as query parameters, we add them to both the query and the filter list:
 
@@ -669,15 +680,19 @@ To perfect our query, we remove the trailing ` AND` and cap the query with the `
 
 Next, we connect to our database as in our `api_all` function, then execute the query we've built using our filter list:
 
+```python
 conn = sqlite3.connect('books.db')
 conn.row_factory = dict_factory
 cur = conn.cursor()
 results = cur.execute(query, to_filter).fetchall()
+```
 
 Finally, we return the results of our executed SQL query as JSON to the user:
 
+```python
 return jsonify(results)
+```
 
 Whew! When all is said and done, this section of code reads query parameters provided by the user, builds an SQL query based on those parameters, executes that query to find matching books in the database, and returns those matches as JSON to the user.
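As an aside, the placeholder pattern walked through above can be tried in isolation. The self-contained sketch below uses an in-memory database and invented rows (not the lesson's `books.db`) to show why the query string and the filter values travel separately.

```python
# Self-contained illustration of parameterised SQL: '?' markers sit in
# the query while the values are passed separately, so user input is
# never spliced directly into the SQL string. Table and row invented.
import sqlite3

conn = sqlite3.connect(':memory:')
cur = conn.cursor()
cur.execute("CREATE TABLE books (id INTEGER, author TEXT, published INTEGER)")
cur.execute("INSERT INTO books VALUES (0, 'Connie Willis', 1993)")

query = "SELECT * FROM books WHERE"
to_filter = []

author = 'Connie Willis'  # stand-in for a user-supplied query parameter
if author:
    query += ' author=? AND'
    to_filter.append(author)

query = query[:-4] + ';'  # trim the trailing ' AND' and close the statement
print(cur.execute(query, to_filter).fetchall())
```

The same separation of query text and values is what the lesson's `api_filter` function performs at a larger scale.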
This section of code makes our API's filtering capability considerably more sophisticated—users can now find books by, for example, Ursula K. Le Guin that were published in 1975, or all books in the database published in 2010.
 
@@ -699,13 +714,13 @@ The below resources provide information on useful APIs for researchers in the hu
 
 ## APIs for Humanities Researchers
 
-[Chronicling America \(Library Of Congress\)](http://chroniclingamerica.loc.gov/) - A digitized collection of American newspaper articles from the 18th to the 20th century.
+[Chronicling America \(Library Of Congress\)](https://chroniclingamerica.loc.gov/) - A digitized collection of American newspaper articles from the 18th to the 20th century.
 
 [Connecting Repositories \(CORE\)](https://core.ac.uk/) - A collection of open access articles from various sources hosted by the Open University.
 
 [English Broadside Ballad Archive \(EBBA\)](https://diggingintodata.org/repositories/english-broadside-ballad-archive-ebba)
 
-[History Data Service (HDS)](http://hds.essex.ac.uk/) - A collection of data from a wide variety of historical sources.
+[History Data Service (HDS)](https://hds.essex.ac.uk/) - A collection of data from a wide variety of historical sources.
 
 [Europeana](https://pro.europeana.eu/)
 
diff --git a/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.md b/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.md
index 34ddf9c8dc..914274fe8a 100644
--- a/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.md
+++ b/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.md
@@ -52,13 +52,13 @@ In addition to the above software requirements, you will also need to make sure
 
 ## How can Humanists use Augmented Reality?
 
-Novel applications of AR continue to surface within a variety of industries: [museums](https://www.youtube.com/watch?v=gx_UQxx54lo) are integrating AR content into their displays, [companies](http://www.gizmag.com/ikea-augmented-reality-catalog-app/28703/) are promoting AR apps in lieu of print or even web-based catalogs, and [engineering firms](https://www.youtube.com/watch?v=bXqe2zSepQ4) are creating AR applications showcasing their efforts to promote sustainability. [Predicted to grow](https://www.statista.com/statistics/786821/ar-device-and-services-revenue-worldwide/) into a multi-billion industry by 2020, augmented reality is an exciting new medium that humanists cannot afford to ignore. Indeed, many scholars within the growing field of digital humanities are beginning to explore how AR can be utilized as a viable medium of scholarly engagement within public spaces, objects, images, and texts.
+Novel applications of AR continue to surface within a variety of industries: [museums](https://www.youtube.com/watch?v=gx_UQxx54lo) are integrating AR content into their displays, [companies](https://www.gizmag.com/ikea-augmented-reality-catalog-app/28703/) are promoting AR apps in lieu of print or even web-based catalogs, and [engineering firms](https://www.youtube.com/watch?v=bXqe2zSepQ4) are creating AR applications showcasing their efforts to promote sustainability. [Predicted to grow](https://www.statista.com/statistics/786821/ar-device-and-services-revenue-worldwide/) into a multi-billion-dollar industry by 2020, augmented reality is an exciting new medium that humanists cannot afford to ignore.
Indeed, many scholars within the growing field of digital humanities are beginning to explore how AR can be utilized as a viable medium of scholarly engagement within public spaces, objects, images, and texts. {% include figure.html filename="ar-dev-1.png" caption="Augmented reality can be used to overlay digital information onto existing texts such as historical markers. This modified image is based on a photograph by Nicholas Henderson. 2015." %} Since at least 2010, [digital artists](https://manifestarblog.wordpress.com/about/) have been creating AR applications for social advocacy and cultural intervention. For example, Tamiko Thiel's AR project [Clouding Green](https://perma.cc/6NLX-AJBH) reveals the carbon footprint of specific technology companies. More recently, a group of New York artists created a ["vandalized" version of Jeff Koon's Snapchat sculptures](https://techcrunch.com/2017/10/08/jeff-koons-augmented-reality-snapchat-artwork-gets-vandalized/) as a way of protesting the digital takeover of public AR spaces. -At the [Trace Initiative](http://web.archive.org/web/20180421163517/http://english.ufl.edu/trace_arcs/), a digital humanities organization in the University of Florida English Department, we seek to build upon the work of these artists by promoting the creation and circulation of humanities-focused mobile AR applications. We released our first AR application [to the Google Play store](https://web.archive.org/web/20210421123810/http://trace-arcs.english.ufl.edu/projects/scramble.html) in spring 2016. +At the [Trace Initiative](https://web.archive.org/web/20180421163517/https://english.ufl.edu/trace_arcs/), a digital humanities organization in the University of Florida English Department, we seek to build upon the work of these artists by promoting the creation and circulation of humanities-focused mobile AR applications. We released our first AR application [to the Google Play store](https://web.archive.org/web/20210421123810/https://trace-arcs.english.ufl.edu/projects/scramble.html) in spring 2016. The augmented reality software used in this tutorial relies on image-recognition technology, meaning that it requires some kind of visual trigger (a logo, painting, etc.) to know when to display digital content. In the example application depicted in the image above, the application is programmed to only display the digital image of John C. Calhoun if the camera "recognizes" the specific historical marker with which it is associated. For this lesson, you will augment the cover of a physical book with a digital overlay that displays a picture of the author. You could use the technical skills gained throughout this tutorial to create digital overlays for a variety of texts such as historical documents or signs. For example, you might create an application that allows readers to scan the pages of a book or document and access historical context or critique related to that specific page. Humanities scholars could also use this tutorial to create site-specific AR applications to educate visitors about cultural aspects of a location that have been excluded from its historical presentation. @@ -86,7 +86,7 @@ Since the release of Unity 2017.2, the Vuforia SDK is integrated into the Unity ### Java Development Kit -Download and install the [Java Development Kit 8](http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html) for your operating system. At this time, Unity is incompatible with JDK 10. 
+Download and install the [Java Development Kit 8](https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html) for your operating system. At this time, Unity is incompatible with JDK 10.
 
 Click the file once it has finished downloading, and follow the installation guide.
 
@@ -190,7 +190,7 @@ This cover of *Of Mice and Men* has sufficient visual complexity; however, it is
 
 {% include figure.html filename="ar-dev-10.png" caption="Photo courtesy of Mark Skwarek." %}
 
-If you are taking a picture of your book cover, make sure that there are no extraneous features present in the image. In the case of the *Of Mice and Men* image above, this would be anything beyond the edge of the cover. If your image contains such extraneous features, either take another picture or open it in a photo editor such as [Gimp](http://www.gimp.org/) and crop out these features. [Consult the latest Gimp documentation](https://www.gimp.org/docs/) for help on cropping and resizing images. Make sure that your image file is under 2.5 mb and that it is a .jpg or .png file.
+If you are taking a picture of your book cover, make sure that there are no extraneous features present in the image. In the case of the *Of Mice and Men* image above, this would be anything beyond the edge of the cover. If your image contains such extraneous features, either take another picture or open it in a photo editor such as [Gimp](https://www.gimp.org/) and crop out these features. [Consult the latest Gimp documentation](https://www.gimp.org/docs/) for help on cropping and resizing images. Make sure that your image file is under 2.5 MB and that it is a .jpg or .png file.
 
 {% include figure.html filename="ar-dev-11.png" caption="Crop out the area around the book." %}
 
@@ -343,7 +343,7 @@ Return to Unity to setup your application for an Android or iOS build:
 
 To install your own applications on your Android device,
 
-1. [Enable USB debugging](http://developer.Android.com/tools/device.html) by going to Setting > About Device.
+1. [Enable USB debugging](https://developer.Android.com/tools/device.html) by going to Settings > About Device.
 2. Tap the Build number seven times.
 3. Return to the previous screen and you should now see a Developer Options tab. Click it and make sure the option for USB debugging is checked.
 
diff --git a/en/lessons/creating-network-diagrams-from-historical-sources.md b/en/lessons/creating-network-diagrams-from-historical-sources.md
index 07203f36a6..b0b8a8cf34 100755
--- a/en/lessons/creating-network-diagrams-from-historical-sources.md
+++ b/en/lessons/creating-network-diagrams-from-historical-sources.md
@@ -29,7 +29,7 @@ doi: 10.46430/phen0044
 
 Introduction
 ------------
 
-Network visualizations can help humanities scholars reveal hidden and complex patterns and structures in textual sources.
This tutorial explains how to extract network data (people, institutions, places, etc) from historical sources through the use of non-technical methods developed in Qualitative Data Analysis (QDA) and Social Network Analysis (SNA), and how to visualize this data with the platform-independent and particularly easy-to-use [*Palladio*](http://hdlab.stanford.edu/palladio/).
+Network visualizations can help humanities scholars reveal hidden and complex patterns and structures in textual sources. This tutorial explains how to extract network data (people, institutions, places, etc.) from historical sources through the use of non-technical methods developed in Qualitative Data Analysis (QDA) and Social Network Analysis (SNA), and how to visualize this data with the platform-independent and particularly easy-to-use [*Palladio*](https://hdlab.stanford.edu/palladio/).
 
 {% include figure.html caption="Figure 1: A network visualization in Palladio and what you will be able to create by the end of this tutorial." filename="image09.png" %}
 
@@ -39,7 +39,7 @@ The graph above shows an excerpt from the network of Ralph Neumann, particularly
 
 Generally, network analysis provides the tools to explore highly complex constellations of relations between entities. Think of your friends: You will find it very easy to map out who are close and who don't get along well. Now imagine you had to explain these various relationships to somebody who does not know any of your friends. Or you wanted to include the relationships between your friends’ friends. In situations like this, language and our capacity to comprehend social structures quickly reach their limits. Graph visualizations can be means to effectively communicate and explore such complex constellations. Generally you can think of Social Network Analysis as a means to transform complexity from a problem to an object of research. Often, nodes in a network represent humans connected to other humans by all imaginable types of social relations. But pretty much anything can be understood as a node: A film, a place, a job title, a point in time, a venue. Similarly, the concept of a tie (also called edge) between nodes is just as flexible: two theaters could be connected by a film shown in both of them, or by co-ownership, geographical proximity, or being in business in the same year. All this depends on your research interests and how you express them in the form of nodes and relations in a network.
 
-This tutorial can not replace any of the many existing generic network analysis handbooks, such as [John Scott's _Social Network Analysis_](https://uk.sagepub.com/en-gb/eur/the-sage-handbook-of-social-network-analysis/book277881). For a great general introduction to the field and all its pitfalls for humanists I recommend[ ](https://web.archive.org/web/20240203222438/https://www.scottbot.net/HIAL/index.html@p=6279.html)[*Scott Weingart’s blog post series “Networks Demystified”*](https://web.archive.org/web/20240203222438/https://www.scottbot.net/HIAL/index.html@p=6279.html) as well as[ ](http://hal.archives-ouvertes.fr/docs/00/64/93/16/PDF/lemercier_A_zg.pdf)[*Claire Lemercier’s paper “Formal network methods in history: why and how?"*](http://hal.archives-ouvertes.fr/docs/00/64/93/16/PDF/lemercier_A_zg.pdf). You may also want to explore the bibliography and event calendar over at [_Historical Network Research_](http://historicalnetworkresearch.org/) to get a sense of how historians have made use of networks in their research.
+This tutorial cannot replace any of the many existing generic network analysis handbooks, such as [John Scott's _Social Network Analysis_](https://uk.sagepub.com/en-gb/eur/the-sage-handbook-of-social-network-analysis/book277881).
For a great general introduction to the field and all its pitfalls for humanists, I recommend [*Scott Weingart’s blog post series “Networks Demystified”*](https://web.archive.org/web/20240203222438/https://www.scottbot.net/HIAL/index.html@p=6279.html) as well as [*Claire Lemercier’s paper “Formal network methods in history: why and how?"*](https://hal.archives-ouvertes.fr/docs/00/64/93/16/PDF/lemercier_A_zg.pdf). You may also want to explore the bibliography and event calendar over at [_Historical Network Research_](https://historicalnetworkresearch.org/) to get a sense of how historians have made use of networks in their research.
 
 This tutorial will focus on data extraction from unstructured text and shows one way to visualize it using Palladio. It is purposefully designed to be as simple and robust as possible. For the limited scope of this tutorial it will suffice to say that an actor refers to the persons, institutions, etc. which are the object of study and which are connected by relations. Within the context of a network visualization or computation (also called graph), we call them nodes and we call the connections ties. In all cases it is important to remember that nodes and ties are drastically simplified models used to represent the complexities of past events, and in themselves do not always suffice to generate insight. But it is likely that the graph will highlight interesting aspects, challenge your hypothesis and/or lead you to generate new ones. *Network diagrams become meaningful when they are part of a dialogue with data and other sources of information.*
 
@@ -51,7 +51,7 @@ In other words, the challenge is to systematize text interpretation. Networks cr
 
 About the case study
 --------------------
 
-The case study I use for this tutorial is a first-person narrative of Ralph Neumann, a Jewish survivor of the Holocaust. You can find the text [*online*](http://web.archive.org/web/20180422010025/http://www.gdw-berlin.de/fileadmin/bilder/publ/publikationen_in_englischer_sprache/2006_Neuman_eng.pdf). The coding scheme which I will introduce below is a simplified version of the one I developed during [*my PhD project on covert support networks during the Second World War*](http://martenduering.com/research/covert-networks-during-the-holocaust/). My research was driven by three questions: To what extent can social relationships help explain why ordinary people took the risks associated with helping? How did such relationships enable people to provide these acts of help given that only very limited resources were available to them? How did social relationships help Jewish refugees to survive in the underground?
+The case study I use for this tutorial is a first-person narrative of Ralph Neumann, a Jewish survivor of the Holocaust. You can find the text [*online*](https://web.archive.org/web/20180422010025/https://www.gdw-berlin.de/fileadmin/bilder/publ/publikationen_in_englischer_sprache/2006_Neuman_eng.pdf). The coding scheme which I will introduce below is a simplified version of the one I developed during [*my PhD project on covert support networks during the Second World War*](https://martenduering.com/research/covert-networks-during-the-holocaust/). My research was driven by three questions: To what extent can social relationships help explain why ordinary people took the risks associated with helping?
How did such relationships enable people to provide these acts of help given that only very limited resources were available to them? How did social relationships help Jewish refugees to survive in the underground? In this project network visualisations helped me to discover hitherto forgotten yet highly important contact brokers, highlight the overall significance of Jewish refugees as contact brokers and generally to navigate through a total of some 5,000 acts of help which connected some 1,400 people between 1942 and 1945. @@ -135,7 +135,7 @@ The following steps will explain how to visualize network data in Palladio but I Step by Step: -**1. Palladio.** Go to [*http://hdlab.stanford.edu/palladio/*](http://hdlab.stanford.edu/palladio/)*.* +**1. Palladio.** Go to [*https://hdlab.stanford.edu/palladio/*](https://hdlab.stanford.edu/palladio/)*.* **2. Start.** On their website click the “Start” button. @@ -157,7 +157,7 @@ Step by Step: {% include figure.html caption="Figure 9: Linking People to Relations." filename="image08.png" %} -**7. Identify temporal data.** Palladio has nice time visualization features. You can use it if you have start and end points for each relation. The sample data contains two columns with suitable data. Click on “Time Step Start” and select the data type “Year or Date”. Do the same for “Time Step End” (Figure 10). The Palladio team recommends that your data is in the YYYY-MM-DD format, but my more abstract time steps worked well. If you were to load geographical coordinates (not covered by this tutorial but here: [*Palladio Simple Map Scenario*](http://hdlab.stanford.edu/doc/scenario-simple-map.pdf)) you would select the “Coordinates” data type. +**7. Identify temporal data.** Palladio has nice time visualization features. You can use it if you have start and end points for each relation. The sample data contains two columns with suitable data. Click on “Time Step Start” and select the data type “Year or Date”. Do the same for “Time Step End” (Figure 10). The Palladio team recommends that your data is in the YYYY-MM-DD format, but my more abstract time steps worked well. If you were to load geographical coordinates (not covered by this tutorial but here: [*Palladio Simple Map Scenario*](https://hdlab.stanford.edu/doc/scenario-simple-map.pdf)) you would select the “Coordinates” data type. {% include figure.html caption="Figure 10: Changing the data type to 'Year or Date'" filename="image05.png"%} @@ -178,7 +178,7 @@ Network visualizations can be incredibly suggestive. Remember that whatever you {% include figure.html caption="Figure 13: The Facet filter in Palladio." filename="image15.png" %} -**12. Bipartite network visualization.** Now this is nice. But there is something else which makes Palladio a great tool to start out with network visualization: It makes it very easy to produce [*bipartite, or 2-mode networks*](http://en.wikipedia.org/wiki/Bipartite_graph#Examples). What you have seen until now is a so-called unipartite or 1-mode network: It represents relations between source and target nodes of one type (for example “people”) through one or more types of relations, Figures 13 and 14 are examples of this type of graph. +**12. Bipartite network visualization.** Now this is nice. But there is something else which makes Palladio a great tool to start out with network visualization: It makes it very easy to produce [*bipartite, or 2-mode networks*](https://en.wikipedia.org/wiki/Bipartite_graph#Examples). 
What you have seen until now is a so-called unipartite or 1-mode network: It represents relations between source and target nodes of one type (for example “people”) through one or more types of relations; Figures 13 and 14 are examples of this type of graph.
 
 Network analysis however gives you a lot of freedom to rethink what source and targets are. Bipartite networks have two different types of nodes; an example could be to select “people” as the first node type and “point in time” as the second. Figure 15 shows a bipartite network and reveals which recipients of help were present in the network at the same time. Compare this graph to Figure 16 which shows which givers of help were present at the same time. This points at a high rate of fluctuation among helpers, an observation which holds true for all of the networks I studied. While humans are very good at processing people-to-people networks, we find it harder to process these more abstract networks. Give it a try and experiment with different bipartite networks: Click again on “Target” but this time select “Form of Help” or “Sex” or any other category.
 
@@ -204,7 +204,7 @@ Note that if you wanted to see "Giver" and "Recipients" as one node type and "Da
 
 {% include figure.html caption="Figure 17: Timeline visualization of Time Steps." filename="image12.png" %}
 
-**15. Node size.** Palladio lets you size your nodes based on actor attributes. Note that this does not make sense for the sample data given that numerical values represent categories. Node sizes can however be useful if you were to represent the sum of a person’s acts of help, which in this case would correspond to his or her [*Out-Degree*](http://en.wikipedia.org/wiki/Directed_graph#Indegree_and_outdegree), the number of outgoing relations for a node.
+**15. Node size.** Palladio lets you size your nodes based on actor attributes. Note that this does not make sense for the sample data given that numerical values represent categories. Node sizes can however be useful if you were to represent the sum of a person’s acts of help, which in this case would correspond to his or her [*Out-Degree*](https://en.wikipedia.org/wiki/Directed_graph#Indegree_and_outdegree), the number of outgoing relations for a node.
 
 **16. Export your visualizations.** Palladio lets you export your network as .svg files, a vector-based image format. Use your browser of choice to open them.
 
@@ -241,14 +241,14 @@ Good luck!
 
 Other network visualization tools to consider
 ---------------------------------------------
 
-[*Nodegoat*](http://nodegoat.net/) – similar to Palladio in that it makes data collection, mapping and graph visualizations easy. Allows easy setup of relational databases and lets users store data on their servers. [*Tutorial available here*](http://nodegoat.net/cms/UPLOAD/AsmallguidebyYanan11082014.pdf).
+[*Nodegoat*](https://nodegoat.net/) – similar to Palladio in that it makes data collection, mapping and graph visualizations easy. Allows easy setup of relational databases and lets users store data on their servers. [*Tutorial available here*](https://nodegoat.net/cms/UPLOAD/AsmallguidebyYanan11082014.pdf).
 
-[*NodeXL*](https://www.smrfoundation.org/nodexl/) – capable to perform many tasks common in SNA, easy-to-use, open source but requires Windows and MS Office 2007 or newer.[ ](https://www.youtube.com/watch?v=pwsImFyc0lE)[*Tutorial 1*](https://www.youtube.com/watch?v=pwsImFyc0lE), [*Tutorial 2*](http://www.youtube.com/watch?v=xKhYGRpbwOc).
+[*NodeXL*](https://www.smrfoundation.org/nodexl/) – capable of performing many tasks common in SNA; easy to use and open source, but requires Windows and MS Office 2007 or newer. [*Tutorial 1*](https://www.youtube.com/watch?v=pwsImFyc0lE), [*Tutorial 2*](https://www.youtube.com/watch?v=xKhYGRpbwOc).

-[*Gephi*](https://gephi.github.io/) – open source, platform independent. The best known and most versatile visualization tool available but expect a steep learning curve. The developers announce support for parallel edges in version 1.0. Tutorials: by [*Clement Levallois*](http://www.clementlevallois.net/training.html) and [*Sebastien Heymann*](http://www.youtube.com/watch?v=L6hHv6y5GsQ).
+[*Gephi*](https://gephi.github.io/) – open source, platform independent. The best known and most versatile visualization tool available, but expect a steep learning curve. The developers have announced support for parallel edges in version 1.0. Tutorials: by [*Clement Levallois*](https://www.clementlevallois.net/training.html) and [*Sebastien Heymann*](https://www.youtube.com/watch?v=L6hHv6y5GsQ).

[*VennMaker*](https://www.vennmaker.com) – platform-independent and can be tested for free. VennMaker inverts the process of data collection: Users start with a customizable canvas and draw self-defined nodes and relations on it. The tool collects the corresponding data in the background.

-The most commonly used tools for more mathematical analyses are [*UCINET*](https://sites.google.com/site/ucinetsoftware/home) (licensed, tutorials available on their website) and [*Pajek*](http://pajek.imfm.si/doku.php) (free) for which a great [*handbook*](http://www.cambridge.org/us/academic/subjects/sociology/research-methods-sociology-and-criminology/exploratory-social-network-analysis-pajek-2nd-edition) exists. Both were developed for Windows but run well elsewhere using Wine.
+The most commonly used tools for more mathematical analyses are [*UCINET*](https://sites.google.com/site/ucinetsoftware/home) (licensed, tutorials available on their website) and [*Pajek*](https://pajek.imfm.si/doku.php) (free), for which a great [*handbook*](https://www.cambridge.org/us/academic/subjects/sociology/research-methods-sociology-and-criminology/exploratory-social-network-analysis-pajek-2nd-edition) exists. Both were developed for Windows but run well elsewhere using Wine.

For Python users, the very well documented [*NetworkX*](https://networkx.github.io/) package is a great starting point; other packages exist for other programming languages.

diff --git a/en/lessons/crowdsourced-data-normalization-with-pandas.md b/en/lessons/crowdsourced-data-normalization-with-pandas.md
index 2b0d1beefe..c310adfbf0 100644
--- a/en/lessons/crowdsourced-data-normalization-with-pandas.md
+++ b/en/lessons/crowdsourced-data-normalization-with-pandas.md
@@ -38,7 +38,7 @@ At the end of the lesson you will:

This tutorial is for you if you are new to crowdsourcing and have little previous Python experience.

### Why Use Crowdsourcing?
-In recent years, crowdsourcing cultural heritage projects such as [Transcribe Bentham](http://transcribe-bentham.ucl.ac.uk/td/Transcribe_Bentham) have made new research possible. In this example, volunteers can create accounts and transcribe the over 60,000 manuscripts of English philosopher, [Jeremy Bentham (1748-1832)](https://en.wikipedia.org/wiki/Jeremy_Bentham).
Transcribe Bentham is making these important historical and philosophical manuscripts accessible to researchers, particularly those participating in text analysis. Other projects, such as [Penguin Watch](https://www.zooniverse.org/projects/penguintom79/penguin-watch) on [Zooniverse](https://www.zooniverse.org/), allowed members of the public to classify different images of penguins, which contributed to identifying environmental threats. Zooniverse itself is an online platform for "people-powered research," allowing millions of people worldwide to contribute to different research projects. These are all cases where data is collected and analyzed on a massive scale and public assistance is needed to complete very large projects.
+In recent years, crowdsourcing cultural heritage projects such as [Transcribe Bentham](https://transcribe-bentham.ucl.ac.uk/td/Transcribe_Bentham) have made new research possible. In this example, volunteers can create accounts and transcribe the over 60,000 manuscripts of the English philosopher [Jeremy Bentham (1748-1832)](https://en.wikipedia.org/wiki/Jeremy_Bentham). Transcribe Bentham is making these important historical and philosophical manuscripts accessible to researchers, particularly those participating in text analysis. Other projects, such as [Penguin Watch](https://www.zooniverse.org/projects/penguintom79/penguin-watch) on [Zooniverse](https://www.zooniverse.org/), allowed members of the public to classify different images of penguins, which contributed to identifying environmental threats. Zooniverse itself is an online platform for "people-powered research," allowing millions of people worldwide to contribute to different research projects. These are all cases where data is collected and analyzed on a massive scale and public assistance is needed to complete very large projects.

Computation and programming methods are very powerful, but some jobs are only possible because of human expertise. There are elements of transcription or identification that are not easy to do using programming alone. Humans are better able to identify small differences and unusual data. However, people can also contribute to projects in larger ways, usually by competing in a contest. An example of macrotasking - a type of crowdsourcing for larger, more specialized projects - is the [Netflix Prize](https://www.netflixprize.com/). The Netflix Prize called for people to develop an algorithm to better predict movie recommendations for customers, and winners received a prize.

@@ -78,7 +78,7 @@ Unfortunately, the What's on the menu? website was retired in January 202

Although the website is no longer live, you can still follow along with this lesson without any adjustments.

-No matter how strict your guidelines or your submission protocols, variation will always be present in your crowdsourced data. However, there are ways to identify and normalize data in those cases. The New York Public Library (NYPL) possesses a digitized collection of approximately 45,000 menus, dating from the 1840s to today, and offers a good case study on how to correct some of these unavoidable issues. This collection is made public through [What's on the menu?](http://menus.nypl.org/). Instead of using optical character recognition (OCR) – a way of programmatically reading hand-written or printed documents into machine-searchable text - NYPL crowdsources transcription of the collection.
Methods like OCR can save time but do not guarantee accuracy and often require humans to check and correct the output. In addition, the NYPL’s menus include a wide variety of handwritten texts and complex fonts which meant writing a universal code to ensure OCR accuracy was very difficult. Even if a universal code could be developed, the NYPL determined several parts of each menu that could only be identified by the human eye.
+No matter how strict your guidelines or your submission protocols, variation will always be present in your crowdsourced data. However, there are ways to identify and normalize data in those cases. The New York Public Library (NYPL) possesses a digitized collection of approximately 45,000 menus, dating from the 1840s to today, and offers a good case study on how to correct some of these unavoidable issues. This collection is made public through [What's on the menu?](https://menus.nypl.org/). Instead of using optical character recognition (OCR) - a way of programmatically reading handwritten or printed documents into machine-searchable text - NYPL crowdsources transcription of the collection. Methods like OCR can save time but do not guarantee accuracy and often require humans to check and correct the output. In addition, the NYPL’s menus include a wide variety of handwritten texts and complex fonts, which meant writing a universal code to ensure OCR accuracy was very difficult. Even if a universal code could be developed, the NYPL determined that several parts of each menu could only be identified by the human eye.

Generated twice a month and available for public download, *What’s on the menu?* provides access to four distinct related datasets. The dataset we will use in this tutorial lists each menu and includes location and date information (the other datasets are relational and focus on different elements of each menu). This collection details meals over 150 years and shows what and when people ate in the past, adding a new dimension to historical understanding. The datasets curated by *What's on the menu?* include `Dish.csv`, `MenuItem.csv`, `MenuPage.csv`, and `Menu.csv`.

@@ -415,7 +415,7 @@ To avoid this problem, require date- or time-based data entry conform to a stand

#### Converting Datatype to Date
Once the dates are in a consistent format, pandas has a function that can help with date normalization. If the dates you are working with are in a standardized specific order, you can use the function `to_datetime()`. This will convert the `date` column from an object datatype (meaning that the contents of the column are made up of either text or numeric and non-numeric values) to a datetime datatype (meaning that the contents within the column consist of specifically formatted date and time values). Further [documentation](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html) details how to customize this function based on the unique date formats in your dataset.

-This function is powerful but also potentially limiting because the pandas library only recognizes dates within a [given period of time](http://pandas-docs.github.io/pandas-docs-travis/user_guide/timeseries.html#timestamp-limitations). Because of how the datetime timestamps are calculated within the built-in function, pandas can only deal with a time span of approximately 584 years; the minimum date is 1677 and the maximum date is 2262. Dates outside this timeframe will produce an error.
If your datasets date from before 1677, the pandas library is not a good option for this conversion. Other ways to approach date data normalization include using [regular expressions](https://www.oreilly.com/library/view/regular-expressions-cookbook/9781449327453/ch04s04.html), however, this involves being able to identify the specific written pattern(s) in which the errors manifest.
+This function is powerful but also potentially limiting because the pandas library only recognizes dates within a [given period of time](https://pandas-docs.github.io/pandas-docs-travis/user_guide/timeseries.html#timestamp-limitations). Because of how the datetime timestamps are calculated within the built-in function, pandas can only deal with a time span of approximately 584 years; the minimum date is 1677 and the maximum date is 2262. Dates outside this timeframe will produce an error. If your datasets date from before 1677, the pandas library is not a good option for this conversion. Other ways to approach date data normalization include using [regular expressions](https://www.oreilly.com/library/view/regular-expressions-cookbook/9781449327453/ch04s04.html); however, this involves being able to identify the specific written pattern(s) in which the errors manifest.

Because of this limitation, data entry errors related to the date produce an error when the `to_datetime` function is run. Our dataset contains several such errors. An example is entry 13,112, where the date is entered as `0190-03-06`. This is most likely an example of an input error, which is normal in transcription (human error). This error is identified if you enter this code in your Python file and run it to convert the column datatype to date:

@@ -527,7 +527,7 @@ replaced_dates.to_csv("NYPL_NormalMenus.csv")
```

## Conclusion
-The process of normalizing your data is rarely straightforward. In ["Against Cleaning"](http://curatingmenus.org/articles/against-cleaning/), authors Katie Rawson and Trevor Muñoz discuss what makes “cleaning” the NYPL menu datasets difficult. For example, there were changes in the spelling of different foods over time as well as differences in how dishes and drinks were referenced, to properly reflect their period. To “clean” that data - to normalize it - would diminish the historical value. In addition, as the authors discovered, it proved complex to distinguish “which variants in the names of dishes revealed new information (they) should account for in (their) own data, and which variants were simply accidents of transcription or typesetting.” Methods typically used to clean data were no longer sufficient.
+The process of normalizing your data is rarely straightforward. In ["Against Cleaning"](https://curatingmenus.org/articles/against-cleaning/), authors Katie Rawson and Trevor Muñoz discuss what makes “cleaning” the NYPL menu datasets difficult. For example, there were changes in the spelling of different foods over time, as well as differences in how dishes and drinks were referenced to properly reflect their period. To “clean” that data - to normalize it - would diminish its historical value. In addition, as the authors discovered, it proved complex to distinguish “which variants in the names of dishes revealed new information (they) should account for in (their) own data, and which variants were simply accidents of transcription or typesetting.” Methods typically used to clean data were no longer sufficient.
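
Before moving on, here is a brief recap, in code, of the date-conversion pitfall discussed earlier. This is a minimal sketch rather than the lesson's own code: the two-row frame and its values are hypothetical, and `errors="coerce"` is one way to turn out-of-range or unparseable dates into `NaT` so that the problem rows can be inspected instead of stopping on an exception:

```
import pandas as pd

# Hypothetical frame: one well-formed date and one out-of-range entry
# (years before 1677 fall outside pandas' Timestamp limits)
df = pd.DataFrame({"date": ["1900-01-05", "0190-03-06"]})

# errors="coerce" converts entries that cannot be parsed into NaT
df["date"] = pd.to_datetime(df["date"], errors="coerce")

# Show the rows that failed to convert
print(df[df["date"].isna()])
```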
Collecting data through crowdsourced means can be highly efficient, but normalizing humanities data can be complicated. Rawson and Muñoz found that the concept of “data cleaning” was no longer accurate and the process could not be completed using the “usual” methods. Humanities data is unique. It is diverse. It is complex. And, in many cases, historical detail is vital. Many techniques for normalization can be carried out programmatically but computers are unable to interpret unique situations with ease. As noted by Rawson and Muñoz, variability is not always a bad thing; it is not a mess that requires order above all else - it is a complex diversity that needs to be preserved. Data variability cannot be avoided when data is crowdsourced. Ultimately, it is up to you to determine whether common normalization practices are appropriate for your data as well as for your research questions. diff --git a/en/lessons/data-mining-the-internet-archive.md b/en/lessons/data-mining-the-internet-archive.md index f20cd0a901..2b28db1605 100755 --- a/en/lessons/data-mining-the-internet-archive.md +++ b/en/lessons/data-mining-the-internet-archive.md @@ -649,37 +649,37 @@ analyze which subjects are common in the MARC records. Now that you have the MARC records downloaded and can use `pymarc` to extract information from the fields, the possibilities can multiply rapidly! - [Internet Archive]: http://archive.org/ + [Internet Archive]: https://archive.org/ [early JSTOR journal content]: https://archive.org/details/jstor_ejc [John Adams's personal library]: https://archive.org/details/johnadamsBPL [Haiti collection]: https://archive.org/details/jcbhaiti - [Ian Milligan]: http://activehistory.ca/2013/09/the-internet-archive-rocks-or-two-million-plus-free-sources-to-explore/ - [Anti-Slavery Collection]: http://archive.org/details/bplscas + [Ian Milligan]: https://activehistory.ca/2013/09/the-internet-archive-rocks-or-two-million-plus-free-sources-to-explore/ + [Anti-Slavery Collection]: https://archive.org/details/bplscas [internetarchive]: https://pypi.python.org/pypi/internetarchive [pymarc]: https://pypi.python.org/pypi/pymarc/ - [this letter]: http://archive.org/details/lettertowilliaml00doug - [original manuscript]: http://archive.org/stream/lettertowilliaml00doug/39999066767938#page/n0/mode/2up - [multiple files]: http://archive.org/download/lettertowilliaml00doug - [Dublin Core]: http://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_dc.xml - [MARCXML]: http://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_marc.xml - [Library of Congress's MARC 21 Format for Bibliographic Data]: http://www.loc.gov/marc/bibliographic/ - [thousands of antislavery letters, manuscripts, and publications]: http://archive.org/search.php?query=collection%3Abplscas&sort=-publicdate + [this letter]: https://archive.org/details/lettertowilliaml00doug + [original manuscript]: https://archive.org/stream/lettertowilliaml00doug/39999066767938#page/n0/mode/2up + [multiple files]: https://archive.org/download/lettertowilliaml00doug + [Dublin Core]: https://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_dc.xml + [MARCXML]: https://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_marc.xml + [Library of Congress's MARC 21 Format for Bibliographic Data]: https://www.loc.gov/marc/bibliographic/ + [thousands of antislavery letters, manuscripts, and publications]: https://archive.org/search.php?query=collection%3Abplscas&sort=-publicdate [eBook and Texts]: 
https://archive.org/details/texts - [the way that items and item URLs are structured]: http://blog.archive.org/2011/03/31/how-archive-org-items-are-structured/ + [the way that items and item URLs are structured]: https://blog.archive.org/2011/03/31/how-archive-org-items-are-structured/ [advanced search]: https://archive.org/advancedsearch.php [this page]: https://archive.org/search.php?query=collection%3A%28bplscas%29 - [search the Archive using the Python module that we installed]: http://internetarchive.readthedocs.io/en/latest/quickstart.html#searching - [the advanced search for the collection]: http://archive.org/search.php?query=collection%3Abplscas - [downloading]: http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading + [search the Archive using the Python module that we installed]: https://internetarchive.readthedocs.io/en/latest/quickstart.html#searching + [the advanced search for the collection]: https://archive.org/search.php?query=collection%3Abplscas + [downloading]: https://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading [remember those?]: /lessons/code-reuse-and-modularity [item files are named according to specific rules]: https://archive.org/about/faqs.php#140 - [handling exceptions]: http://docs.python.org/2/tutorial/errors.html#handling-exceptions - [rules specified for the 260 datafield]: http://www.loc.gov/marc/bibliographic/bd260.html - [MARC standards]: http://www.loc.gov/marc/ + [handling exceptions]: https://docs.python.org/2/tutorial/errors.html#handling-exceptions + [rules specified for the 260 datafield]: https://www.loc.gov/marc/bibliographic/bd260.html + [MARC standards]: https://www.loc.gov/marc/ [1]: https://github.com/edsu/pymarc [functions that it provides for working with MARC XML records]: https://github.com/edsu/pymarc/blob/master/pymarc/marcxml.py [Counting Frequencies]: /lessons/counting-frequencies [Google Maps lesson]: /lessons/googlemaps-googleearth - [Wordle word cloud]: https://web.archive.org/web/20201202151557/http://www.wordle.net/ + [Wordle word cloud]: https://web.archive.org/web/20201202151557/https://www.wordle.net/ [cleaning of your data]: /lessons/cleaning-ocrd-text-with-regular-expressions [Installing Python Modules with pip]: /lessons/installing-python-modules-pip diff --git a/en/lessons/data-wrangling-and-management-in-r.md b/en/lessons/data-wrangling-and-management-in-r.md index 5c72ce12ac..3d0bddb9bd 100755 --- a/en/lessons/data-wrangling-and-management-in-r.md +++ b/en/lessons/data-wrangling-and-management-in-r.md @@ -1,608 +1,608 @@ ---- -title: Data Wrangling and Management in R -slug: data-wrangling-and-management-in-r -layout: lesson -collection: lessons -authors: -- Nabeel Siddiqui -date: 2017-07-31 -reviewers: -- Lauren Tilton -- Ryan Deschamps -editors: -- Ian Milligan -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/60 -difficulty: 2 -activity: transforming -topics: [data-manipulation, data-management, distant-reading, r, data-visualization] -abstract: "This tutorial explores how scholars can organize 'tidy' data, understand R packages to manipulate data, and conduct basic data analysis." -avatar_alt: Bar of soap -doi: 10.46430/phen0063 ---- - -{% include toc.html %} - - - - - -Assumptions -=========== - -This lesson makes a few assumptions about your understanding of R. If -you have not completed the [R Basics with Tabular -Data](/lessons/r-basics-with-tabular-data) -lesson, I suggest you complete that first. 
Having a background in -another programming language will also be beneficial. If you need a -place to start, I recommend working through the *Programming -Historian's* excellent Python tutorials. - -Lesson Goals -============ - -By the end of this lesson, you will: - -1. Know how to organize data to be "tidy" and why this is important. -2. Understand the dplyr package and use it to manipulate and wrangle - with data. -3. Become acquainted with the pipe operator in R and observe how it can - assist you in creating more readable code. -4. Learn to work through some basic examples of data manipulation to - gain a foundation in exploratory data analysis. - -Introduction -============ - -Data you find "in the wild" will rarely be in a format necessary for -analysis, and you will need to manipulate it before exploring the -questions you are interested in. This may take more time than doing the -analysis itself! In this tutorial, we will learn some basic techniques -for manipulating, managing, and wrangling with our data in R. -Specifically, we will rely on the philosophy of ["tidy -data"](https://www.jstatsoft.org/article/view/v059i10) as articulated by -Hadley Wickham. - -According to [Wickham](http://hadley.nz/), data is "tidy" when it meets -three key criteria: - -1. Each observation is in a row. -2. Each variable is in a column. -3. Each value has its own cell. - -Being observant of these criteria allows us to recognize when data is -organized or unorganized. It also provides us a standardized schema and -set of tools for cleaning up some of the most common ways that datasets -are "messy:" - -1. Column headers are values, not variable names. -2. Multiple variables are stored in one column. -3. Variables are stored in both rows and columns. -4. Multiple types of observational units are stored in the same table. -5. A single observational unit is stored in multiple tables. - -Perhaps most importantly, keeping our data in this format allows us to -use a collection of packages in the -["tidyverse,"](http://tidyverse.org/) which are designed to specifically -work with tidy data. By making sure that our input and output are tidy, -we only have to use a small set of tools to solve a large number of -questions. In addition, we can combine, manipulate, and split tidy -datasets as we see fit. - -In this tutorial, we will be focusing on the -[dplyr](https://cran.r-project.org/web/packages/dplyr/index.html) -package of the tidyverse, but it is worth briefly mentioning some others -we will be running into: - -[**magittr**](http://magrittr.tidyverse.org)--This package gives us -access to the forward pipe operator and makes our code easier to read. -[**ggplot2**](http://ggplot2.tidyverse.org/)--This package utilizes the -["Grammar of Graphics"](http://www.springer.com/us/book/9780387245447) -to provide an easy way to visualize our data. -[**readr**](http://readr.tidyverse.org)--This package makes available a -faster and more streamlined method of importing rectangular data, such -as csv files. -[**tibble**](http://tibble.tidyverse.org/)--This package provides us -access to a reconceptualization of data frames that are easier to work -with and print. - -If you have not already done so, you should install and load the -"tidyverse" before beginning. In addition, make sure that you have the -[latest version of R](https://cran.rstudio.com/) and the [latest version -of R Studio](https://www.rstudio.com/products/rstudio/download/) -installed for your respective platform. - -Copy the following code into RStudio. 
To run it, you need to highlight -the lines and press Ctrl+Enter (Command+Enter on Mac OS): - - # Install tidyverse libraries and load it - # Do not worry if this takes a while - - install.packages("tidyverse") - library(tidyverse) - -An Example of dplyr in Action -============================= - -Let's go through an example to see how dplyr can aid us as historians by -inputting U.S. decennial census data from 1790 to 2010. Download the -data by [clicking -here](/assets/data-wrangling-and-management-in-r/introductory_state_example.csv) -and place it in the folder that you will use to work through the examples -in this tutorial. - -Since the data is in a csv file, we are going to use the read\_csv() -command in tidyverse's -[readr](https://cran.r-project.org/web/packages/readr/vignettes/readr.html) -package. - -The read\_csv function takes the path of a file we want to import from -as a variable so make sure that you have it set up correctly. - - # Import CSV File and save to us_state_populations_import - # Make sure you set the path of the file correctly - us_state_populations_import<-read_csv("introductory_state_example.csv") - -After you import the data, you will notice that there are three columns: -one for the population, one for the year, and one for the state. This -data is already in a tidy format providing us a multitude of options for -further exploration. - -For this example, let's visualize the population growth of California and -New York to gain a better understanding of Western migration. We will -use dplyr to filter our data so that it only contains information about -the states we are interested in, and we will use ggplot2 to visualize -this information. This exercise is just to provide you a taste of what -dplyr can do, so don't worry if you don't understand the code at this -time. - - # Filter to California and New York states only - california_and_new_york_state_populations<-us_state_populations_import %>% - filter(state %in% c("California", "New York")) - - # Plot California and New York State Populations - ggplot(data=california_and_new_york_state_populations, aes(x=year, y=population, color=state)) + - geom_line() + - geom_point() - -{% include figure.html filename="en-or-data-wrangling-and-management-in-r-01.png" caption="Graph of California and New York population" %} - -As we can see, the population of California has grown considerably -compared to New York. While this particular example may seem obvious -given the history of U.S. migration, the code itself provides us a -foundation that we can build on to ask a multitude of similar questions. -For instance, with a quick change of code, we can create a similar graph -with two different states such as Mississippi and Virginia. - - # Filter to Mississippi and Virginia - mississippi_and_virginia_state_populations<-us_state_populations_import %>% - filter(state %in% c("Mississippi", "Virginia")) - - # Plot California and New York State Populations - ggplot(data=mississippi_and_virginia_state_populations, aes(x=year, y=population, color=state)) + - geom_line() + - geom_point() - -{% include figure.html filename="en-or-data-wrangling-and-management-in-r-02.png" caption="Graph of Mississippi and Virginia population" %} - -Quickly making changes to our code and reanalyzing our data is a -fundamental part of exploratory data analysis (EDA). Rather than trying -to "prove" a hypothesis, exploratory data analysis helps us understand -our data better and ask questions about it. 
For historians, EDA provides -an easy means of knowing when to dig deeper into a subject and when to -step back, and it is an area where R excels. - -Pipe Operator -============= - -Before looking at dplyr, we need to go over the pipe operator (%>%) -in R since we will often run into it in our examples. As mentioned -earlier, the pipe operator is part of the -[magrittr](https://cran.r-project.org/web/packages/magrittr/vignettes/magrittr.html) -package created by [Stefan Milton Bache](http://stefanbache.dk/) and -[Hadley Wickham](http://hadley.nz/) and is included in the tidyverse. -Its name is an homage to surrealest painter Rene Magritte, whose "The -Treachery of Images" famously depicted a pipe with the words "this is -not a pipe" underneath in French. - -The pipe operator allows you to pass what is to the left of the pipe as -the first variable in a function specified on the right. Although it may -seem strange at first, once you learn it, you will find that it makes -your code more readable by avoiding nested statements. Don't worry if -all this is a little confusing right now. It will become more clear as -we go through the examples. - -Let's say that we are interested in getting the square root of each -population value and then summing all the square roots before getting -the mean. Obviously, this isn't a useful measurement, but it demonstrates -just how quickly R code can become difficult to read. Normally, we would -nest such statements: - - mean(sum(sqrt(us_state_populations_import$population))) - - ## [1] 1256925 - -As you can see, with enough nested commands, it is hard to remember how -many parenthesis you need and makes the code awkward to read. To mitigate -this, some people may create temporary vectors in between each function -call. - - # Get square root of all the state populations - - sqrt_state_populations_vector<-sqrt(us_state_populations_import$population) - - # Get sum of all the sqrts of the temporary variable - - sum_sqrt_state_populations_vector<-sum(sqrt_state_populations_vector) - - # Get mean of the temporary variable - - mean_sum_sqrt_state_populations_vector<-mean(sum_sqrt_state_populations_vector) - - # Display the mean - - mean_sum_sqrt_state_populations_vector - - ## [1] 1256925 - -Although you get the same answer, this is a lot more readable. However, -it can quickly clutter your workspace if you forget to delete the -temporary vectors. The pipe operator does all this for you. Here is the -same code using the pipe operator. - - us_state_populations_import$population%>%sqrt%>%sum%>%mean - - ## [1] 1256925 - -This is a lot easier to read, and you could make it even more clear by -writing this on multiple lines. - - # Make sure to put the operator at the end of the line - us_state_populations_import$population%>% - sqrt%>% - sum%>% - mean - - ## [1] 1256925 - -Please note that the vectors or data frames that the pipe operator -creates are discarded after the operation is complete. If you want to -store them, you should pass them to a new variable. - - permanent_sqrt_and_sum_state_populations_vector <- us_state_populations_import$population%>%sqrt%>%sum%>%mean - permanent_sqrt_and_sum_state_populations_vector - - ## [1] 1256925 - -We Need a New Dataset -===================== - -Now that we have an understanding of the pipe operator, we are ready to -begin looking at and wrangling with some data. Unfortunately, for -historians, there are only a few easily available datasets--perhaps you -can help change this by making yours available to the public! 
We are -going to rely on the [history -data](https://www.google.com/search?q=cran%20historydata) package -created by [Lincoln Mullen](http://lincolnmullen.com/). - -Lets go ahead and install and load the package: - - # Install historydata package - install.packages("historydata") - - # Load historydata package - library(historydata) - -This packages contains samples of historical datasets--the earlier U.S. -Census data sample was taken from this package. Throughout this -tutorial, we are specifically going to work with the early\_colleges -dataset that contains data about colleges founded before 1848. Lets -start by loading the data and view it. - - # Make sure you have installed the historydata package and loaded it before this - - data(early_colleges) - early_colleges - - ## # A tibble: 65 × 6 - ## college original_name city state established sponsorship - ## - ## 1 Harvard Cambridge MA 1636 Congregational; after 1805 Unitarian - ## 2 William and Mary Williamsburg VA 1693 Anglican - ## 3 Yale New Haven CT 1701 Congregational - ## 4 Pennsylvania, Univ. of Philadelphia PA 1740 Nondenominational - ## 5 Princeton College of New Jersey Princeton NJ 1746 Presbyterian - ## 6 Columbia King's College New York NY 1754 Anglican - ## 7 Brown Providence RI 1765 Baptist - ## 8 Rutgers Queen's College New Brunswick NJ 1766 Dutch Reformed - ## 9 Dartmouth Hanover NH 1769 Congregational - ## 10 Charleston, Coll. Of Charleston SC 1770 Anglican - ## # ... with 55 more rows - -As you can observe, this dataset contains the current name of the college, its -original name, the city and state where it was founded, when the college was -established, and its sponsorship. As we discussed earlier, before we can -work with a dataset, it is important to think about how to organize the -data. Let's see if any of our data is not in a "tidy" format. Do you see -any cells that do not match the three criteria for tidy data? - -If you guessed the sponsorship of Harvard, you are correct. In addition -to noting the original sponsorship, it also mentions that it changed -sponsorship in 1805. Usually, you want to keep as much information about -your data that you can, but for the purposes of this tutorial, we are -going to change the column to only have the original sponsorship. - - early_colleges[1,6] <- "Congregational" - early_colleges - - ## # A tibble: 65 × 6 - ## college original_name city state established sponsorship - ## - ## 1 Harvard Cambridge MA 1636 Congregational - ## 2 William and Mary Williamsburg VA 1693 Anglican - ## 3 Yale New Haven CT 1701 Congregational - ## 4 Pennsylvania, Univ. of Philadelphia PA 1740 Nondenominational - ## 5 Princeton College of New Jersey Princeton NJ 1746 Presbyterian - ## 6 Columbia King's College New York NY 1754 Anglican - ## 7 Brown Providence RI 1765 Baptist - ## 8 Rutgers Queen's College New Brunswick NJ 1766 Dutch Reformed - ## 9 Dartmouth Hanover NH 1769 Congregational - ## 10 Charleston, Coll. Of Charleston SC 1770 Anglican - ## # ... with 55 more rows - -Now that we have our data in a tidy format, we can shape it through the -dplyr package. - -What is Dplyr? -============== - -[Dplyr](https://cran.rstudio.com/web/packages/dplyr/vignettes/dplyr.html) -is another part of the tidyverse that provides functions for -manipulating and transforming your data. Because we are keeping our data -"tidy," we only need a small set of tools to explore our data. Compared -to base R, using dplyr is often faster, and guarantees that if our -input is tidy then our output will also be tidy. 
Perhaps most importantly, -dplyr makes our code easier to read and utilizes "verbs" that are, in -most cases, intuitive. Each function in dplyr corresponds to these verbs, -with the five key ones being filter, select, arrange, mutate, and -summarise--dplyr uses the British spelling. Let's go through each of them -individually to see how they work in practice. - -### Select - -If we look at the early\_colleges data, we can observe that there are a -lot of NA's in the original names column. NA signifies that the data is -not available, and we may want to view our data with this column -removed. dplyr's select() function gives us the ability to do this. It -takes the data frame you want to manipulate as the first argument, followed by a -list signifying which columns you would like to keep: - - # Remove the original names column using select() - # Note that you do not have to append the column name with a $ to the end of early_colleges since - # dplyr automatically assumes that a "," represents AND - - select(early_colleges, college, city, state, established, sponsorship) - - ## # A tibble: 65 × 5 - ## college city state established sponsorship - ## - ## 1 Harvard Cambridge MA 1636 congregational - ## 2 William and Mary Williamsburg VA 1693 Anglican - ## 3 Yale New Haven CT 1701 Congregational - ## 4 Pennsylvania, Univ. of Philadelphia PA 1740 Nondenominational - ## 5 Princeton Princeton NJ 1746 Presbyterian - ## 6 Columbia New York NY 1754 Anglican - ## 7 Brown Providence RI 1765 Baptist - ## 8 Rutgers New Brunswick NJ 1766 Dutch Reformed - ## 9 Dartmouth Hanover NH 1769 Congregational - ## 10 Charleston, Coll. Of Charleston SC 1770 Anglican - ## # ℹ 55 more rows - -Let's also go ahead and see how to write this using the pipe operator -(%>%): - - early_colleges%>% - select(college, city, state, established, sponsorship) - - ## # A tibble: 65 × 5 - ## college city state established sponsorship - ## - ## 1 Harvard Cambridge MA 1636 congregational - ## 2 William and Mary Williamsburg VA 1693 Anglican - ## 3 Yale New Haven CT 1701 Congregational - ## 4 Pennsylvania, Univ. of Philadelphia PA 1740 Nondenominational - ## 5 Princeton Princeton NJ 1746 Presbyterian - ## 6 Columbia New York NY 1754 Anglican - ## 7 Brown Providence RI 1765 Baptist - ## 8 Rutgers New Brunswick NJ 1766 Dutch Reformed - ## 9 Dartmouth Hanover NH 1769 Congregational - ## 10 Charleston, Coll. Of Charleston SC 1770 Anglican - ## # ℹ 55 more rows - -Referencing each of the columns that we want to keep just to get rid of -one is a little tedous. We can use the minus symbol (-) to demonstrate -that we want to remove a column. - - early_colleges%>% - select(-original_name) - - ## # A tibble: 65 × 5 - ## college city state established sponsorship - ## - ## 1 Harvard Cambridge MA 1636 congregational - ## 2 William and Mary Williamsburg VA 1693 Anglican - ## 3 Yale New Haven CT 1701 Congregational - ## 4 Pennsylvania, Univ. of Philadelphia PA 1740 Nondenominational - ## 5 Princeton Princeton NJ 1746 Presbyterian - ## 6 Columbia New York NY 1754 Anglican - ## 7 Brown Providence RI 1765 Baptist - ## 8 Rutgers New Brunswick NJ 1766 Dutch Reformed - ## 9 Dartmouth Hanover NH 1769 Congregational - ## 10 Charleston, Coll. Of Charleston SC 1770 Anglican - ## # ℹ 55 more rows - -### Filter - -The filter() function does the same thing as the select function but -rather than choosing the column name, we can use it to filter rows using -a test requirement. 
For instance, we can view all the colleges that -existed before the turn of the century. - - early_colleges%>% - filter(established < 1800) - - ## # A tibble: 20 × 6 - ## college original_name city state established sponsorship - ## - ## 1 Harvard Cambridge MA 1636 Congregational - ## 2 William and Mary Williamsburg VA 1693 Anglican - ## 3 Yale New Haven CT 1701 Congregational - ## 4 Pennsylvania, Univ. of Philadelphia PA 1740 Nondenominational - ## 5 Princeton College of New Jersey Princeton NJ 1746 Presbyterian - ## 6 Columbia King's College New York NY 1754 Anglican - ## 7 Brown Providence RI 1765 Baptist - ## 8 Rutgers Queen's College New Brunswick NJ 1766 Dutch Reformed - ## 9 Dartmouth Hanover NH 1769 Congregational - ## 10 Charleston, Coll. Of Charleston SC 1770 Anglican - ## 11 Hampden-Sydney Hampden-Sydney VA 1775 Presbyterian - ## 12 Transylvania Lexington KY 1780 Disciples of Christ - ## 13 Georgia, Univ. of Athens GA 1785 Secular - ## 14 Georgetown Washington DC 1789 Roman Catholic - ## 15 North Carolina, Univ. of Chapel Hill NC 1789 Secular - ## 16 Vermont, Univ. of Burlington VT 1791 Nondenominational - ## 17 Williams Williamstown MA 1793 Congregational - ## 18 Tennessee, Univ. of Blount College Knoxville TN 1794 Secular - ## 19 Union College Schenectady NY 1795 Presbyterian with Congregational - ## 20 Marietta Marietta OH 1797 Congregational - -### Mutate - -The mutate command allows you to add a column to your data frame. Right -now, we have the city and state in two separate columns. We can use the -paste command to combine two strings and specify a seperator. Let's place -them in a single column called "location." - - early_colleges%>%mutate(location=paste(city,state,sep=",")) - - ## # A tibble: 65 × 7 - ## college original_name city state established sponsorship location - ## - ## 1 Harvard Cambridge MA 1636 Congregational Cambridge,MA - ## 2 William and Mary Williamsburg VA 1693 Anglican Williamsburg,VA - ## 3 Yale New Haven CT 1701 Congregational New Haven,CT - ## 4 Pennsylvania, Univ. of Philadelphia PA 1740 Nondenominational Philadelphia,PA - ## 5 Princeton College of New Jersey Princeton NJ 1746 Presbyterian Princeton,NJ - ## 6 Columbia King's College New York NY 1754 Anglican New York,NY - ## 7 Brown Providence RI 1765 Baptist Providence,RI - ## 8 Rutgers Queen's College New Brunswick NJ 1766 Dutch Reformed New Brunswick,NJ - ## 9 Dartmouth Hanover NH 1769 Congregational Hanover,NH - ## 10 Charleston, Coll. Of Charleston SC 1770 Anglican Charleston,SC - ## # ... with 55 more rows - -Again, you need to remember that dplyr does not save the data or -manipulate the original. Instead, it creates a temporary data frame at -each step. If you want to keep it, you need to create a permanent -variable. - - early_colleges_with_location <- early_colleges%>% - mutate(location=paste(city, state, sep=",")) - - # View the new tibble with the location added - early_colleges_with_location - - ## # A tibble: 65 × 7 - ## college original_name city state established sponsorship location - ## - ## 1 Harvard Cambridge MA 1636 Congregational Cambridge,MA - ## 2 William and Mary Williamsburg VA 1693 Anglican Williamsburg,VA - ## 3 Yale New Haven CT 1701 Congregational New Haven,CT - ## 4 Pennsylvania, Univ. 
of Philadelphia PA 1740 Nondenominational Philadelphia,PA - ## 5 Princeton College of New Jersey Princeton NJ 1746 Presbyterian Princeton,NJ - ## 6 Columbia King's College New York NY 1754 Anglican New York,NY - ## 7 Brown Providence RI 1765 Baptist Providence,RI - ## 8 Rutgers Queen's College New Brunswick NJ 1766 Dutch Reformed New Brunswick,NJ - ## 9 Dartmouth Hanover NH 1769 Congregational Hanover,NH - ## 10 Charleston, Coll. Of Charleston SC 1770 Anglican Charleston,SC - ## # ... with 55 more rows - -### Arrange - -The arrange() function allows us to order our columns in a new way. -Currently, the colleges are organized by year in ascending order. Lets -place them in descending order of establishment, in this case, from the end of the Mexican-American War. - - early_colleges %>% - arrange(desc(established)) - - ## # A tibble: 65 × 6 - ## college original_name city state established sponsorship - ## - ## 1 Wisconsin, Univ. of Madison WI 1848 Secular - ## 2 Earlham Richmond IN 1847 Quaker - ## 3 Beloit Beloit WI 1846 Congregational - ## 4 Bucknell Lewisburg PA 1846 Baptist - ## 5 Grinnell Grinnell IA 1846 Congregational - ## 6 Mount Union Alliance OH 1846 Methodist - ## 7 Louisiana, Univ. of New Orleans LA 1845 Secular - ## 8 U.S. Naval Academy Annapolis MD 1845 Secular - ## 9 Mississipps, Univ. of Oxford MI 1844 Secular - ## 10 Holy Cross Worchester MA 1843 Roman Catholic - ## # ... with 55 more rows - -### Summarise - -The last key function in dplyr is summarise()--note the British -spelling. Summarise() takes a function or operation, and is usually used -to create a data frame that contains summary statistics for plotting. We -will use it to calculate the average year that colleges before 1848 were -founded. - - early_colleges%>%summarise(mean(established)) - - ## # A tibble: 1 x 1 - ## `mean(established)` - ## - ## 1 1809.831 - -Putting it All Together -======================= - -Now that we have gone through the five main verbs for dplyr, we can use -them to create a quick visualization of our data. Let's go ahead and -create a bar graph showing the number of secular and non-secular -colleges founded before the U.S. War of 1812: - - secular_colleges_before_1812<-early_colleges%>% - filter(established < 1812)%>% - mutate(is_secular=ifelse(sponsorship!="Secular", "no", "yes")) - - ggplot(secular_colleges_before_1812) + - geom_bar(aes(x=is_secular, fill=is_secular))+ - labs(x="Is the college secular?") - -{% include figure.html filename="en-or-data-wrangling-and-management-in-r-03.png" caption="Number of secular and non-secular colleges before War of 1812" %} - -Again, by making a quick change to our code, we can also look at the -number of secular versus non-secular colleges founded after the start of -the War of 1812: - - secular_colleges_after_1812<-early_colleges%>% - filter(established > 1812)%>% - mutate(is_secular=ifelse(sponsorship!="Secular", "no", "yes")) - - ggplot(secular_colleges_after_1812) + - geom_bar(aes(x=is_secular, fill=is_secular))+ - labs(x="Is the college secular?") - -({% include figure.html filename="en-or-data-wrangling-and-management-in-r-04.png" caption="Number of secular and non-secular colleges after War of 1812" %} - -Conclusion -========== - -This tutorial should put you well on the way to thinking about how to -organize and manipulate your data in R. Later, you will probably want to -graph your data in some way. I recommend that you begin looking at the -[ggplot2](https://ggplot2.tidyverse.org/) package for a set of tools that work -well with dplyr. 
In addition, you may want to examine some of the -other functions that come with dplyr to hone your skills. Either way, -this should provide a good foundation to build on and cover a lot of -the common problems you will encounter. +--- +title: Data Wrangling and Management in R +slug: data-wrangling-and-management-in-r +layout: lesson +collection: lessons +authors: +- Nabeel Siddiqui +date: 2017-07-31 +reviewers: +- Lauren Tilton +- Ryan Deschamps +editors: +- Ian Milligan +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/60 +difficulty: 2 +activity: transforming +topics: [data-manipulation, data-management, distant-reading, r, data-visualization] +abstract: "This tutorial explores how scholars can organize 'tidy' data, understand R packages to manipulate data, and conduct basic data analysis." +avatar_alt: Bar of soap +doi: 10.46430/phen0063 +--- + +{% include toc.html %} + + + + + +Assumptions +=========== + +This lesson makes a few assumptions about your understanding of R. If +you have not completed the [R Basics with Tabular +Data](/lessons/r-basics-with-tabular-data) +lesson, I suggest you complete that first. Having a background in +another programming language will also be beneficial. If you need a +place to start, I recommend working through the *Programming +Historian's* excellent Python tutorials. + +Lesson Goals +============ + +By the end of this lesson, you will: + +1. Know how to organize data to be "tidy" and why this is important. +2. Understand the dplyr package and use it to manipulate and wrangle + with data. +3. Become acquainted with the pipe operator in R and observe how it can + assist you in creating more readable code. +4. Learn to work through some basic examples of data manipulation to + gain a foundation in exploratory data analysis. + +Introduction +============ + +Data you find "in the wild" will rarely be in a format necessary for +analysis, and you will need to manipulate it before exploring the +questions you are interested in. This may take more time than doing the +analysis itself! In this tutorial, we will learn some basic techniques +for manipulating, managing, and wrangling with our data in R. +Specifically, we will rely on the philosophy of ["tidy +data"](https://www.jstatsoft.org/article/view/v059i10) as articulated by +Hadley Wickham. + +According to [Wickham](https://hadley.nz/), data is "tidy" when it meets +three key criteria: + +1. Each observation is in a row. +2. Each variable is in a column. +3. Each value has its own cell. + +Being observant of these criteria allows us to recognize when data is +organized or unorganized. It also provides us a standardized schema and +set of tools for cleaning up some of the most common ways that datasets +are "messy:" + +1. Column headers are values, not variable names. +2. Multiple variables are stored in one column. +3. Variables are stored in both rows and columns. +4. Multiple types of observational units are stored in the same table. +5. A single observational unit is stored in multiple tables. + +Perhaps most importantly, keeping our data in this format allows us to +use a collection of packages in the +["tidyverse,"](https://tidyverse.org/) which are designed to specifically +work with tidy data. By making sure that our input and output are tidy, +we only have to use a small set of tools to solve a large number of +questions. In addition, we can combine, manipulate, and split tidy +datasets as we see fit. 
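+
+Before we turn to dplyr, here is a minimal sketch of fixing the first
+kind of messiness above (column headers that are values). The tiny data
+frame is hypothetical, and the code assumes the tibble and tidyr
+packages are installed (tidyr 1.0 or later for pivot_longer(); older
+tidyr offered gather() for the same reshape):
+
+    # A hypothetical "messy" table: year values live in the column headers
+    # (population figures here are illustrative)
+    messy <- tibble::tibble(
+      state  = c("California", "New York"),
+      `1900` = c(1485053, 7268894),
+      `1910` = c(2377549, 9113614)
+    )
+
+    # pivot_longer() moves the year headers into a proper 'year' variable,
+    # leaving one observation per row
+    tidyr::pivot_longer(messy, cols = c(`1900`, `1910`),
+                        names_to = "year", values_to = "population")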
+
+In this tutorial, we will be focusing on the
+[dplyr](https://cran.r-project.org/web/packages/dplyr/index.html)
+package of the tidyverse, but it is worth briefly mentioning some others
+we will be running into:
+
+[**magrittr**](https://magrittr.tidyverse.org)--This package gives us
+access to the forward pipe operator and makes our code easier to read.
+[**ggplot2**](https://ggplot2.tidyverse.org/)--This package utilizes the
+["Grammar of Graphics"](https://www.springer.com/us/book/9780387245447)
+to provide an easy way to visualize our data.
+[**readr**](https://readr.tidyverse.org)--This package makes available a
+faster and more streamlined method of importing rectangular data, such
+as csv files.
+[**tibble**](https://tibble.tidyverse.org/)--This package provides us
+access to a reconceptualization of data frames that are easier to work
+with and print.
+
+If you have not already done so, you should install and load the
+"tidyverse" before beginning. In addition, make sure that you have the
+[latest version of R](https://cran.rstudio.com/) and the [latest version
+of RStudio](https://www.rstudio.com/products/rstudio/download/)
+installed for your respective platform.
+
+Copy the following code into RStudio. To run it, you need to highlight
+the lines and press Ctrl+Enter (Command+Enter on Mac OS):
+
+    # Install tidyverse libraries and load it
+    # Do not worry if this takes a while
+
+    install.packages("tidyverse")
+    library(tidyverse)
+
+An Example of dplyr in Action
+=============================
+
+Let's go through an example to see how dplyr can aid us as historians by
+inputting U.S. decennial census data from 1790 to 2010. Download the
+data by [clicking
+here](/assets/data-wrangling-and-management-in-r/introductory_state_example.csv)
+and place it in the folder that you will use to work through the examples
+in this tutorial.
+
+Since the data is in a csv file, we are going to use the read\_csv()
+command in tidyverse's
+[readr](https://cran.r-project.org/web/packages/readr/vignettes/readr.html)
+package.
+
+The read\_csv function takes the path of a file we want to import from
+as a variable, so make sure that you have it set up correctly.
+
+    # Import CSV File and save to us_state_populations_import
+    # Make sure you set the path of the file correctly
+    us_state_populations_import<-read_csv("introductory_state_example.csv")
+
+After you import the data, you will notice that there are three columns:
+one for the population, one for the year, and one for the state. This
+data is already in a tidy format, providing us with a multitude of
+options for further exploration.
+
+For this example, let's visualize the population growth of California and
+New York to gain a better understanding of Western migration. We will
+use dplyr to filter our data so that it only contains information about
+the states we are interested in, and we will use ggplot2 to visualize
+this information. This exercise is just to provide you a taste of what
+dplyr can do, so don't worry if you don't understand the code at this
+time.
+
+    # Filter to California and New York states only
+    california_and_new_york_state_populations<-us_state_populations_import %>%
+      filter(state %in% c("California", "New York"))
+
+    # Plot California and New York State Populations
+    ggplot(data=california_and_new_york_state_populations, aes(x=year, y=population, color=state)) +
+      geom_line() +
+      geom_point()
+
+{% include figure.html filename="en-or-data-wrangling-and-management-in-r-01.png" caption="Graph of California and New York population" %}
+
+As we can see, the population of California has grown considerably
+compared to New York. While this particular example may seem obvious
+given the history of U.S. migration, the code itself provides us with a
+foundation that we can build on to ask a multitude of similar questions.
+For instance, with a quick change of code, we can create a similar graph
+with two different states such as Mississippi and Virginia.
+
+    # Filter to Mississippi and Virginia
+    mississippi_and_virginia_state_populations<-us_state_populations_import %>%
+      filter(state %in% c("Mississippi", "Virginia"))
+
+    # Plot Mississippi and Virginia State Populations
+    ggplot(data=mississippi_and_virginia_state_populations, aes(x=year, y=population, color=state)) +
+      geom_line() +
+      geom_point()
+
+{% include figure.html filename="en-or-data-wrangling-and-management-in-r-02.png" caption="Graph of Mississippi and Virginia population" %}
+
+Quickly making changes to our code and reanalyzing our data is a
+fundamental part of exploratory data analysis (EDA). Rather than trying
+to "prove" a hypothesis, exploratory data analysis helps us understand
+our data better and ask questions about it. For historians, EDA provides
+an easy means of knowing when to dig deeper into a subject and when to
+step back, and it is an area where R excels.
+
+Pipe Operator
+=============
+
+Before looking at dplyr, we need to go over the pipe operator (%>%)
+in R since we will often run into it in our examples. As mentioned
+earlier, the pipe operator is part of the
+[magrittr](https://cran.r-project.org/web/packages/magrittr/vignettes/magrittr.html)
+package created by [Stefan Milton Bache](https://stefanbache.dk/) and
+[Hadley Wickham](https://hadley.nz/) and is included in the tidyverse.
+Its name is an homage to the surrealist painter René Magritte, whose "The
+Treachery of Images" famously depicted a pipe with the words "this is
+not a pipe" underneath in French.
+
+The pipe operator allows you to pass what is to the left of the pipe as
+the first variable in a function specified on the right. Although it may
+seem strange at first, once you learn it, you will find that it makes
+your code more readable by avoiding nested statements. Don't worry if
+all this is a little confusing right now. It will become clearer as
+we go through the examples.
+
+Let's say that we are interested in getting the square root of each
+population value and then summing all the square roots before getting
+the mean. Obviously, this isn't a useful measurement, but it demonstrates
+just how quickly R code can become difficult to read. Normally, we would
+nest such statements:
+
+    mean(sum(sqrt(us_state_populations_import$population)))
+
+    ## [1] 1256925
+
+As you can see, with enough nested commands, it is hard to remember how
+many parentheses you need, and the code becomes awkward to read. To
+mitigate this, some people may create temporary vectors in between each
+function call.
+
+    # Get square root of all the state populations
+
+    sqrt_state_populations_vector<-sqrt(us_state_populations_import$population)
+
+    # Get sum of all the sqrts of the temporary variable
+
+    sum_sqrt_state_populations_vector<-sum(sqrt_state_populations_vector)
+
+    # Get mean of the temporary variable
+
+    mean_sum_sqrt_state_populations_vector<-mean(sum_sqrt_state_populations_vector)
+
+    # Display the mean
+
+    mean_sum_sqrt_state_populations_vector
+
+    ## [1] 1256925
+
+Although you get the same answer, this is a lot more readable. However,
+it can quickly clutter your workspace if you forget to delete the
+temporary vectors. The pipe operator does all this for you. Here is the
+same code using the pipe operator.
+
+    us_state_populations_import$population%>%sqrt%>%sum%>%mean
+
+    ## [1] 1256925
+
+This is a lot easier to read, and you could make it even more clear by
+writing this on multiple lines.
+
+    # Make sure to put the operator at the end of the line
+    us_state_populations_import$population%>%
+        sqrt%>%
+        sum%>%
+        mean
+
+    ## [1] 1256925
+
+Please note that the vectors or data frames that the pipe operator
+creates are discarded after the operation is complete. If you want to
+store them, you should pass them to a new variable.
+
+    permanent_sqrt_and_sum_state_populations_vector <- us_state_populations_import$population%>%sqrt%>%sum%>%mean
+    permanent_sqrt_and_sum_state_populations_vector
+
+    ## [1] 1256925
+
+We Need a New Dataset
+=====================
+
+Now that we have an understanding of the pipe operator, we are ready to
+begin looking at and wrangling with some data. Unfortunately, for
+historians, there are only a few easily available datasets--perhaps you
+can help change this by making yours available to the public! We are
+going to rely on the [history
+data](https://www.google.com/search?q=cran%20historydata) package
+created by [Lincoln Mullen](https://lincolnmullen.com/).
+
+Let's go ahead and install and load the package:
+
+    # Install historydata package
+    install.packages("historydata")
+
+    # Load historydata package
+    library(historydata)
+
+This package contains samples of historical datasets--the earlier U.S.
+Census data sample was taken from this package. Throughout this
+tutorial, we are specifically going to work with the early\_colleges
+dataset that contains data about colleges founded before 1848. Let's
+start by loading the data and viewing it.
+
+    # Make sure you have installed the historydata package and loaded it before this
+
+    data(early_colleges)
+    early_colleges
+
+    ## # A tibble: 65 × 6
+    ## college original_name city state established sponsorship
+    ##
+    ## 1 Harvard Cambridge MA 1636 Congregational; after 1805 Unitarian
+    ## 2 William and Mary Williamsburg VA 1693 Anglican
+    ## 3 Yale New Haven CT 1701 Congregational
+    ## 4 Pennsylvania, Univ. of Philadelphia PA 1740 Nondenominational
+    ## 5 Princeton College of New Jersey Princeton NJ 1746 Presbyterian
+    ## 6 Columbia King's College New York NY 1754 Anglican
+    ## 7 Brown Providence RI 1765 Baptist
+    ## 8 Rutgers Queen's College New Brunswick NJ 1766 Dutch Reformed
+    ## 9 Dartmouth Hanover NH 1769 Congregational
+    ## 10 Charleston, Coll. Of Charleston SC 1770 Anglican
+    ## # ... with 55 more rows
+
+As you can observe, this dataset contains the current name of the college, its
+original name, the city and state where it was founded, when the college was
+established, and its sponsorship.
As we discussed earlier, before we can
+work with a dataset, it is important to think about how to organize the
+data. Let's see if any of our data is not in a "tidy" format. Do you see
+any cells that do not match the three criteria for tidy data?
+
+If you guessed the sponsorship of Harvard, you are correct. In addition
+to noting the original sponsorship, it also mentions that the college
+changed sponsorship in 1805. Usually, you want to keep as much
+information about your data as you can, but for the purposes of this
+tutorial, we are going to change the cell so that it records only the
+original sponsorship.
+
+    early_colleges[1,6] <- "Congregational"
+    early_colleges
+
+    ## # A tibble: 65 × 6
+    ## college original_name city state established sponsorship
+    ##
+    ## 1 Harvard Cambridge MA 1636 Congregational
+    ## 2 William and Mary Williamsburg VA 1693 Anglican
+    ## 3 Yale New Haven CT 1701 Congregational
+    ## 4 Pennsylvania, Univ. of Philadelphia PA 1740 Nondenominational
+    ## 5 Princeton College of New Jersey Princeton NJ 1746 Presbyterian
+    ## 6 Columbia King's College New York NY 1754 Anglican
+    ## 7 Brown Providence RI 1765 Baptist
+    ## 8 Rutgers Queen's College New Brunswick NJ 1766 Dutch Reformed
+    ## 9 Dartmouth Hanover NH 1769 Congregational
+    ## 10 Charleston, Coll. Of Charleston SC 1770 Anglican
+    ## # ... with 55 more rows
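+
+Before moving on, you may want to confirm that no other sponsorship
+cells hold more than one value. A quick check (a sketch that assumes
+multiple sponsorships would be separated by a semicolon, as Harvard's
+was) uses base R's grepl():
+
+    # List any sponsorship values that still contain a semicolon
+    early_colleges$sponsorship[grepl(";", early_colleges$sponsorship)]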
Of Charleston SC 1770 Anglican
+
+Now that we have our data in a tidy format, we can shape it through the
+dplyr package.
+
+What is Dplyr?
+==============
+
+[Dplyr](https://cran.rstudio.com/web/packages/dplyr/vignettes/dplyr.html)
+is another part of the tidyverse that provides functions for
+manipulating and transforming your data. Because we are keeping our data
+"tidy," we only need a small set of tools to explore our data. Compared
+to base R, using dplyr is often faster, and it guarantees that if our
+input is tidy then our output will also be tidy. Perhaps most
+importantly, dplyr makes our code easier to read and utilizes "verbs"
+that are, in most cases, intuitive. Each function in dplyr corresponds
+to one of these verbs, with the five key ones being filter, select,
+arrange, mutate, and summarise--dplyr uses the British spelling. Let's
+go through each of them individually to see how they work in practice.
+
+### Select
+
+If we look at the early\_colleges data, we can observe that there are a
+lot of NA's in the original names column. NA signifies that the data is
+not available, and we may want to view our data with this column
+removed. dplyr's select() function gives us the ability to do this. It
+takes the data frame you want to manipulate as the first argument,
+followed by a list signifying which columns you would like to keep:
+
+    # Remove the original names column using select()
+    # Note that you do not need to prefix each column name with early_colleges$ --
+    # dplyr knows the columns belong to the data frame passed as the first argument
+
+    select(early_colleges, college, city, state, established, sponsorship)
+
+    ## # A tibble: 65 × 5
+    ## college city state established sponsorship
+    ##
+    ## 1 Harvard Cambridge MA 1636 Congregational
+    ## 2 William and Mary Williamsburg VA 1693 Anglican
+    ## 3 Yale New Haven CT 1701 Congregational
+    ## 4 Pennsylvania, Univ. of Philadelphia PA 1740 Nondenominational
+    ## 5 Princeton Princeton NJ 1746 Presbyterian
+    ## 6 Columbia New York NY 1754 Anglican
+    ## 7 Brown Providence RI 1765 Baptist
+    ## 8 Rutgers New Brunswick NJ 1766 Dutch Reformed
+    ## 9 Dartmouth Hanover NH 1769 Congregational
+    ## 10 Charleston, Coll. Of Charleston SC 1770 Anglican
+    ## # ℹ 55 more rows
+
+Let's also go ahead and see how to write this using the pipe operator
+(%>%):
+
+    early_colleges%>%
+    select(college, city, state, established, sponsorship)
+
+    ## # A tibble: 65 × 5
+    ## college city state established sponsorship
+    ##
+    ## 1 Harvard Cambridge MA 1636 Congregational
+    ## 2 William and Mary Williamsburg VA 1693 Anglican
+    ## 3 Yale New Haven CT 1701 Congregational
+    ## 4 Pennsylvania, Univ. of Philadelphia PA 1740 Nondenominational
+    ## 5 Princeton Princeton NJ 1746 Presbyterian
+    ## 6 Columbia New York NY 1754 Anglican
+    ## 7 Brown Providence RI 1765 Baptist
+    ## 8 Rutgers New Brunswick NJ 1766 Dutch Reformed
+    ## 9 Dartmouth Hanover NH 1769 Congregational
+    ## 10 Charleston, Coll. Of Charleston SC 1770 Anglican
+    ## # ℹ 55 more rows
+
+Referencing each of the columns that we want to keep just to get rid of
+one is a little tedious. We can use the minus symbol (-) to indicate
+that we want to remove a column.
+
+    early_colleges%>%
+    select(-original_name)
+
+    ## # A tibble: 65 × 5
+    ## college city state established sponsorship
+    ##
+    ## 1 Harvard Cambridge MA 1636 Congregational
+    ## 2 William and Mary Williamsburg VA 1693 Anglican
+    ## 3 Yale New Haven CT 1701 Congregational
+    ## 4 Pennsylvania, Univ. of Philadelphia PA 1740 Nondenominational
+    ## 5 Princeton Princeton NJ 1746 Presbyterian
+    ## 6 Columbia New York NY 1754 Anglican
+    ## 7 Brown Providence RI 1765 Baptist
+    ## 8 Rutgers New Brunswick NJ 1766 Dutch Reformed
+    ## 9 Dartmouth Hanover NH 1769 Congregational
+    ## 10 Charleston, Coll. Of Charleston SC 1770 Anglican
+    ## # ℹ 55 more rows
+
+### Filter
+
+The filter() function works much like the select() function, but rather
+than choosing column names, we use it to keep only the rows that meet a
+test requirement. For instance, we can view all the colleges that
+existed before the turn of the century.
+
+    early_colleges%>%
+    filter(established < 1800)
+
+    ## # A tibble: 20 × 6
+    ## college original_name city state established sponsorship
+    ##
+    ## 1 Harvard Cambridge MA 1636 Congregational
+    ## 2 William and Mary Williamsburg VA 1693 Anglican
+    ## 3 Yale New Haven CT 1701 Congregational
+    ## 4 Pennsylvania, Univ. of Philadelphia PA 1740 Nondenominational
+    ## 5 Princeton College of New Jersey Princeton NJ 1746 Presbyterian
+    ## 6 Columbia King's College New York NY 1754 Anglican
+    ## 7 Brown Providence RI 1765 Baptist
+    ## 8 Rutgers Queen's College New Brunswick NJ 1766 Dutch Reformed
+    ## 9 Dartmouth Hanover NH 1769 Congregational
+    ## 10 Charleston, Coll. Of Charleston SC 1770 Anglican
+    ## 11 Hampden-Sydney Hampden-Sydney VA 1775 Presbyterian
+    ## 12 Transylvania Lexington KY 1780 Disciples of Christ
+    ## 13 Georgia, Univ. of Athens GA 1785 Secular
+    ## 14 Georgetown Washington DC 1789 Roman Catholic
+    ## 15 North Carolina, Univ. of Chapel Hill NC 1789 Secular
+    ## 16 Vermont, Univ. of Burlington VT 1791 Nondenominational
+    ## 17 Williams Williamstown MA 1793 Congregational
+    ## 18 Tennessee, Univ. of Blount College Knoxville TN 1794 Secular
+    ## 19 Union College Schenectady NY 1795 Presbyterian with Congregational
+    ## 20 Marietta Marietta OH 1797 Congregational
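+
+filter() can also combine several tests at once: conditions separated
+by commas are joined with a logical AND. As a brief sketch (using only
+columns we have already seen in early\_colleges):
+
+    # Colleges founded before 1800 with a secular sponsorship
+    early_colleges%>%
+    filter(established < 1800, sponsorship == "Secular")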
+### Mutate
+
+The mutate command allows you to add a column to your data frame. Right
+now, we have the city and state in two separate columns. We can use the
+paste command to combine two strings and specify a separator. Let's
+place them in a single column called "location."
+
+    early_colleges%>%mutate(location=paste(city,state,sep=","))
+
+    ## # A tibble: 65 × 7
+    ## college original_name city state established sponsorship location
+    ##
+    ## 1 Harvard Cambridge MA 1636 Congregational Cambridge,MA
+    ## 2 William and Mary Williamsburg VA 1693 Anglican Williamsburg,VA
+    ## 3 Yale New Haven CT 1701 Congregational New Haven,CT
+    ## 4 Pennsylvania, Univ. of Philadelphia PA 1740 Nondenominational Philadelphia,PA
+    ## 5 Princeton College of New Jersey Princeton NJ 1746 Presbyterian Princeton,NJ
+    ## 6 Columbia King's College New York NY 1754 Anglican New York,NY
+    ## 7 Brown Providence RI 1765 Baptist Providence,RI
+    ## 8 Rutgers Queen's College New Brunswick NJ 1766 Dutch Reformed New Brunswick,NJ
+    ## 9 Dartmouth Hanover NH 1769 Congregational Hanover,NH
+    ## 10 Charleston, Coll. Of Charleston SC 1770 Anglican Charleston,SC
+    ## # ... with 55 more rows
+
+Again, you need to remember that dplyr does not modify the original
+data. Instead, it creates a temporary data frame at each step. If you
+want to keep the result, you need to assign it to a permanent variable.
+
+    early_colleges_with_location <- early_colleges%>%
+    mutate(location=paste(city, state, sep=","))
+
+    # View the new tibble with the location added
+    early_colleges_with_location
+
+    ## # A tibble: 65 × 7
+    ## college original_name city state established sponsorship location
+    ##
+    ## 1 Harvard Cambridge MA 1636 Congregational Cambridge,MA
+    ## 2 William and Mary Williamsburg VA 1693 Anglican Williamsburg,VA
+    ## 3 Yale New Haven CT 1701 Congregational New Haven,CT
+    ## 4 Pennsylvania, Univ. of Philadelphia PA 1740 Nondenominational Philadelphia,PA
+    ## 5 Princeton College of New Jersey Princeton NJ 1746 Presbyterian Princeton,NJ
+    ## 6 Columbia King's College New York NY 1754 Anglican New York,NY
+    ## 7 Brown Providence RI 1765 Baptist Providence,RI
+    ## 8 Rutgers Queen's College New Brunswick NJ 1766 Dutch Reformed New Brunswick,NJ
+    ## 9 Dartmouth Hanover NH 1769 Congregational Hanover,NH
+    ## 10 Charleston, Coll. Of Charleston SC 1770 Anglican Charleston,SC
+    ## # ... with 55 more rows
+
+### Arrange
+
+The arrange() function allows us to order our rows in a new way.
+Currently, the colleges are organized by year in ascending order. Let's
+place them in descending order of establishment, in this case starting
+from 1848, the end of the Mexican-American War.
+
+    early_colleges %>%
+    arrange(desc(established))
+
+    ## # A tibble: 65 × 6
+    ## college original_name city state established sponsorship
+    ##
+    ## 1 Wisconsin, Univ. of Madison WI 1848 Secular
+    ## 2 Earlham Richmond IN 1847 Quaker
+    ## 3 Beloit Beloit WI 1846 Congregational
+    ## 4 Bucknell Lewisburg PA 1846 Baptist
+    ## 5 Grinnell Grinnell IA 1846 Congregational
+    ## 6 Mount Union Alliance OH 1846 Methodist
+    ## 7 Louisiana, Univ. of New Orleans LA 1845 Secular
+    ## 8 U.S. Naval Academy Annapolis MD 1845 Secular
+    ## 9 Mississipps, Univ. of Oxford MI 1844 Secular
+    ## 10 Holy Cross Worchester MA 1843 Roman Catholic
+    ## # ... with 55 more rows
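+
+arrange() also accepts more than one column. As a short sketch, this
+sorts the colleges alphabetically by state and then by founding year
+within each state:
+
+    # Order by state, then by year of establishment within each state
+    early_colleges%>%
+    arrange(state, established)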
+### Summarise
+
+The last key function in dplyr is summarise()--note the British
+spelling. Summarise() takes a function or operation, and is usually used
+to create a data frame that contains summary statistics for plotting. We
+will use it to calculate the average year that colleges before 1848 were
+founded.
+
+    early_colleges%>%summarise(mean(established))
+
+    ## # A tibble: 1 x 1
+    ## `mean(established)`
+    ##
+    ## 1 1809.831
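+
+Although it falls outside the five key verbs, it is worth knowing that
+summarise() is often paired with group_by(), which computes a summary
+for each group rather than for the whole table. A short sketch, again
+using only the early\_colleges columns from above:
+
+    # Average founding year for each sponsorship
+    early_colleges%>%
+    group_by(sponsorship)%>%
+    summarise(mean(established))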
+
+Putting it All Together
+=======================
+
+Now that we have gone through the five main verbs for dplyr, we can use
+them to create a quick visualization of our data. Let's go ahead and
+create a bar graph showing the number of secular and non-secular
+colleges founded before the U.S. War of 1812:
+
+    secular_colleges_before_1812<-early_colleges%>%
+    filter(established < 1812)%>%
+    mutate(is_secular=ifelse(sponsorship!="Secular", "no", "yes"))
+
+    ggplot(secular_colleges_before_1812) +
+    geom_bar(aes(x=is_secular, fill=is_secular))+
+    labs(x="Is the college secular?")
+
+{% include figure.html filename="en-or-data-wrangling-and-management-in-r-03.png" caption="Number of secular and non-secular colleges before War of 1812" %}
+
+Again, by making a quick change to our code, we can also look at the
+number of secular versus non-secular colleges founded after the start of
+the War of 1812:
+
+    secular_colleges_after_1812<-early_colleges%>%
+    filter(established > 1812)%>%
+    mutate(is_secular=ifelse(sponsorship!="Secular", "no", "yes"))
+
+    ggplot(secular_colleges_after_1812) +
+    geom_bar(aes(x=is_secular, fill=is_secular))+
+    labs(x="Is the college secular?")
+
+{% include figure.html filename="en-or-data-wrangling-and-management-in-r-04.png" caption="Number of secular and non-secular colleges after War of 1812" %}
+
+Conclusion
+==========
+
+This tutorial should put you well on the way to thinking about how to
+organize and manipulate your data in R. Later, you will probably want to
+graph your data in some way. I recommend that you begin looking at the
+[ggplot2](https://ggplot2.tidyverse.org/) package for a set of tools that work
+well with dplyr. In addition, you may want to examine some of the
+other functions that come with dplyr to hone your skills. Either way,
+this should provide a good foundation to build on, and it covers a lot
+of the common problems you will encounter.
diff --git a/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.md b/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.md
index 013ebe33ee..6464d1b2d4 100755
--- a/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.md
+++ b/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.md
@@ -62,8 +62,8 @@ The final section of this lesson contains code and data to illustrate the key po
 Although beyond the scope of this tutorial, those interested in trying to better understand social networks can refer to a number of sources.
-Sociologists Robert A. Hanneman and Mark Riddle maintain an [on-line textbook on network analysis](http://faculty.ucr.edu/~hanneman/nettext/).
-There are also regular conferences hosted and useful resources available from the [International Network for Social Network Analysis](http://www.insna.org).
+Sociologists Robert A. Hanneman and Mark Riddle maintain an [on-line textbook on network analysis](https://faculty.ucr.edu/~hanneman/nettext/).
+There are also regular conferences hosted and useful resources available from the [International Network for Social Network Analysis](https://www.insna.org).
 I strongly recommend that you read the lesson through before trying the example data.
@@ -348,7 +348,7 @@ provides us with the basic corporate interlock network that existed in Canada in
 If we use the web interface that comes with Neo4j we'll be able to see what parts of this network look like by using a simple query.
 With the Neo4j database running, we can open up the built-in browser to make more Cypher queries.
-(Or we can put the following URL into a browser [http://localhost:7474/browser/](http://localhost:7474/browser/).
+(Or we can put the following URL into a browser: [http://localhost:7474/browser/](http://localhost:7474/browser/).)
 Add the following Cypher query.
@@ -402,7 +402,7 @@ web site.
 # Putting it all together: A working example
-If we return to the [web interface on your local machine](http://localhost:7474) we can query our new database.
+If we return to the [web interface on your local machine](http://localhost:7474) we can query our new database.
 Let's look at the firms that have the greatest number of connections (i.e. the highest degree).
 To calculate degree we can make a simple query with Cypher.
@@ -438,7 +438,7 @@ return c0, r, c1;
 {% include figure.html filename="graph_example.png" caption="Example graph" %}
-You can download the data used in this lesson [here](http://jgmackay.com/) (search for the relevant blog posts).
+You can download the data used in this lesson [here](https://jgmackay.com/) (search for the relevant blog posts).
 If you make use of this data, please cite the following in addition to this lesson:
 Mackay, Jon. 2017. "Canadian Regional and National Business Elites in 1912: Who Was Connected, Who Wasn't and
diff --git a/en/lessons/detecting-text-reuse-with-passim.md b/en/lessons/detecting-text-reuse-with-passim.md
index f6d5c61f48..a10324afa4 100644
--- a/en/lessons/detecting-text-reuse-with-passim.md
+++ b/en/lessons/detecting-text-reuse-with-passim.md
@@ -40,7 +40,7 @@ The following list includes just some of the libraries available that perform au
 - [Basic Local Alignment Search Tool (BLAST)](https://blast.ncbi.nlm.nih.gov/Blast.cgi)
 - [Tesserae](https://github.com/tesserae/tesserae) (PHP, Perl)
 - [TextPAIR (Pairwise Alignment for Intertextual Relations)](https://github.com/ARTFL-Project/text-pair)
-- [Passim](https://github.com/dasmiq/passim) (Scala) developed by [David Smith](http://www.ccs.neu.edu/home/dasmith/
+- [Passim](https://github.com/dasmiq/passim) (Scala) developed by [David Smith](https://www.ccs.neu.edu/home/dasmith/
 ) (Northeastern University)
 For this tutorial we chose the Passim library for three main reasons. Firstly, it can be adapted to a variety of use cases as it works well on a small text collection as well as on a large-scale corpus. Secondly, while the documentation for Passim is extensive, because of its relatively advanced user audience, a more user-centered step-by-step tutorial about detecting text reuse with Passim would be beneficial to the user community. Lastly, the following examples illustrate the variety of scenarios in which text reuse is a useful methodology:
@@ -204,7 +204,7 @@
 ### Installing Spark
-1. Navigate to the [download section](http://spark.apache.org/downloads) of the Spark website and select Spark release version '3.x.x' (where '*x*' means any version that starts with '3.'), and package type 'Pre-built for Apache Hadoop 2.7' from the dropdown menus.
+1. Navigate to the [download section](https://spark.apache.org/downloads) of the Spark website and select Spark release version '3.x.x' (where '*x*' means any version that starts with '3.'), and package type 'Pre-built for Apache Hadoop 2.7' from the dropdown menus.
 2. Extract the compressed binaries to a directory of your choice (e.g. `/Applications`):
 ```bash
@@ -402,7 +402,7 @@ Ultimately, what constitutes a document, and how these documents should be divid
 ## Basic JSON format
-The input format for Passim consists of JSON documents in the [JSON lines format](http://jsonlines.org/) (i.e. each line of text contains a single JSON document).
+The input format for Passim consists of JSON documents in the [JSON lines format](https://jsonlines.org/) (i.e. each line of text contains a single JSON document).
 The following file content for a file named `test.json` illustrates a minimal example of the input format for Passim:
@@ -707,7 +707,7 @@ You are now ready to go forward with your first text reuse project.
 For now, do not worry about the additional arguments `SPARK_SUBMIT_ARGS='--master local[12] --driver-memory 8G --executor-memory 4G'`; in the section ["Case Study 2"](#case-study-2:-text-reuse-in-a-large-corpus-of-historical-newspapers) we will explain them in detail.
-This test case takes approximatively eight minutes on a recent laptop with eight threads. You can also follow the progress of the detection at http://localhost:4040 — an interactive dashboard created by Spark (Note: the dashboard will shut down as soon as Passim has finished running).
+This test case takes approximately eight minutes on a recent laptop with eight threads. You can also follow the progress of the detection at http://localhost:4040 — an interactive dashboard created by Spark (Note: the dashboard will shut down as soon as Passim has finished running).
 ## Case study 2: Text Reuse in a large corpus of historical newspapers
@@ -895,15 +895,15 @@ MR gratefully acknowledges the financial support of the Swiss National Science F
 # Bibliography
-1. Greta Franzini, Maria Moritz, Marco Büchler, Marco Passarotti. Using and evaluating TRACER for an Index fontium computatus of the Summa contra Gentiles of Thomas Aquinas. In *Proceedings of the Fifth Italian Conference on Computational Linguistics (CLiC-it 2018)*. (2018). [Link](http://ceur-ws.org/Vol-2253/paper22.pdf)
-2. David A. Smith, Ryan Cordell, Abby Mullen. Computational Methods for Uncovering Reprinted Texts in Antebellum Newspapers. *American Literary History* **27**, E1–E15 Oxford University Press, 2015. [Link](http://dx.doi.org/10.1093/alh/ajv029)
-3. Ryan Cordell. Reprinting Circulation, and the Network Author in Antebellum Newspapers. *American Literary History* **27**, 417–445 Oxford University Press (OUP), 2015. [Link](http://dx.doi.org/10.1093/alh/ajv028)
-4. Daniel Vogler, Linards Udris, Mark Eisenegger. Measuring Media Content Concentration at a Large Scale Using Automated Text Comparisons. *Journalism Studies* **0**, 1–20 Taylor & Francis, 2020. [Link](http://dx.doi.org/10.1080/1461670x.2020.1761865)
+1. Greta Franzini, Maria Moritz, Marco Büchler, Marco Passarotti. Using and evaluating TRACER for an Index fontium computatus of the Summa contra Gentiles of Thomas Aquinas. In *Proceedings of the Fifth Italian Conference on Computational Linguistics (CLiC-it 2018)*. (2018). [Link](https://ceur-ws.org/Vol-2253/paper22.pdf)
+2. David A. Smith, Ryan Cordell, Abby Mullen. Computational Methods for Uncovering Reprinted Texts in Antebellum Newspapers.
*American Literary History* **27**, E1–E15 Oxford University Press, 2015. [Link](https://dx.doi.org/10.1093/alh/ajv029)
+3. Ryan Cordell. Reprinting, Circulation, and the Network Author in Antebellum Newspapers. *American Literary History* **27**, 417–445 Oxford University Press (OUP), 2015. [Link](https://dx.doi.org/10.1093/alh/ajv028)
+4. Daniel Vogler, Linards Udris, Mark Eisenegger. Measuring Media Content Concentration at a Large Scale Using Automated Text Comparisons. *Journalism Studies* **0**, 1–20 Taylor & Francis, 2020. [Link](https://dx.doi.org/10.1080/1461670x.2020.1761865)
 5. Lincoln Mullen. textreuse: Detect Text Reuse and Document Similarity. (2016). [Link](https://github.com/ropensci/textreuse)
-6. Marco Büchler, Philip R. Burns, Martin Müller, Emily Franzini, Greta Franzini. Towards a Historical Text Re-use Detection. 221–238 In *Text Mining: From Ontology Learning to Automated Text Processing Applications*. Springer International Publishing, 2014. [Link](http://dx.doi.org/10.1007/978-3-319-12655-5_11)
-7. Paul Vierthaler, Meet Gelein. A BLAST-based, Language-agnostic Text Reuse Algorithm with a MARKUS Implementation and Sequence Alignment Optimized for Large Chinese Corpora. *Journal of Cultural Analytics* (2019). [Link](http://dx.doi.org/10.22148/16.034)
+6. Marco Büchler, Philip R. Burns, Martin Müller, Emily Franzini, Greta Franzini. Towards a Historical Text Re-use Detection. 221–238 In *Text Mining: From Ontology Learning to Automated Text Processing Applications*. Springer International Publishing, 2014. [Link](https://dx.doi.org/10.1007/978-3-319-12655-5_11)
+7. Paul Vierthaler, Meet Gelein. A BLAST-based, Language-agnostic Text Reuse Algorithm with a MARKUS Implementation and Sequence Alignment Optimized for Large Chinese Corpora. *Journal of Cultural Analytics* (2019). [Link](https://dx.doi.org/10.22148/16.034)
 8. Aleksi Vesanto, Asko Nivala, Heli Rantala, Tapio Salakoski, Hannu Salmi, Filip Ginter. Applying BLAST to Text Reuse Detection in Finnish Newspapers and Journals, 1771-1910. 54–58 In *Proceedings of the NoDaLiDa 2017 Workshop on Processing Historical Language*. Linköping University Electronic Press, 2017. [Link](https://aclanthology.org/W17-0510.pdf)
 9. Hannu Salmi, Heli Rantala, Aleksi Vesanto, Filip Ginter. The long-term reuse of text in the Finnish press, 1771–1920. **2364**, 394–544 In *CEUR Workshop Proceedings*. (2019).
-10. Axel J Soto, Abidalrahman Mohammad, Andrew Albert, Aminul Islam, Evangelos Milios, Michael Doyle, Rosane Minghim, Maria Cristina de Oliveira. Similarity-Based Support for Text Reuse in Technical Writing. 97–106 In *Proceedings of the 2015 ACM Symposium on Document Engineering*. ACM, 2015. [Link](http://dx.doi.org/10.1145/2682571.2797068)
+10. Axel J Soto, Abidalrahman Mohammad, Andrew Albert, Aminul Islam, Evangelos Milios, Michael Doyle, Rosane Minghim, Maria Cristina de Oliveira. Similarity-Based Support for Text Reuse in Technical Writing. 97–106 In *Proceedings of the 2015 ACM Symposium on Document Engineering*. ACM, 2015. [Link](https://dx.doi.org/10.1145/2682571.2797068)
 11. Alexandra Schofield, Laure Thompson, David Mimno. Quantifying the Effects of Text Duplication on Semantic Models. 2737–2747 In *Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing*. Association for Computational Linguistics, 2017. [Link](https://doi.org/10.18653/v1/D17-1290)
 12. Matteo Romanello, Aurélien Berra, Alexandra Trachsel. Rethinking Text Reuse as Digital Classicists. *Digital Humanities conference*, 2014.
[Link](https://wiki.digitalclassicist.org/Text_Reuse)
diff --git a/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.md b/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.md
index ffe4b03d3c..2e6865cb56 100644
--- a/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.md
+++ b/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.md
@@ -58,7 +58,7 @@ To access the StoryMap JS authoring tool, go to the [StoryMap JS website](https:
 {% include figure.html filename="make_storymapjs.png" caption="Story Map JS: Make a StoryMap." %}
-You will be prompted to login with a Google account. If you don’t have one, you can create one at [gmail.com](http://gmail.com).
+You will be prompted to log in with a Google account. If you don’t have one, you can create one at [gmail.com](https://gmail.com).
 {% include figure.html filename="gmail_signin.png" caption="StoryMap JS: Sign in with Google." %}
diff --git a/en/lessons/downloading-multiple-records-using-query-strings.md b/en/lessons/downloading-multiple-records-using-query-strings.md
index cc2bbe44dd..08676100e9 100644
--- a/en/lessons/downloading-multiple-records-using-query-strings.md
+++ b/en/lessons/downloading-multiple-records-using-query-strings.md
@@ -1166,20 +1166,20 @@ have recently released an API and the documentation can be quite helpful:
 - Old Bailey Online API -
-  ()
-- Python Best way to create directory if it doesn’t exist for file write? ()
+  ()
+- Python Best way to create directory if it doesn’t exist for file write? ()
-  [Old Bailey Online]: http://www.oldbaileyonline.org/
+  [Old Bailey Online]: https://www.oldbaileyonline.org/
   [Automated Downloading with WGET]: /lessons/automated-downloading-with-wget
-  [Benjamin Bowsey’s case]: http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33
-  [advanced search form]: http://www.oldbaileyonline.org/forms/formMain.jsp
+  [Benjamin Bowsey’s case]: https://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33
+  [advanced search form]: https://www.oldbaileyonline.org/forms/formMain.jsp
   [Viewing HTML Files]: /lessons/viewing-html-files
   [Working with Webpages]: /lessons/working-with-web-pages
   [From HTML to a List of Words 2]: /lessons/from-html-to-list-of-words-2
   [range]: https://docs.python.org/3/tutorial/controlflow.html#the-range-function
   [regular expressions]: https://docs.python.org/3/library/re.html
   [Counting Frequencies]: /lessons/counting-frequencies
-  [time out]: http://www.checkupdown.com/status/E408.html
+  [time out]: https://www.checkupdown.com/status/E408.html
   [Python Programming Basics]: /lessons/introduction-and-installation
-  [try / except]: http://docs.python.org/tutorial/errors.html
+  [try / except]: https://docs.python.org/tutorial/errors.html
diff --git a/en/lessons/editing-audio-with-audacity.md b/en/lessons/editing-audio-with-audacity.md
index b1b1b92cac..57119c7e7e 100755
--- a/en/lessons/editing-audio-with-audacity.md
+++ b/en/lessons/editing-audio-with-audacity.md
@@ -29,7 +29,7 @@ doi: 10.46430/phen0050
 For those interested in audio, basic sound editing skills go a long way. Being able to handle and manipulate the materials can help you take control of your object of study: you can zoom in and extract particular moments to analyze, process the audio, and upload the materials to a server to complement a blog post on the topic. On a more practical level, these skills could also allow you to record and package recordings of yourself or others for distribution.
That guest lecture taking place in your department? Record it and edit it yourself! Doing so is a lightweight way to distribute resources among various institutions, and it also helps make the materials more accessible for readers and listeners with a wide variety of learning needs.
-In this lesson you will learn how to use *[Audacity](http://audacityteam.org/)* to load, record, edit, mix, and export audio files. Sound editing platforms are often expensive and offer extensive capabilities that can be overwhelming to the first-time user, but *Audacity* is a free and open source alternative that offers powerful capabilities for sound editing with a low barrier for entry.
+In this lesson you will learn how to use *[Audacity](https://audacityteam.org/)* to load, record, edit, mix, and export audio files. Sound editing platforms are often expensive and offer extensive capabilities that can be overwhelming to the first-time user, but *Audacity* is a free and open source alternative that offers powerful capabilities for sound editing with a low barrier for entry.
 For this lesson we will work with two audio files: a recording of [Bach's Goldberg Variations]({{ root_url }}/assets/editing-audio-with-audacity/bach-goldberg-variations.mp3) and another recording of your own voice that will be made in the course of the lesson.
@@ -41,7 +41,7 @@
 First, download the necessary files.
 You will need the [mp3 file of Bach's Goldberg Variations]({{ root_url }}/assets/editing-audio-with-audacity/bach-goldberg-variations.mp3). To download, right click [here]({{ root_url }}/assets/editing-audio-with-audacity/bach-goldberg-variations.mp3) and select 'Save Link As' to download the file onto your computer as an MP3.
-Next, download and install *Audacity*, which is available on [the project site](http://audacityteam.org/). *Audacity* can be used on Mac OSX, Windows, or Linux.
+Next, download and install *Audacity*, which is available on [the project site](https://audacityteam.org/). *Audacity* can be used on Mac OSX, Windows, or Linux.
 Download the program and double-click to install.
@@ -53,7 +53,7 @@
 The interface will change to reflect the loaded data:
 *Audacity* converts your sound into a waveform, a commonly used mode for representing sound. The x-axis represents time as seconds (or minutes and seconds, depending on the length of the clip). The beginning of the sound occurs at the far left of the interface, and *Audacity* ticks off periodic time markers as the wave continues to the right. If we click the play button, *Audacity* will move from left to right over the sound, with a vertical line representing our current point in the clip.
-The y-axis represents amplitude, what we experience as loudness or volume. By default, the y-axis measures volume on a vertical linear scale from -1 to 1: the -1 and 1 extremes represent the loudest possible recorded sound without distortion, while 0 represents silence. So silence begins as a flat line, and the sound will get taller and deeper as it increases in intensity. For more information on why some of the numbers are negative, check out Jeffrey Hass' very short [primer to acoustics](http://web.archive.org/web/20161119231053/http://www.indiana.edu:80/~emusic/acoustics/amplitude.htm).
+The y-axis represents amplitude, what we experience as loudness or volume. By default, the y-axis measures volume on a vertical linear scale from -1 to 1: the -1 and 1 extremes represent the loudest possible recorded sound without distortion, while 0 represents silence.
So silence begins as a flat line, and the sound will get taller and deeper as it increases in intensity. For more information on why some of the numbers are negative, check out Jeffrey Hass' very short [primer to acoustics](https://web.archive.org/web/20161119231053/http://www.indiana.edu:80/~emusic/acoustics/amplitude.htm).
 *Audacity*'s representation of time and amplitude is your first and easiest point of reference for editing sound, and the tool offers handy ways to navigate around them. I keep calling this a wave, but it doesn't look all that much like one just yet. Let's take a closer look by selecting a piece of the audio track.
@@ -89,7 +89,7 @@
 You will be presented with something that looks like this:
 Our original Bach recording stays at the top of the interface, while our new recording gets added below it. By default, *Audacity* will not overwrite your previous recording. Instead, it isolates both soundstreams, or tracks, allowing us to manipulate separate components before we mix them together in a final recording. We can make changes to one without affecting the other. Note how, time-wise, the new track by default was recorded at the beginning of the audacity project. For right now, the Bach and vocal tracks both begin at the same time. There are potentially some other imperfections in your unique recording, some of which we can fix.
-Finally, note how in my example there are two waveforms for the Bach recording but only one for the recording of my own voice. The Bach recording was made in *stereo*, meaning there were two input feeds, while my own recording was made in *mono*. Audacity allows you to record in both, and either one will work for this lesson, so don't worry if your recording appears in stereo. You can change from mono to stereo recording and vice versa from the 'Edit' toolbar, accessible from the 'Toolbars' portion of the 'View' menu. For more information on mono vs stereo, check out this [reading](http://www.diffen.com/difference/Mono_vs_Stereo).
+Finally, note how in my example there are two waveforms for the Bach recording but only one for the recording of my own voice. The Bach recording was made in *stereo*, meaning there were two input feeds, while my own recording was made in *mono*. Audacity allows you to record in both, and either one will work for this lesson, so don't worry if your recording appears in stereo. You can change from mono to stereo recording and vice versa from the 'Edit' toolbar, accessible from the 'Toolbars' portion of the 'View' menu. For more information on mono vs stereo, check out this [reading](https://www.diffen.com/difference/Mono_vs_Stereo).
 An aside: it can frequently be helpful to turn your laptop's sound output into its input, so that you can record the sounds playing from your computer without worrying about extraneous noise from the outside world or to rerecord digital audio. For information on how to carry out this process, check out [Soundflower](https://github.com/mattingalls/Soundflower).
@@ -143,7 +143,7 @@ But we will eventually want to transition the track's focus away from the intro
 * Selecting "Crossfade Tracks" from the Effect menu will tell Audacity to fade out the top track while fading in the bottom track - the positioning of the tracks matters in this case.
-*Audacity* will prompt you with options for your crossfade, but for now it is fine to go with the default setting of "Constant Gain."
This setting ensures that both tracks will fade in or linearly (for more information, check out the *Audacity* [documentation on crossfades](http://manual.audacityteam.org/man/crossfade_tracks.html))
+*Audacity* will prompt you with options for your crossfade, but for now it is fine to go with the default setting of "Constant Gain." This setting ensures that both tracks will fade in or out linearly (for more information, check out the *Audacity* [documentation on crossfades](https://manual.audacityteam.org/man/crossfade_tracks.html)).
 {% include figure.html filename="editing-audio-with-audacity-13.png" caption="Post-crossfade" %}
@@ -157,6 +157,6 @@ By default, everything you do in *Audacity* is saved in the tool's own filetype,
 Doing so will mix the multiple tracks down to a single audio file and give you the opportunity to provide your work with metadata.
-There are a range of different options for you to refine the exporting process, but the most important is "File Type." MP3 and Ogg are good options for audio meant to be displayed on the web, as they both compress the files so that they will be quicker to load. For best results, you can actually include both formats and only display the one as a fallback when one is not supported by a user's web browser. For more information, *NCH Software* provides a [good technical breakdown of the different options](http://www.nch.com.au/acm/formats.html), while Jonathan Sterne has done [fascinating work](https://www.dukeupress.edu/mp3) on the cultural implications of such format decisions. And the W3Schools offer a [good comparison](http://www.w3schools.com/html/html5_audio.asp) of these file formats for use in web development.
+There are a range of different options for you to refine the exporting process, but the most important is "File Type." MP3 and Ogg are good options for audio meant to be displayed on the web, as they both compress the files so that they will be quicker to load. For best results, you can include both formats and display one as a fallback when the other is not supported by a user's web browser. For more information, *NCH Software* provides a [good technical breakdown of the different options](https://www.nch.com.au/acm/formats.html), while Jonathan Sterne has done [fascinating work](https://www.dukeupress.edu/mp3) on the cultural implications of such format decisions. And the W3Schools offer a [good comparison](https://www.w3schools.com/html/html5_audio.asp) of these file formats for use in web development.
 Congratulations! You have successfully produced a baby podcast. It might not seem like much, but I frequently employ this same bag of tricks for presentations, websites, and scholarship. This lesson has by no means begun to exhaust the many topics under that umbrella. But it should have given you some basic tools useful for working with sound in digital humanities projects.
diff --git a/en/lessons/exploring-and-analyzing-network-data-with-python.md b/en/lessons/exploring-and-analyzing-network-data-with-python.md index 3609734a2c..0db27c9f9f 100755 --- a/en/lessons/exploring-and-analyzing-network-data-with-python.md +++ b/en/lessons/exploring-and-analyzing-network-data-with-python.md @@ -50,7 +50,7 @@ In this tutorial, you will learn: This tutorial assumes that you have: - a basic familiarity with networks and/or have read ["From Hermeneutics to Data to Networks: Data Extraction and Network Visualization of Historical Sources"](/lessons/creating-network-diagrams-from-historical-sources) by Martin Düring here on *Programming Historian*; -- Installed Python 3, not the Python 2 that is installed natively in Unix-based operating systems such as Macs (If you need assistance installing Python 3, check out the [Hitchhiker's Guide to Python](http://docs.python-guide.org/en/latest/starting/installation/)); and +- Installed Python 3, not the Python 2 that is installed natively in Unix-based operating systems such as Macs (If you need assistance installing Python 3, check out the [Hitchhiker's Guide to Python](https://docs.python-guide.org/en/latest/starting/installation/)); and - Installed the `pip` package installer.[^pipinstall] It's possible to have two versions of Python (2 *and* 3) installed on your computer at one time. For this reason, when accessing Python 3 you will often have to explicitly declare it by typing `python3` and `pip3` instead of simply `python` and `pip`. Check out the *Programming Historian* tutorials on [installing Python](/lessons/introduction-and-installation) and [working with pip](/lessons/installing-python-modules-pip) for more information. @@ -69,7 +69,7 @@ This tutorial will help you answer questions such as: Before there were Facebook friends, there was the Society of Friends, known as the Quakers. Founded in England in the mid-seventeenth century, the Quakers were Protestant Christians who dissented from the official Church of England and promoted broad religious toleration, preferring Christians' supposed "inner light" and consciences to state-enforced orthodoxy. Quakers' numbers grew rapidly in the mid- to late-seventeenth century and their members spread through the British Isles, Europe, and the New World colonies---especially Pennsylvania, founded by Quaker leader William Penn and the home of your four authors. -Since scholars have long linked Quakers' growth and endurance to the effectiveness of their networks, the data used in this tutorial is a list of names and relationships among the earliest seventeenth-century Quakers. This dataset is derived from the *[Oxford Dictionary of National Biography](http://www.oxforddnb.com)* and from the ongoing work of the *[Six Degrees of Francis Bacon](http://www.sixdegreesoffrancisbacon.com)* project, which is reconstructing the social networks of early modern Britain (1500-1700). +Since scholars have long linked Quakers' growth and endurance to the effectiveness of their networks, the data used in this tutorial is a list of names and relationships among the earliest seventeenth-century Quakers. This dataset is derived from the *[Oxford Dictionary of National Biography](https://www.oxforddnb.com)* and from the ongoing work of the *[Six Degrees of Francis Bacon](https://www.sixdegreesoffrancisbacon.com)* project, which is reconstructing the social networks of early modern Britain (1500-1700). 
# Data Prep and NetworkX Installation @@ -330,7 +330,7 @@ The visualization below, created in network visualization tool [Gephi](https://g {% include figure.html filename="exploring-and-analyzing-network-data-with-python-1.png" caption="Force-directed network visualization of the Quaker data, created in Gephi" %} -There are lots of ways to visualize a network, and a [force-directed layout](https://en.wikipedia.org/wiki/Force-directed_graph_drawing), of which the above image is an example, is among the most common. Force-directed graphs attempt to find the optimum placement for nodes with a calculation based on the [tension of springs in Hooke's Law](http://6dfb.tumblr.com/post/159420498411/ut-tensio-sic-vis-introducing-the-hooke-graph), which for smaller graphs often creates clean, easy-to-read visualizations. The visualization embedded above shows you there is a single large **component** of connected nodes (in the center) and several small components with just one or two connections around the edges. This is a fairly common network structure. Knowing that there are multiple components in the network will usefully limit the calculations you'll want to perform on it. By displaying the number of connections (known as **degree**, see below) as the size of nodes, the visualization also shows that there are a few nodes with lots of connections that keep the central component tied together. These large nodes are known as **hubs**, and the fact that they show up so clearly here gives you a clue as to what you'll find when you measure **centrality** in the next section. +There are lots of ways to visualize a network, and a [force-directed layout](https://en.wikipedia.org/wiki/Force-directed_graph_drawing), of which the above image is an example, is among the most common. Force-directed graphs attempt to find the optimum placement for nodes with a calculation based on the [tension of springs in Hooke's Law](https://6dfb.tumblr.com/post/159420498411/ut-tensio-sic-vis-introducing-the-hooke-graph), which for smaller graphs often creates clean, easy-to-read visualizations. The visualization embedded above shows you there is a single large **component** of connected nodes (in the center) and several small components with just one or two connections around the edges. This is a fairly common network structure. Knowing that there are multiple components in the network will usefully limit the calculations you'll want to perform on it. By displaying the number of connections (known as **degree**, see below) as the size of nodes, the visualization also shows that there are a few nodes with lots of connections that keep the central component tied together. These large nodes are known as **hubs**, and the fact that they show up so clearly here gives you a clue as to what you'll find when you measure **centrality** in the next section. Visualizations, however, only get you so far. The more networks you work with, the more you realize most appear similar enough that it's hard to tell one from the next. Quantitative metrics let you differentiate networks, learn about their topologies, and turn a jumble of nodes and edges into something you can learn from. @@ -347,7 +347,7 @@ print("Network density:", density) The output of density is a number, so that's what you'll see when you print the value. In this case, the density of our network is approximately 0.0248. 
On a scale of 0 to 1, not a very dense network, which comports with what you can see in the visualization.[^density] A 0 would mean that there are no connections at all, and a 1 would indicate that all *possible* edges are present (a perfectly connected network): this Quaker network is on the lower end of that scale, but still far from 0.
A shortest path measurement is a bit more complex. It calculates the shortest possible series of nodes and edges that stand between any two nodes, something hard to see in large network visualizations. This measure is essentially finding friends-of-friends---if my mother knows someone that I don't, then mom is the shortest path between me and that person. The Six Degrees of Kevin Bacon game, from which [our project](https://sixdegreesoffrancisbacon.com/) takes its name, is basically a game of finding shortest paths (with a **path length** of six or less) from Kevin Bacon to any other actor.
 To calculate a shortest path, you'll need to pass several input variables (information you give to a Python function): the whole graph, your source node, and your target node. Let's find the shortest path between Margaret Fell and George Whitehead. Since we used names to uniquely identify our nodes in the network, you can access those nodes (as the **source** and **target** of your path), using the names directly.
@@ -488,7 +488,7 @@ Another common thing to ask about a network dataset is what the subgroups or communities are
 Very dense networks are often more difficult to split into sensible partitions. Luckily, as you discovered earlier, this network is not all that dense. There aren't nearly as many actual connections as possible connections, and there are several altogether disconnected components. It's worthwhile partitioning this sparse network with modularity and seeing if the results make historical and analytical sense.
Community detection and partitioning in NetworkX requires a little more setup than some of the other metrics.
There are some built-in approaches to community detection (like [minimum cut](https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.flow.minimum_cut.html)), but modularity is not included with NetworkX. Fortunately there's an [additional Python module](https://github.com/taynaud/python-louvain/) you can use with NetworkX, which you already installed and imported at the beginning of this tutorial. You can read the [full documentation](https://perso.crans.org/aynaud/communities/api.html) for all of the functions it offers, but for most community detection purposes you'll only want `best_partition()`:
 ```python
 communities = community.greedy_modularity_communities(G)
@@ -541,7 +541,7 @@ Working with NetworkX alone will get you far, and you can find out a lot about m
 # Exporting Data
-NetworkX supports a very large number of file formats for [data export](https://networkx.github.io/documentation/stable/reference/readwrite/index.html). If you wanted to export a plaintext edgelist to load into Palladio, there's a [convenient wrapper](https://networkx.github.io/documentation/stable/reference/readwrite/generated/networkx.readwrite.edgelist.write_edgelist.html) for that. Frequently at *Six Degrees of Francis Bacon*, we export NetworkX data in [D3's specialized JSON format](https://networkx.github.io/documentation/stable/reference/readwrite/generated/networkx.readwrite.json_graph.node_link_data.html), for visualization in the browser. You could even [export](https://networkx.github.io/documentation/stable/reference/generated/networkx.convert_matrix.to_pandas_adjacency.html) your graph as a [Pandas dataframe](http://pandas.pydata.org/) if there were more advanced statistical operations you wanted to run. There are lots of options, and if you've been diligently adding all your metrics back into your Graph object as attributes, all your data will be exported in one fell swoop.
+NetworkX supports a very large number of file formats for [data export](https://networkx.github.io/documentation/stable/reference/readwrite/index.html). If you wanted to export a plaintext edgelist to load into Palladio, there's a [convenient wrapper](https://networkx.github.io/documentation/stable/reference/readwrite/generated/networkx.readwrite.edgelist.write_edgelist.html) for that. Frequently at *Six Degrees of Francis Bacon*, we export NetworkX data in [D3's specialized JSON format](https://networkx.github.io/documentation/stable/reference/readwrite/generated/networkx.readwrite.json_graph.node_link_data.html), for visualization in the browser. You could even [export](https://networkx.github.io/documentation/stable/reference/generated/networkx.convert_matrix.to_pandas_adjacency.html) your graph as a [Pandas dataframe](https://pandas.pydata.org/) if there were more advanced statistical operations you wanted to run. There are lots of options, and if you've been diligently adding all your metrics back into your Graph object as attributes, all your data will be exported in one fell swoop.
 Most of the export options work in roughly the same way, so for this tutorial you'll learn how to export your data into Gephi's GEXF format. Once you've exported the file, you can upload it [directly into Gephi](https://gephi.org/users/supported-graph-formats/) for visualization.
diff --git a/en/lessons/extracting-illustrated-pages.md b/en/lessons/extracting-illustrated-pages.md
index 01f979fbc3..2b7d429ec0 100644
--- a/en/lessons/extracting-illustrated-pages.md
+++ b/en/lessons/extracting-illustrated-pages.md
@@ -35,7 +35,7 @@ To see how many *unillustrated* pages have been filtered out, compare against th
 {% include figure.html filename="parley-full-thumbnails.png" caption="View of HathiTrust thumbnails for all pages." %}
-This lesson shows how complete these filtering and downloading steps for public-domain text volumes held by HathiTrust (HT) and Internet Archive (IA), two of the largest digital libraries in the world. It will be of interest to anyone who wants to create image corpora in order to learn about the history of illustration and the layout (*mise en page*) of books. Visual approaches to digital bibliography are becoming popular, following the pioneering efforts of [EBBA](https://ebba.english.ucsb.edu/) and [AIDA](http://projectaida.org/). Recently completed or funded projects explore ways to [identify footnotes](https://web.archive.org/web/20190526050917/http://culturalanalytics.org/2018/12/detecting-footnotes-in-32-million-pages-of-ecco/) and [track marginalia](http://www.ccs.neu.edu/home/dasmith/ichneumon-proposal.pdf), to give just two [examples](https://www.neh.gov/divisions/odh/grant-news/announcing-new-2017-odh-grant-awards).
+This lesson shows how to complete these filtering and downloading steps for public-domain text volumes held by HathiTrust (HT) and Internet Archive (IA), two of the largest digital libraries in the world. It will be of interest to anyone who wants to create image corpora in order to learn about the history of illustration and the layout (*mise en page*) of books. Visual approaches to digital bibliography are becoming popular, following the pioneering efforts of [EBBA](https://ebba.english.ucsb.edu/) and [AIDA](https://projectaida.org/). Recently completed or funded projects explore ways to [identify footnotes](https://web.archive.org/web/20190526050917/http://culturalanalytics.org/2018/12/detecting-footnotes-in-32-million-pages-of-ecco/) and [track marginalia](https://www.ccs.neu.edu/home/dasmith/ichneumon-proposal.pdf), to give just two [examples](https://www.neh.gov/divisions/odh/grant-news/announcing-new-2017-odh-grant-awards).
 My own research tries to answer empirical questions about changes in the frequency and mode of illustration in nineteenth-century medical and educational texts. This involves aggregating counts of pictures per book and trying to estimate what printing process was used to make those pictures. A more targeted use case for extracting picture pages might be the collation of illustrations across [different editions](https://www.cambridge.org/core/books/cambridge-companion-to-robinson-crusoe/iconic-crusoe-illustrations-and-images-of-robinson-crusoe/B83352C33FB1A9929A856FFA8E2D0CD0/core-reader) of the same book. Future work might profitably investigate the visual characteristics and *meaning* of the extracted pictures: their color, size, theme, genre, number of figures, and so on.
@@ -116,7 +116,7 @@ Anaconda is the leading scientific Python distribution. Its `conda` package mana
 Download and install [Miniconda](https://conda.io/miniconda.html). Choose the latest stable release of Python 3. If everything goes well, you should be able to run `which conda` (linux/macOS) or `where conda` (Windows) in your shell and see the location of the executable program in the output.
-Anaconda has a handy [cheat sheet](http://web.archive.org/web/20190115051900/https://conda.io/docs/_downloads/conda-cheatsheet.pdf) for frequently used commands. +Anaconda has a handy [cheat sheet](https://web.archive.org/web/20190115051900/https://conda.io/docs/_downloads/conda-cheatsheet.pdf) for frequently used commands. ### Create an Environment diff --git a/en/lessons/extracting-keywords.md b/en/lessons/extracting-keywords.md index c505eda463..6826d83719 100644 --- a/en/lessons/extracting-keywords.md +++ b/en/lessons/extracting-keywords.md @@ -30,7 +30,7 @@ doi: 10.46430/phen0045 ## Lesson Goals -If you have a copy of a text in electronic format stored on your computer, it is relatively easy to keyword search for a single term. Often you can do this by using the built-in search features in your favourite text editor. However, scholars are increasingly needing to find instances of many terms within a text or texts. For example, a scholar may want to use a [gazetteer](http://en.wikipedia.org/wiki/Gazetteer) to extract all mentions of English placenames within a collection of texts so that those places can later be plotted on a map. Alternatively, they may want to extract all male given names, all pronouns, [stop words](http://en.wikipedia.org/wiki/Stop_words), or any other set of words. Using those same built-in search features to achieve this more complex goal is time consuming and clunky. This lesson will teach you how to use Python to extract a set of keywords very quickly and systematically from a set of texts. +If you have a copy of a text in electronic format stored on your computer, it is relatively easy to keyword search for a single term. Often you can do this by using the built-in search features in your favourite text editor. However, scholars are increasingly needing to find instances of many terms within a text or texts. For example, a scholar may want to use a [gazetteer](https://en.wikipedia.org/wiki/Gazetteer) to extract all mentions of English placenames within a collection of texts so that those places can later be plotted on a map. Alternatively, they may want to extract all male given names, all pronouns, [stop words](https://en.wikipedia.org/wiki/Stop_words), or any other set of words. Using those same built-in search features to achieve this more complex goal is time consuming and clunky. This lesson will teach you how to use Python to extract a set of keywords very quickly and systematically from a set of texts. It is expected that once you have completed this lesson, you will be able to generalise the skills to extract custom sets of keywords from any set of locally saved files. @@ -38,7 +38,7 @@ It is expected that once you have completed this lesson, you will be able to gen This lesson is useful for anyone who works with historical sources that are stored locally on their own computer, and that are transcribed into mutable electronic text (eg, .txt, .xml, .rtf, .md). It is particularly useful for people interested in identifying subsets of documents containing one or more of a fairly large number of keywords. This might be useful for identifying a relevant subset for closer reading, or for extracting and structuring the keywords in a format that can be used in another tool: as input for a mapping exercise, for example. 
-The present tutorial will show users how to extract all mentions of English and Welsh county names from a series of 6,692 mini-biographies of individuals who began their studies at the University of Oxford during the reign of James I of England (1603-1625). These records were transcribed by [British History Online](http://www.british-history.ac.uk/alumni-oxon/1500-1714), from the printed version of *Alumni Oxonienses, 1500-1714*. These biographies contain information about each graduate, which includes the date of their studies and the college(s) they attended. Often entries contain additional information when known, including date or birth and death, the name or occupation of their father, where they originated, and what they went on to do in later life. The biographies are a rich resource, providing reasonably comparable data about a large number of similar individuals (rich men who went to Oxford). The 6,692 entries have been pre-processed by the author and saved to a [CSV file](http://en.wikipedia.org/wiki/Comma-separated_values) with one entry per row.
+The present tutorial will show users how to extract all mentions of English and Welsh county names from a series of 6,692 mini-biographies of individuals who began their studies at the University of Oxford during the reign of James I of England (1603-1625). These records were transcribed by [British History Online](https://www.british-history.ac.uk/alumni-oxon/1500-1714), from the printed version of *Alumni Oxonienses, 1500-1714*. These biographies contain information about each graduate, which includes the date of their studies and the college(s) they attended. Often entries contain additional information when known, including date of birth and death, the name or occupation of their father, where they originated, and what they went on to do in later life. The biographies are a rich resource, providing reasonably comparable data about a large number of similar individuals (rich men who went to Oxford). The 6,692 entries have been pre-processed by the author and saved to a [CSV file](https://en.wikipedia.org/wiki/Comma-separated_values) with one entry per row.
In this tutorial, the dataset involves geographical keywords. Once extracted, these placenames could be geo-referenced to their place on the globe and then mapped using digital mapping. This might make it possible to discern which colleges attracted students from what parts of the country, or to determine if these patterns changed over time. For a practical tutorial on taking this next step, see the lesson by Fred Gibbs mentioned at the end of this lesson. Readers may also be interested in [georeferencing in QGIS 2.0](/lessons/georeferencing-qgis), also available from the *Programming Historian*.
@@ -64,7 +64,7 @@ The first step of this process is to take a look at the data that we will be usi
Download the dataset and spend a couple of minutes looking at the types of information available. You should notice three columns of information. The first, 'Name', contains the name of the graduate. The second, 'Details', contains the biographical information known about that person. The final column, 'Matriculation Year', contains the year in which the person matriculated (began their studies). This final column was extracted from the details column in the pre-processing stage of this tutorial. The first two columns are as you would find them on the British History Online version of the *Alumni Oxonienses*.
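If you prefer to inspect the file programmatically rather than in a spreadsheet, here is a hedged sketch using Python's `csv` module (the filename below is a placeholder for whatever you named the download):

```python
# Peek at the dataset's structure: column names and the first entry.
import csv

with open('alumni-oxonienses.csv', encoding='utf-8') as f:  # placeholder filename
    reader = csv.DictReader(f)
    print(reader.fieldnames)  # expect: Name, Details, Matriculation Year
    first = next(reader)
    print(first['Name'], '|', first['Matriculation Year'])
```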
If you notice more than three columns then your spreadsheet programme has incorrectly set the [delimiter](https://en.wikipedia.org/wiki/Delimiter) between columns. It should be set to "," (double quotes, comma). How you do this depends on your spreadsheet programme, but you should be able to find the solution online. -Most (but not all) of these bibliographic entries contain enough information to tell us what county the graduate came from. Notice that a large number of entries contain placenames that correspond to either major cities ('of London', in the first entry) or English counties ('of Middlesex' in entry 5 or 'of Wilts' - short for Wiltshire in entry 6). If you are not British you may not be familiar with these county names. You can find a list of [historic counties of England](http://en.wikipedia.org/wiki/Historic_counties_of_England) on Wikipedia. +Most (but not all) of these bibliographic entries contain enough information to tell us what county the graduate came from. Notice that a large number of entries contain placenames that correspond to either major cities ('of London', in the first entry) or English counties ('of Middlesex' in entry 5 or 'of Wilts' - short for Wiltshire in entry 6). If you are not British you may not be familiar with these county names. You can find a list of [historic counties of England](https://en.wikipedia.org/wiki/Historic_counties_of_England) on Wikipedia. Unfortunately, the information is not always available in the same format. Sometimes it's the first thing mentioned in an entry. Sometimes it's in the middle. Our challenge is to extract those counties of origin from within this messy text, and store it in a new column next to that person's entry. @@ -116,9 +116,9 @@ Worcestershire Yorkshire ``` -Make sure that there are no blank lines in the gazetteer file. If there are, your program will think all spaces are a matching keyword. Some text editing programs (particularly in Linux) will want to add a blank line at the end of your file. If this is the case, try another text editor. It's best to use software that puts you in control. For more on this problem, see [the explanation on Stack Overflow](http://stackoverflow.com/questions/3056740/gedit-adds-line-at-end-of-file) - with thanks to John Levin for the link. +Make sure that there are no blank lines in the gazetteer file. If there are, your program will think all spaces are a matching keyword. Some text editing programs (particularly in Linux) will want to add a blank line at the end of your file. If this is the case, try another text editor. It's best to use software that puts you in control. For more on this problem, see [the explanation on Stack Overflow](https://stackoverflow.com/questions/3056740/gedit-adds-line-at-end-of-file) - with thanks to John Levin for the link. -If you ever need to add to this set of keywords, you can open this file in your text editor and add new words, each on their own line. Komodo Edit is a good text editor for this task, especially if you have set it up to run with Python, but you can also use any plain text editor as long as it is *not* a [word processor](http://en.wikipedia.org/wiki/Word_processor) such as Microsoft Word or Open Office. Word processors are inappropriate for writing code because of how they stylise apostrophes and quotes, causing havoc for your code. +If you ever need to add to this set of keywords, you can open this file in your text editor and add new words, each on their own line. 
Komodo Edit is a good text editor for this task, especially if you have set it up to run with Python, but you can also use any plain text editor as long as it is *not* a [word processor](https://en.wikipedia.org/wiki/Word_processor) such as Microsoft Word or Open Office. Word processors are inappropriate for writing code because of how they stylise apostrophes and quotes, causing havoc for your code.
## Loading your texts
@@ -158,7 +158,7 @@ The first line is a comment for our own benefit, to tells us (the human) what th
The second line opens the `gazetteer.txt` file, and reads it, which is signified by the 'r' (as opposed to 'w' for write, or 'a' for append). That means we will not be changing the contents of the file. Only reading it.
-The third line reads everything in that file, converts it to `lower()` case, and splits the contents into a Python list, using the [newline character](http://stackoverflow.com/questions/11497376/new-line-python) as the delimiter. Effectively that means each time the program comes across a new line, it stores it as a new entry. We then save that Python list containing the 39 counties into a variable that we have called `allKeywords`.
+The third line reads everything in that file, converts it to `lower()` case, and splits the contents into a Python list, using the [newline character](https://stackoverflow.com/questions/11497376/new-line-python) as the delimiter. Effectively that means each time the program comes across a new line, it stores it as a new entry. We then save that Python list containing the 39 counties into a variable that we have called `allKeywords`.
The fourth line closes the open text file. The fifth line prints out the results, and the sixth line tells us how many results were found.
@@ -182,7 +182,7 @@
python extractKeywords.py
```
-Once you have run the program you should see your gazetteer printed as a Python list in the command output, along with the number of entries in your list (39). If you can, great! Move on to step 2. If the last line of your output tells you that there was 1 result, that means the code has not worked properly, since we know that there should be 39 keywords in your gazetteer. Double check your code to make sure you havn't included any typos. If you still can't solve the problem, try changing "\n" to "\r" on line three. Some text editors will use [carriage returns](http://en.wikipedia.org/wiki/Carriage_return) instead of 'newline characters' when creating a new line. The \r means 'carriage return' and should solve your problem if you're experiencing one.
+Once you have run the program you should see your gazetteer printed as a Python list in the command output, along with the number of entries in your list (39). If you can, great! Move on to step 2. If the last line of your output tells you that there was 1 result, that means the code has not worked properly, since we know that there should be 39 keywords in your gazetteer. Double-check your code to make sure you haven't included any typos. If you still can't solve the problem, try changing "\n" to "\r" on line three. Some text editors will use [carriage returns](https://en.wikipedia.org/wiki/Carriage_return) instead of 'newline characters' when creating a new line. The \r means 'carriage return' and should solve your problem if you're experiencing one.
### Step 2: Load the texts
@@ -206,7 +206,7 @@ If the code worked, you should see a big wall of text.
Those are the texts we in
### Step 3: Remove unwanted punctuation
-When matching strings, you have to make sure the punctuation doesn't get in the way. Technically, 'London.' is a different string than 'London' or ';London' because of the added punctuation. These three strings which all mean the same thing to us as human readers will be viewed by the computer as distinct entities. To solve that problem, the easiest thing to do is just to remove all of the punctuation. You can do this with [regular expressions](http://en.wikipedia.org/wiki/Regular_expression), and [Doug Knox](/lessons/understanding-regular-expressions) and [Laura Turner O'Hara](/lessons/cleaning-ocrd-text-with-regular-expressions) have provided great introductions at *Programming Historian* for doing so.
+When matching strings, you have to make sure the punctuation doesn't get in the way. Technically, 'London.' is a different string than 'London' or ';London' because of the added punctuation. These three strings, which all mean the same thing to us as human readers, will be viewed by the computer as distinct entities. To solve that problem, the easiest thing to do is just to remove all of the punctuation. You can do this with [regular expressions](https://en.wikipedia.org/wiki/Regular_expression), and [Doug Knox](/lessons/understanding-regular-expressions) and [Laura Turner O'Hara](/lessons/cleaning-ocrd-text-with-regular-expressions) have provided great introductions at *Programming Historian* for doing so.
To keep things simple, this program will just replace the most common types of punctuation with nothing instead (effectively deleting punctuation).
@@ -441,7 +441,7 @@ There are a few extra lines of code here, but you didn't need to cut and paste a
(Error: new-line character seen in unquoted field - do you need to open the file in universal-newline mode?).
```
-To solve this problem, open your CSV file in a spreadsheet program (eg., Excel) and 'Save As' and under format chose 'Windows Comma Separated (csv)'. This should solve the problem. To read more on this issue, see [Stack Overflow](http://stackoverflow.com/questions/17315635/csv-new-line-character-seen-in-unquoted-field-error)
+To solve this problem, open your CSV file in a spreadsheet program (e.g., Excel) and 'Save As' and under format choose 'Windows Comma Separated (csv)'. This should solve the problem. To read more on this issue, see [Stack Overflow](https://stackoverflow.com/questions/17315635/csv-new-line-character-seen-in-unquoted-field-error)
---
@@ -565,4 +565,4 @@ This approach created longer and more complex code, but the result is a powerful
## Suggested Further Reading
-Readers who have completed this lesson might be interested in then geo-referencing the output using the Google API and mapping the results. You can learn more about this process from Fred Gibbs's tutorial, [Extract and Geocode Placenames from a Text File](http://fredgibbs.net/tutorials/extract-geocode-placenames-from-text-file.html). This will let you visualise the practical outputs of this tutorial. Alternatively, readers may be interested in [Jim Clifford et. al's tutorial on georeferencing in QGIS 2.0](/lessons/georeferencing-qgis), an open source [GIS](https://en.wikipedia.org/wiki/Geographic_information_system) program.
+Readers who have completed this lesson might then be interested in geo-referencing the output using the Google API and mapping the results.
You can learn more about this process from Fred Gibbs's tutorial, [Extract and Geocode Placenames from a Text File](https://fredgibbs.net/tutorials/extract-geocode-placenames-from-text-file.html). This will let you visualise the practical outputs of this tutorial. Alternatively, readers may be interested in [Jim Clifford et al.'s tutorial on georeferencing in QGIS 2.0](/lessons/georeferencing-qgis), an open source [GIS](https://en.wikipedia.org/wiki/Geographic_information_system) program.
diff --git a/en/lessons/facial-recognition-ai-python.md b/en/lessons/facial-recognition-ai-python.md
index d6df71d126..cc416eaaad 100644
--- a/en/lessons/facial-recognition-ai-python.md
+++ b/en/lessons/facial-recognition-ai-python.md
@@ -474,8 +474,8 @@ Additionally, there are several companies like [Roboflow](https://roboflow.com/)
[^2]: Christina Kotchemidova, "Why We Say “Cheese”: Producing the Smile in Snapshot Photography," *Critical Studies in Media Communication,* 22 no. 1 (2005): 2-25, [https://www.tandfonline.com/doi/abs/10.1080/0739318042000331853](https://www.tandfonline.com/doi/abs/10.1080/0739318042000331853).
[^3]: Paul Viola and Michael Jones, "Rapid object detection using a boosted cascade of simple features," *Proceedings of the 2001 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, CVPR 2001* (2001): 1-9, [https://ieeexplore.ieee.org/document/990517/authors#authors](https://ieeexplore.ieee.org/document/990517/authors#authors).
[^4]: Taylor R. Wondergem and Mihaela Friedlmeier, "Gender and Ethnic Differences in Smiling: A Yearbook Photographs Analysis from Kindergarten Through 12th Grade," *Sex Roles* 67, no. 7-8 (2012): 403-411. [https://doi.org/10.1007/s11199-012-0158-y](https://doi.org/10.1007/s11199-012-0158-y)
-[^5]: Joy Buolamwini and Timnit Gebru, "Gender Shades: Intersectional Accuracy Disparities in Commercial Gender Classification," *Proceedings of Machine Learning Research,* 81 (2018): 1–15, [http://proceedings.mlr.press/v81/buolamwini18a/buolamwini18a.pdf](https://perma.cc/F8JT-R9KA).
-[^6]: Hu Han and Anil K. Jain, "Age, Gender and Race Estimation from Unconstrained Face Images," (2014) [http://biometrics.cse.msu.edu/Publications/Face/HanJain_UnconstrainedAgeGenderRaceEstimation_MSUTechReport2014.pdf](https://perma.cc/J95Z-89FQ).
+[^5]: Joy Buolamwini and Timnit Gebru, "Gender Shades: Intersectional Accuracy Disparities in Commercial Gender Classification," *Proceedings of Machine Learning Research,* 81 (2018): 1–15, [https://proceedings.mlr.press/v81/buolamwini18a/buolamwini18a.pdf](https://perma.cc/F8JT-R9KA).
+[^6]: Hu Han and Anil K. Jain, "Age, Gender and Race Estimation from Unconstrained Face Images," (2014) [https://biometrics.cse.msu.edu/Publications/Face/HanJain_UnconstrainedAgeGenderRaceEstimation_MSUTechReport2014.pdf](https://perma.cc/J95Z-89FQ).
[^7]: Angela Wang and Olga Russakovsky, "Overwriting Pretrained Bias with Finetuning Data," *2023 IEEE/CVF International Conference on Computer Vision (ICCV), Paris, France* (2023): 3934-3945, [https://openaccess.thecvf.com/content/ICCV2023/papers/Wang_Overwriting_Pretrained_Bias_with_Finetuning_Data_ICCV_2023_paper.pdf](https://perma.cc/2TE4-ED6Z).
[^8]: Mei Wang, Weihong Deng, *et al.*, "Racial Faces in-the-Wild: Reducing Racial Bias by Information Maximization Adaptation Network," *Proceedings of the 2019 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, CVPR 2019* (2019): 692-702, [https://arxiv.org/pdf/1812.00194.pdf](https://perma.cc/Y2Y3-G7R9).
[^9]: Claudia Goldin and Lawrence F. Katz, "Putting the “Co” in Education: Timing, Reasons, and Consequences of College Coeducation from 1835 to the Present," *Journal of Human Capital*, 5 no. 4 (2011): 377-417. diff --git a/en/lessons/fetch-and-parse-data-with-openrefine.md b/en/lessons/fetch-and-parse-data-with-openrefine.md index 5991a6a0c5..b10a1e7501 100755 --- a/en/lessons/fetch-and-parse-data-with-openrefine.md +++ b/en/lessons/fetch-and-parse-data-with-openrefine.md @@ -48,7 +48,7 @@ David Huynh, the creator of Freebase Gridworks (2009) which became GoogleRefine - more provisional / exploratory / experimental / playful than a database [^huynh] Refine is a unique tool that combines the power of databases and scripting languages into an interactive and user friendly visual interface. -Because of this flexibility it has been embraced by [journalists](https://www.propublica.org/nerds/item/using-google-refine-for-data-cleaning), [librarians](http://web.archive.org/web/20180129051941/http://data-lessons.github.io/library-openrefine/), [scientists](http://www.datacarpentry.org/OpenRefine-ecology-lesson/), and others needing to wrangle data from diverse sources and formats into structured information. +Because of this flexibility it has been embraced by [journalists](https://www.propublica.org/nerds/item/using-google-refine-for-data-cleaning), [librarians](https://web.archive.org/web/20180129051941/https://data-lessons.github.io/library-openrefine/), [scientists](https://www.datacarpentry.org/OpenRefine-ecology-lesson/), and others needing to wrangle data from diverse sources and formats into structured information. {% include figure.html filename="openrefine.png" caption="OpenRefine terminal and GUI" %} @@ -72,11 +72,11 @@ This lesson presents three examples demonstrating workflows to harvest and proce This example downloads a single web page and parses it into a structured table using Refine's built in functions. A similar workflow can be applied to a list of URLs, often generated by parsing another web page, creating a flexible web harvesting tool. -The raw data for this example is an HTML copy of Shakespeare's [Sonnets](http://www.gutenberg.org/ebooks/1105) from [Project Gutenberg](http://www.gutenberg.org/). +The raw data for this example is an HTML copy of Shakespeare's [Sonnets](https://www.gutenberg.org/ebooks/1105) from [Project Gutenberg](https://www.gutenberg.org/). Processing a book of poems into structured data enables new ways of reading text, allowing us to sort, manipulate, and connect with other information.
-Please note that Project Gutenberg provides feeds to bulk download catalog data.
+Please note that Project Gutenberg provides feeds to bulk download catalog data. Their public website should not be used for web scraping purposes. A copy of the HTML ebook is hosted on GitHub for this example to avoid redirects built into the Gutenberg site.
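For orientation, what Refine's *Add column by fetching urls* will do in bulk below is, in plain Python, a single HTTP GET; a rough sketch with a placeholder address (not the lesson's hosted copy):

```python
# Fetch one page and show the start of its HTML.
import urllib.request

url = 'https://example.org/'  # placeholder URL, not the hosted ebook
with urllib.request.urlopen(url) as response:
    html = response.read().decode('utf-8')

print(html[:200])
```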
    @@ -404,7 +404,7 @@ The url will open in a new tab, returning a JSON response. Fetch the URLs using *url* column by selecting *Edit column* > *Add column by fetching urls*. Name the new column "fetch" and click *OK*. -In a few seconds, the operation should complete and the *fetch* column will be filled with [JSON](http://www.json.org/) data. +In a few seconds, the operation should complete and the *fetch* column will be filled with [JSON](https://www.json.org/) data. ## Parse JSON to Get Items @@ -487,9 +487,9 @@ This workflow uses the HTTP GET protocol, meaning the query is encoded in the UR Instead, many API services used to enhance text data, such as [geocoding](https://en.wikipedia.org/wiki/Geocoding) or [named entity recognition](https://en.wikipedia.org/wiki/Named-entity_recognition), use HTTP POST to transfer information to the server for processing. GREL does not have a built in function to use this type of API. -However, the expression window language can be changed to [Jython](http://www.jython.org/), providing a more complete scripting environment where it is possible to implement a POST request. +However, the expression window language can be changed to [Jython](https://www.jython.org/), providing a more complete scripting environment where it is possible to implement a POST request. -> [Jython](http://www.jython.org/) is Python implemented for the Java VM and comes bundled with Refine. +> [Jython](https://www.jython.org/) is Python implemented for the Java VM and comes bundled with Refine. > This means [Python 2](https://docs.python.org/2.7/) scripts using the Standard Library can be written or loaded into the expression window, and Refine will apply them to each cell in the transformation. > The [official documentation](https://github.com/OpenRefine/OpenRefine/wiki/Jython) is sparse, but the built-in Jython can be extended with non-standard libraries using a [work around](https://github.com/OpenRefine/OpenRefine/wiki/Extending-Jython-with-pypi-modules). > @@ -538,8 +538,8 @@ The URL could be replaced with cell variables to construct a query similar to th ## POST Request Urllib2 will automatically send a POST if data is added to the request object. -For example, [Text Processing](http://text-processing.com/) provides natural language processing APIs based on [Python NLTK](http://www.nltk.org/). -The documentation for the [Sentiment Analysis service](http://text-processing.com/docs/sentiment.html) provides a base URL and the name of the key used for the data to be analyzed. +For example, [Text Processing](https://text-processing.com/) provides natural language processing APIs based on [Python NLTK](https://www.nltk.org/). +The documentation for the [Sentiment Analysis service](https://text-processing.com/docs/sentiment.html) provides a base URL and the name of the key used for the data to be analyzed. No authentication is required and 1,000 calls per day are free for non-commercial use.[^use] This type of API is often demonstrated using [curl](https://curl.haxx.se/) on the commandline. @@ -603,7 +603,7 @@ else: ## Compare Sentiment -To practice constructing a POST request, read the documentation for [Sentiment Tool](http://sentiment.vivekn.com/docs/api/), another free API. +To practice constructing a POST request, read the documentation for [Sentiment Tool](https://sentiment.vivekn.com/docs/api/), another free API. Find the service URL and data key necessary to modify the Jython pattern above. Create a new column from *first* named `sentiment2` and test the script. 
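The full Jython script is not reproduced in this diff, but the urllib2 POST pattern the lesson describes looks roughly like this in Refine's expression window (the endpoint path and the `text` key are taken on trust from the Text Processing documentation, so treat them as assumptions):

```python
# Jython (Python 2) sketch for the Refine expression window.
import urllib
import urllib2

url = "http://text-processing.com/api/sentiment/"  # assumed endpoint
post_data = urllib.urlencode({"text": value})      # 'value' is Refine's variable for the current cell
request = urllib2.Request(url, post_data)          # attaching data makes urllib2 send a POST
response = urllib2.urlopen(request)
return response.read()                             # the service's reply fills the new column
```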
@@ -629,7 +629,7 @@ Archaic words and phrases contribute significantly to the sonnets' sentiment, ye
While comparing the metrics is fascinating, neither is likely to produce quality results for this data set. Rather than an accurate sentiment, we might be surprised to find a quantifiable dissonance between the sonnet's English and our modern web usage. However, a model optimized to Shakespeare's words could be developed using more appropriate training data.
-To learn more about classifiers and how to implement one, see Vilja Hulden's PH lesson ["Supervised Classification: The Naive Bayesian Returns to the Old Bailey"](/lessons/naive-bayesian) or Steven Bird, Ewan Klein, and Edward Loper's ["Learning to Classify Text"](http://www.nltk.org/book/ch06.html) in the [NTLK Book](http://www.nltk.org/book/).
+To learn more about classifiers and how to implement one, see Vilja Hulden's PH lesson ["Supervised Classification: The Naive Bayesian Returns to the Old Bailey"](/lessons/naive-bayesian) or Steven Bird, Ewan Klein, and Edward Loper's ["Learning to Classify Text"](https://www.nltk.org/book/ch06.html) in the [NLTK Book](https://www.nltk.org/book/).
Accessing data and services on the web opens new possibilities and efficiencies for humanities research. While powerful, these APIs are often not aimed at humanities scholarship and may not be appropriate or optimized for our inquiries.
@@ -642,7 +642,7 @@ We can critically evaluate data sources, algorithms, and API services, as well a
With its unique ability to interactively wrangle data from raw aggregation to analysis, Refine supports exploratory research and offers a wonderfully fluid and playful approach to tabular data. OpenRefine is a flexible, pragmatic tool that simplifies routine tasks and, when combined with domain knowledge, extends research capabilities.
-[^huynh]: David Huynh, "Google Refine", Computer-Assisted Reporting Conference 2011, [http://web.archive.org/web/20150528125345/http://davidhuynh.net/spaces/nicar2011/tutorial.pdf](http://web.archive.org/web/20150528125345/http://davidhuynh.net/spaces/nicar2011/tutorial.pdf).
-[^use]: As of July 2017, see [API Documentation](http://text-processing.com/docs/index.html).
-[^1]: Jacob Perkins, "Sentiment Analysis with Python NLTK Text Classification", [http://text-processing.com/demo/sentiment/](http://text-processing.com/demo/sentiment/).
+[^huynh]: David Huynh, "Google Refine", Computer-Assisted Reporting Conference 2011, [https://web.archive.org/web/20150528125345/https://davidhuynh.net/spaces/nicar2011/tutorial.pdf](https://web.archive.org/web/20150528125345/https://davidhuynh.net/spaces/nicar2011/tutorial.pdf).
+[^use]: As of July 2017, see [API Documentation](https://text-processing.com/docs/index.html).
+[^1]: Jacob Perkins, "Sentiment Analysis with Python NLTK Text Classification", [https://text-processing.com/demo/sentiment/](https://text-processing.com/demo/sentiment/).
[^2]: Vivek Narayanan, Ishan Arora, and Arjun Bhatia, "Fast and accurate sentiment classification using an enhanced Naive Bayes model", 2013, [arXiv:1305.6143](https://arxiv.org/abs/1305.6143).
diff --git a/en/lessons/from-html-to-list-of-words-1.md b/en/lessons/from-html-to-list-of-words-1.md
index cd5190694f..b0c5c5b61e 100755
--- a/en/lessons/from-html-to-list-of-words-1.md
+++ b/en/lessons/from-html-to-list-of-words-1.md
@@ -251,10 +251,10 @@ that’s ok!
- programming-historian-2 ([zip][])
[/lessons/working-with-web-pages]: /lessons/working-with-web-pages
- [Benjamin Bowsey’s 1780 criminal trial transcript]: http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33
- [HTML]: http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33-defend448&div=t17800628-33
- [XML]: http://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes
- [1]: http://www.w3schools.com/html/
+ [Benjamin Bowsey’s 1780 criminal trial transcript]: https://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33
+ [HTML]: https://www.oldbaileyonline.org/browse.jsp?id=t17800628-33-defend448&div=t17800628-33
+ [XML]: https://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes
+ [1]: https://www.w3schools.com/html/
[zip file from the previous lesson here.]: /lessons/manipulating-strings-in-python#code-syncing
[Manipulating Strings in Python]: /lessons/manipulating-strings-in-python
[Code Reuse and Modularity]: /lessons/code-reuse-and-modularity
diff --git a/en/lessons/from-html-to-list-of-words-2.md b/en/lessons/from-html-to-list-of-words-2.md
index 3485dcb6b0..4771d8f2b1 100755
--- a/en/lessons/from-html-to-list-of-words-2.md
+++ b/en/lessons/from-html-to-list-of-words-2.md
@@ -350,7 +350,7 @@ to make sure you have the correct code.
- python-lessons3.zip ([zip sync][])
[From HTML to a List of Words (part 1)]: /lessons/from-html-to-list-of-words-1
- [integer]: http://docs.python.org/2.4/lib/typesnumeric.html
- [types]: http://docs.python.org/3/library/types.html
+ [integer]: https://docs.python.org/2.4/lib/typesnumeric.html
+ [types]: https://docs.python.org/3/library/types.html
[zip]: /assets/python-lessons2.zip
[zip sync]: /assets/python-lessons3.zip
diff --git a/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.md b/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.md
index e55e15e258..a323d75c84 100755
--- a/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.md
+++ b/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.md
@@ -37,13 +37,13 @@ It is often the case that historians involved in digital projects wish to work w
2. Even if you had such an army of helpers, proof-reading the OCR output of, say, a collection of twelfth century Italian charters transcribed and published in 1935, will quickly drive them all mad, make their eyes bleed, and the result will still be a great wad of text containing a great many errors, and you will __still__ have to do __something__ to it before it becomes useful in any context.
- programming-historian-2 ([zip][]) [/lessons/working-with-web-pages]: /lessons/working-with-web-pages - [Benjamin Bowsey’s 1780 criminal trial transcript]: http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 - [HTML]: http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33-defend448&div=t17800628-33 - [XML]: http://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes - [1]: http://www.w3schools.com/html/ + [Benjamin Bowsey’s 1780 criminal trial transcript]: https://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 + [HTML]: https://www.oldbaileyonline.org/browse.jsp?id=t17800628-33-defend448&div=t17800628-33 + [XML]: https://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes + [1]: https://www.w3schools.com/html/ [zip file from the previous lesson here.]: /lessons/manipulating-strings-in-python#code-syncing [Manipulating Strings in Python]: /lessons/manipulating-strings-in-python [Code Reuse and Modularity]: /lessons/code-reuse-and-modularity diff --git a/en/lessons/from-html-to-list-of-words-2.md b/en/lessons/from-html-to-list-of-words-2.md index 3485dcb6b0..4771d8f2b1 100755 --- a/en/lessons/from-html-to-list-of-words-2.md +++ b/en/lessons/from-html-to-list-of-words-2.md @@ -350,7 +350,7 @@ to make sure you have the correct code. - python-lessons3.zip ([zip sync][]) [From HTML to a List of Words (part 1)]: /lessons/from-html-to-list-of-words-1 - [integer]: http://docs.python.org/2.4/lib/typesnumeric.html - [types]: http://docs.python.org/3/library/types.html + [integer]: https://docs.python.org/2.4/lib/typesnumeric.html + [types]: https://docs.python.org/3/library/types.html [zip]: /assets/python-lessons2.zip [zip sync]: /assets/python-lessons3.zip diff --git a/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.md b/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.md index e55e15e258..a323d75c84 100755 --- a/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.md +++ b/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.md @@ -37,13 +37,13 @@ It is often the case that historians involved in digital projects wish to work w 2. Even if you had such an army of helpers, proof-reading the OCR output of, say, a collection of twelfth century Italian charters transcribed and published in 1935, will quickly drive them all mad, make their eyes bleed, and the result will still be a great wad of text containing a great many errors, and you will __still__ have to do __something__ to it before it becomes useful in any context. -Going through a text file line by line and correcting OCR errors one at a time is hugely error-prone, as any proof reader will tell you. There are ways to automate some of this tedious work. A scripting language like Perl or Python can allow you to search your OCR output text for common errors and correct them using "Regular Expressions", a language for describing patterns in text. (So called because they express a ["regular language"](http://en.wikipedia.org/wiki/Regular_language). See L.T. O'Hara's [tutorial on Regular Expressions](/lessons/cleaning-ocrd-text-with-regular-expressions.html) here at the PM.) Regular Expressions, however, are only useful if the expressions you are searching for are ... well ... regular. Unfortunately, much of what you have in OCR output is highly *irregular*. 
If you could impose some order on it: create an ordered data set out of it, your Regular Expression tools would become much more powerful.
+Going through a text file line by line and correcting OCR errors one at a time is hugely error-prone, as any proof reader will tell you. There are ways to automate some of this tedious work. A scripting language like Perl or Python can allow you to search your OCR output text for common errors and correct them using "Regular Expressions", a language for describing patterns in text. (So called because they express a ["regular language"](https://en.wikipedia.org/wiki/Regular_language). See L.T. O'Hara's [tutorial on Regular Expressions](/lessons/cleaning-ocrd-text-with-regular-expressions.html) here at the PM.) Regular Expressions, however, are only useful if the expressions you are searching for are ... well ... regular. Unfortunately, much of what you have in OCR output is highly *irregular*. If you could impose some order on it by creating an ordered data set out of it, your Regular Expression tools would become much more powerful.
Consider, for example, what happens if your OCR interpreted a lot of strings like this "21 July, 1921" as "2l July, 192l", turning the integer '1' into an 'l'. You would love to be able to write a search and replace script that would turn all instances of 2l into 21, but then what would happen if you had lots of occurrences of strings like this in your text: "2lb. hammer". You'd get a bunch of 21b. hammers; not what you want. If only you could tell your script: only change 2l into 21 in sections where there are dates, not weights. If you had an ordered data set, you could do things like that.
Very often the texts that historians wish to digitize are, in fact, ordered data sets: ordered collections of primary source documents, or a legal code say, or a cartulary. But the editorial structure imposed upon such resources is usually designed for a particular kind of data retrieval technology i.e., a codex, a book. For a digitized text you need a different kind of structure. If you can get rid of the book related infrastructure and reorganize the text according to the sections and divisions that you're interested in, you will wind up with data that is much easier to do search and replace operations on, and as a bonus, your text will become immediately useful in a variety of other contexts as well.
-This is where a scripting language like Python comes very much in handy. For our project we wanted to prepare some of the documents from a [12th century collection of *imbreviatura*](http://www.worldcat.org/oclc/17591390) from the Italian scribe known as Giovanni Scriba (you can [access the PDF here](https://notariorumitinera.eu/Docs/Biblioteca_Digitale/SB/3a47488c28eef2aedfea52ebbde2c634/dd361cb1479ab2309f5ceef1f875c2a5.pdf)) so that they could be marked up by historians for subsequent NLP analysis or potentially for other purposes as well. The pages of the 1935 published edition look like this.
+This is where a scripting language like Python comes very much in handy. For our project we wanted to prepare some of the documents from a [12th century collection of *imbreviatura*](https://www.worldcat.org/oclc/17591390) from the Italian scribe known as Giovanni Scriba (you can [access the PDF here](https://notariorumitinera.eu/Docs/Biblioteca_Digitale/SB/3a47488c28eef2aedfea52ebbde2c634/dd361cb1479ab2309f5ceef1f875c2a5.pdf)) so that they could be marked up by historians for subsequent NLP analysis or potentially for other purposes as well.
The pages of the 1935 published edition look like this.
{% include figure.html filename="gs_pg110.png" caption="GS page 110" %}
@@ -156,7 +156,7 @@ Unfortunately, regular expressions won't help you much here. This text can appea
IL CIRTOL.'RE DI G:OV.I\N( sca:FR 339 342 NI .\ßlO CHIAUDANO 9LtTTIA MORESCO
-These strings are not regular enough to reliably find with regular expressions; however, if you know what the strings are *supposed* to look like, you can compose some kind of string similarity algorithm to test each string against an exemplar and measure the likelihood that it is a page header. Fortunately, I didn't have to compose such an algorithm, Vladimir Levenshtein did it for us in 1965 (see: <http://en.wikipedia.org/wiki/Levenshtein_distance>). A computer language can encode this algorithm in any number of ways; here's an effective Python function that will work for us:
+These strings are not regular enough to reliably find with regular expressions; however, if you know what the strings are *supposed* to look like, you can compose some kind of string similarity algorithm to test each string against an exemplar and measure the likelihood that it is a page header. Fortunately, I didn't have to compose such an algorithm; Vladimir Levenshtein did it for us in 1965 (see: <https://en.wikipedia.org/wiki/Levenshtein_distance>). A computer language can encode this algorithm in any number of ways; here's an effective Python function that will work for us:
```python
@@ -823,7 +823,7 @@ Note that the `try: except:` blocks come to the rescue again here. The loop abov
> NOTA BENE: Again, bear in mind that we are modifying a data structure in memory rather than editing successive text files. So this loop should be __added__ to your script __below__ the summary and marginal loop, which is __below__ the loop that created your skeleton dictionary.
## Parse Dates and add to the dictionary
-Dates are hard. Students of British history cling to [Cheyney](http://www.worldcat.org/oclc/41238508) as to a spar on a troubled ocean. And, given the way the Gregorian calendar was adopted so gradually, and innumerable other local variations, correct date reckoning for medieval sources will always require care and local knowledge. Nevertheless, here too Python can be of some help.
+Dates are hard. Students of British history cling to [Cheyney](https://www.worldcat.org/oclc/41238508) as to a spar on a troubled ocean. And, given the way the Gregorian calendar was adopted so gradually, and innumerable other local variations, correct date reckoning for medieval sources will always require care and local knowledge. Nevertheless, here too Python can be of some help.
Our Italian summary line invariably contains a date drawn from the text, and it's conveniently set off from the rest of the line by parentheses. So we can parse them and create Python `date` objects. Then, if we want, we can do some simple calendar arithmetic.
@@ -850,7 +850,7 @@ for ch in charters:
Once you're satisfied that all the parenthetical date expressions are present and correct, and conform to your regular expression, you can parse them and add them to your data structure as dates rather than just strings. For this you can use the `datetime` module.
-This module is part of the standard library, is a deep subject, and ought to be the subject of its own tutorial, given the importance of dates for historians. As with a lot of other python modules, a good introduction is Doug Hellmann's [PyMOTW](https://pymotw.com/3/datetime/index.html)(module of the week).
An even more able extension library is [mxDateTime](http://www.egenix.com/products/python/mxBase/mxDateTime/). Suffice it here to say that the `datetime.date` module expects parameters like this: +This module is part of the standard library, is a deep subject, and ought to be the subject of its own tutorial, given the importance of dates for historians. As with a lot of other python modules, a good introduction is Doug Hellmann's [PyMOTW](https://pymotw.com/3/datetime/index.html)(module of the week). An even more able extension library is [mxDateTime](https://www.egenix.com/products/python/mxBase/mxDateTime/). Suffice it here to say that the `datetime.date` module expects parameters like this: ```python >>> from datetime import date @@ -957,7 +957,7 @@ Print out our resulting dictionary using `pprint(charters)` and you'll see somet } ``` -Printing out your Python dictionary as a literal string is not a bad thing to do. For a text this size, the resulting file is perfectly manageable, can be mailed around usefully and read into a python repl session very simply using `eval()`, or pasted directly into a Python module file. On the other hand, if you want an even more reliable way to serialize it in an exclusively Python context, look into [`Pickle`](https://docs.python.org/3.7/library/pickle.html). If you need to move it to some other context, JavaScript for example, or some `RDF` triple stores, Python's [`json`](https://docs.python.org/3.7/library/json.html#module-json) module will translate effectively. If you have to get some kind of XML output, I will be very sorry for you, but the [`lxml`](http://lxml.de/) python module may ease the pain a little. +Printing out your Python dictionary as a literal string is not a bad thing to do. For a text this size, the resulting file is perfectly manageable, can be mailed around usefully and read into a python repl session very simply using `eval()`, or pasted directly into a Python module file. On the other hand, if you want an even more reliable way to serialize it in an exclusively Python context, look into [`Pickle`](https://docs.python.org/3.7/library/pickle.html). If you need to move it to some other context, JavaScript for example, or some `RDF` triple stores, Python's [`json`](https://docs.python.org/3.7/library/json.html#module-json) module will translate effectively. If you have to get some kind of XML output, I will be very sorry for you, but the [`lxml`](https://lxml.de/) python module may ease the pain a little. ## Order from disorder, huzzah. Now that we have an ordered data structure, we can do many things with it. As a very simple example, let's append some code that just prints `charters` out as html for display on a web-site: @@ -1049,7 +1049,7 @@ Being able to do this with your, still mostly uncorrected, OCR output is not a t And, our original problem, OCR cleanup, is now much more tractable because we can target regular expressions for the specific sorts of metadata we have: errors in the Italian summary or in the Latin text? Or we could design search-and-replace routines just for specific charters, or groups of charters. -Beyond this though, there's lots you can do with an ordered data set, including feeding it back through a markup tool like the [brat](http://brat.nlplab.org) as we did for the ChartEx project. Domain experts can then start adding layers of semantic tagging even if you don't do any further OCR error correction. 
Moreover, with an ordered dataset we can get all sorts of output, some other flavor of XML (if you must) for example: TEI (Text Encoding Initiative), or EAD (Encoded Archival Description). Or you could read your dataset directly into a relational database, or some kind of key/value store. All of these things are essentially impossible if you're working simply with a plain text file.
The bits of code above are in no way a turn-key solution for cleaning arbitrary OCR output. There is no such magic wand. The Google approach to scanning the contents of research libraries threatens to drown us in an ocean of bad data. Worse, it elides a fundamental fact of digital scholarship: digital sources are hard to get. Reliable, flexible, and useful digital texts require careful redaction and persistent curation. Google, Amazon, Facebook, *et alia* do not have to concern themselves with the quality of their data, just its quantity. Historians, on the other hand, must care first for the integrity of their sources.
diff --git a/en/lessons/geocoding-qgis.md b/en/lessons/geocoding-qgis.md
index d39aac97ad..cf610b3f1f 100755
--- a/en/lessons/geocoding-qgis.md
+++ b/en/lessons/geocoding-qgis.md
@@ -37,7 +37,7 @@ Many types of sources used by historians are inherently spatial. For example:
- Imports and exports
- Routes and itineraries
-In this tutorial, you will learn how to 'geocode' historial data containing placenames (towns, counties, countries, etc), thus making them mappable using [QGIS](http://www.qgis.org/en/site/), a digital mapping software suite. This will allow you to:
+In this tutorial, you will learn how to 'geocode' historical data containing placenames (towns, counties, countries, etc), thus making them mappable using [QGIS](https://www.qgis.org/en/site/), a digital mapping software suite. This will allow you to:
- Display your data as a map (whether it originated as a list, table, or prose)
- Analyse distances between locations in your data
@@ -75,9 +75,9 @@ This tutorial was prepared using QGIS 2.14 'Essen' on Mac OS X 10.11. Menus, win
You will also need to use a relational database such as Microsoft Access or [LibreOffice Base](https://www.libreoffice.org/get-help/install-howto/), or alternatively be very proficient with spreadsheets. The instructions in the tutorial are designed for use with LibreOffice Base, which is a free download as part of the [LibreOffice](https://www.libreoffice.org/get-help/install-howto/) suite.
-**NB** LibreOffice requires a full installation of Java in order to use the Base application. This is achieved most easily by downloading and installing the Java 8 Development Kit for your operating system from [Oracle](http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html). The Java 8 Runtime Environment does NOT work with LibreOffice on Mac OS X 10.11.
+**NB** LibreOffice requires a full installation of Java in order to use the Base application. This is achieved most easily by downloading and installing the Java 8 Development Kit for your operating system from [Oracle](https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html). The Java 8 Runtime Environment does NOT work with LibreOffice on Mac OS X 10.11.
-The tutorial will map the data extracted from [*Alumni Oxonienses*](http://www.british-history.ac.uk/alumni-oxon/1500-1714) in the *Programming Historian* lesson [Using Gazetteers to Extract Sets of Keywords from Free-Flowing Texts](/lessons/extracting-keywords) using publically available maps of English and Welsh historic counties. If you complete that tutorial first it will help you to understand the nature of the data which is being mapped here. These data are provided as both a full dataset and also a separate file which is a summary of the numbers of Oxford alumni by their county of origin, created from the first file using an Excel PivotTable.
+The tutorial will map the data extracted from [*Alumni Oxonienses*](https://www.british-history.ac.uk/alumni-oxon/1500-1714) in the *Programming Historian* lesson [Using Gazetteers to Extract Sets of Keywords from Free-Flowing Texts](/lessons/extracting-keywords) using publicly available maps of English and Welsh historic counties. If you complete that tutorial first it will help you to understand the nature of the data which is being mapped here. These data are provided as both a full dataset and also a separate file which is a summary of the numbers of Oxford alumni by their county of origin, created from the first file using an Excel PivotTable.
# The Data
@@ -116,9 +116,9 @@ In this short tutorial we will map the total numbers of early modern University
* Set up a new Project file in QGIS and save it in your choice of location. (*NB.* QGIS defaults to saving 'relative pathnames' which means that as long as you save all of your project files in the same folder or its subfolders, you can move it to a different location, such as a USB stick. You can check this setting via the menu `Project>Project Properties` and the `General` side tab).
* It is very important to set the [Coordinate Reference System](https://en.wikipedia.org/wiki/Spatial_reference_system) (CRS) to one that suits the data you will import, and the location you plan to map. Go to the menu `Project>Project Properties` and select the 'CRS' tab at the side. First select ‘Enable on the fly CRS transformation’ at the top of this window then use the filter box to find and select `OSGB 1936 / the British National Grid` with the authority ID `EPSG:27700` from under the projected coordinate systems heading.
-There is an important distinction between Geographic Coordinate Systems, which simply define measurement units and the datum, and Projected Coordinate Systems, which also define the way in which the globe is ‘flattened’ onto a map. [OSGB](https://en.wikipedia.org/wiki/Ordnance_Survey_National_Grid) is available in both variants in QGIS, so choose the 'projected' version to get a map in which the United Kingdom appears the shape you would expect. For more details on projections in GIS, see the [Working with Projections in QGIS Tutorial](http://www.qgistutorials.com/en/docs/3/working_with_projections.html).
+There is an important distinction between Geographic Coordinate Systems, which simply define measurement units and the datum, and Projected Coordinate Systems, which also define the way in which the globe is ‘flattened’ onto a map. [OSGB](https://en.wikipedia.org/wiki/Ordnance_Survey_National_Grid) is available in both variants in QGIS, so choose the 'projected' version to get a map in which the United Kingdom appears in the shape you would expect. For more details on projections in GIS, see the [Working with Projections in QGIS Tutorial](https://www.qgistutorials.com/en/docs/3/working_with_projections.html).
-* Download a Shapefile containing polygons of the historic counties of England and Wales from [http://www.county-borders.co.uk](http://www.county-borders.co.uk/) (choose the file `Definition A: SHP OSGB36 Simplified` which is a version of the pre-1843 county boundaries of Great Britain projected on the OS National Grid, without detached portions of counties). Unzip the contents of the ZIP file in the same folder as your project file
+* Download a Shapefile containing polygons of the historic counties of England and Wales from [https://www.county-borders.co.uk](https://www.county-borders.co.uk/) (choose the file `Definition A: SHP OSGB36 Simplified`, which is a version of the pre-1843 county boundaries of Great Britain projected on the OS National Grid, without detached portions of counties). Unzip the contents of the ZIP file in the same folder as your project file.
* Click the `Add Vector Layer` button (looks like a line graph) from the Manage Layers toolbar and then `Browse` to select and add the Shapefile `UKDefinitionA.shp` from within the folder you’ve unzipped.
{% include figure.html filename="QGISFigureAddVector.png" caption="Figure 1: The QGIS Add Vector window on MacOS (the Add Vector button is circled on the left hand toolbar)" %}
@@ -142,7 +142,7 @@ This data can now be shown as a [choropleth map](https://en.wikipedia.org/wiki/C
{% include figure.html filename="QGISFigure2.png" caption="Figure 3: The vector layer Styles tab showing classified values based on the field joined from the table" %}
-For more information on choosing the correct classification method for your data, start by looking at this article on [Classification in GIS](http://wiki.gis.com/wiki/index.php/Classification). Examine the results of your map and think about what is actually being represented. Are the raw numbers of alumni, coloured according to the same classes, for very differently sized counties, helpful? Choropleth maps should normally display data that has been normalised in some way, for example showing population density, rather than raw population.
+For more information on choosing the correct classification method for your data, start by looking at this article on [Classification in GIS](https://wiki.gis.com/wiki/index.php/Classification). Examine the results of your map and think about what is actually being represented. Are the raw numbers of alumni, coloured according to the same classes, for very differently sized counties, helpful? Choropleth maps should normally display data that has been normalised in some way, for example showing population density, rather than raw population.
You may wish to experiment with the Expression Builder (accessed via the ∑ symbol next to `Column` in `Properties>Style`) to normalise these values using other columns and values that are available to you.
Ideally we might normalise by population, but in the absence of this data, you might experiment by using the `$area` property, which is intrinsic to polygon shape layers in GIS. The very simple expression needed to create a map colour ramp on this would be (note that the field name contains spaces, so needs to be contained within double quotation marks):
@@ -156,7 +156,7 @@ When you alter any of these settings within the graduated style page you will ne
Geocoding is a much more powerful technique than simple table joins because each and every line of your data remains visible and able to be analysed within the GIS software as an individual point on the map (as in table 2). Fundamentally the aim is to join each item of data to a pair of coordinates. Most historical data cannot be geocoded automatically using online tools or QGIS plugins. The geocoding process must therefore be carried out manually to match each data row with a location. This is a simple database operation joining (matching) your data with a gazetteer (a list of places with coordinates). Many gazetteers are available, but relatively few are suitable for use with historical data, for example, for England:
-- [Association of British Counties Gazetteer](http://www.gazetteer.org.uk/index.php) (data available to purchase)
+- [Association of British Counties Gazetteer](https://www.gazetteer.org.uk/index.php) (data available to purchase)
- [The Historical Gazetteer of England's Place Names](https://www.placenames.org.uk/) allows you to geocode individual locations online only; unfortunately, the API service for accessing this data for use in automated geocoding, known as DEEP, part of Unlock, has now (late 2016) been withdrawn. A better browsing interface is available for those with UK Higher Education logins at the [Survey of English Place-Names](https://epns.nottingham.ac.uk/browse)
If no gazetteer exists for the area or period that you are studying, you can make your own relatively simply from a vector map by creating a point layer containing the information that you require within QGIS (potentially by combining information from other existing layers) and exporting that complete with XY coordinates. For some parts of the world there are neither historical gazetteers, nor vector maps suitable for historical periods; in these cases you will have to investigate creating your own vector and point layer; see the tutorial [Creating New Vector Layers in QGIS 2.0](/lessons/vector-layers-qgis).
@@ -166,7 +166,7 @@ If you have completed the first part, you can carry on and follow the steps below in the same project. If you did not, or you want to start a new clean project, follow the instructions from the first section to:
* Set up a new Project file in QGIS, and set the Coordinate Reference System to `OSGB 1936/the British National Grid` with the authority ID `EPSG:27700` as a projected coordinate system using `Project>Project Properties>CRS`
-* Download a Shapefile containing polygons of the historic counties of England and Wales from [http://www.county-borders.co.uk/](http://www.county-borders.co.uk/) (choose definition A and the OS National Grid).
+* Download a Shapefile containing polygons of the historic counties of England and Wales from [https://www.county-borders.co.uk/](https://www.county-borders.co.uk/) (choose definition A and the OS National Grid).
Using your existing project, you can now start to add more layers to create your gazetteer:
@@ -188,7 +188,7 @@ This data can now be matched against your existing data to complete the geocodin
We can now create a composite table of these locations and the data from our original table. This is created by matching the name of the county in the 'place' field of the alumni table with its equivalent in the new gazetteer using a relational database. This tutorial assumes that you have many hundreds or thousands of rows of data (as we do in this tutorial), requiring an automated method. If you only have a few rows, or you have difficulties using these methods, it is possible to do it manually - see 'Geocoding your own Historical Data' below.
-In simple scenarios (such as this one where we are only matching a single 'place' attribute – i.e. only 'county') it is possible to code your data to a gazetteer using the [VLOOKUP](https://support.office.com/en-gb/article/VLOOKUP-function-0bbc8083-26fe-4963-8ab8-93a18ad188a1) function in Microsoft Excel (or equivalent spreadsheets) or even using the [MMQGIS](http://michaelminn.com/linux/mmqgis/) plugin within QGIS. However, in most practical scenarios you will probably wish to match on several attributes simultaneously (for instance town, county and country – you would want to distinguish between Sudbury, Suffolk, England; Sudbury, Derbyshire, England; Sudbury, Middlesex, England; and Sudbury, Ontario, Canada). This can be achieved in a somewhat cumbersome way using the [INDEX](https://support.office.com/en-gb/article/INDEX-function-a5dcf0dd-996d-40a4-a822-b56b061328bd) function in Excel, but is more practical, and extensible, in a relational database such as Microsoft Access or LibreOffice Base.
+In simple scenarios (such as this one where we are only matching a single 'place' attribute – i.e. only 'county') it is possible to code your data to a gazetteer using the [VLOOKUP](https://support.office.com/en-gb/article/VLOOKUP-function-0bbc8083-26fe-4963-8ab8-93a18ad188a1) function in Microsoft Excel (or equivalent spreadsheets) or even using the [MMQGIS](https://michaelminn.com/linux/mmqgis/) plugin within QGIS. However, in most practical scenarios you will probably wish to match on several attributes simultaneously (for instance town, county and country – you would want to distinguish between Sudbury, Suffolk, England; Sudbury, Derbyshire, England; Sudbury, Middlesex, England; and Sudbury, Ontario, Canada). This can be achieved in a somewhat cumbersome way using the [INDEX](https://support.office.com/en-gb/article/INDEX-function-a5dcf0dd-996d-40a4-a822-b56b061328bd) function in Excel, but is more practical, and extensible, in a relational database such as Microsoft Access or LibreOffice Base.
This tutorial uses LibreOffice, which is an Open Source alternative to Microsoft Office and is available for Windows, Mac OS X and all variants of Linux etc (NB it requires a full Java installation). It includes a relational database application on all platforms, unlike Microsoft Access which is available only in the Windows version of Office. However, it is quite restricted in its functionality. If you use Microsoft Access, or are a very proficient spreadsheet user, please feel free to replicate this process using your preferred software.
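For readers who script rather than work in Base, the single-attribute match described above is, in essence, a dictionary lookup keyed on the place name; a rough Python sketch with hypothetical file and column names:

```python
# Join alumni rows to gazetteer coordinates on a lower-cased place name.
import csv

gazetteer = {}
with open('CountiesXY.csv', newline='', encoding='utf-8') as f:  # hypothetical export from QGIS
    for row in csv.DictReader(f):
        gazetteer[row['Name'].lower()] = (row['X'], row['Y'])

with open('AlumniCounties.csv', newline='', encoding='utf-8') as f, \
     open('GeocodedAlumni.csv', 'w', newline='', encoding='utf-8') as out:
    reader = csv.DictReader(f)
    writer = csv.DictWriter(out, fieldnames=reader.fieldnames + ['X', 'Y'])
    writer.writeheader()
    for row in reader:
        row['X'], row['Y'] = gazetteer.get(row['Place'].lower(), ('', ''))  # 'Place' is hypothetical
        writer.writerow(row)
```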
@@ -244,7 +244,7 @@ A more useful way of depicting the geocoded data is to use QGIS's advanced displ You have now completed the geocoding process, and can enjoy the advantages of being able to analyse this inherently spatial historical data in a spatial way. In a real-world scenario, you would probably only geocode data which is more precise than simple county level, giving a good deal more analytical potential and making maps plotted more meaningful. Where you have data which can be geocoded to a high – and crucially consistent – level of precision, it is possible to conduct a wide range of geographical analyses such as measures of clustering or distances. -For example, you can easily tweak and refine which records are mapped by changing the definition query in the properties of your geocoded layer (Right click on `GeocodedAlumni` in Layers Panel and select `Layer Properties>General>Provider Feature Filter>Query Builder`). You can use the less than or greater than operators to define years and see if trends change over time, or use the [SQL LIKE](http://www.w3schools.com/sql/sql_like.asp) statement to query the ‘details’ column to filter particular colleges – did they tend to attract students from particular counties? These queries use standard [SQL language](http://www.w3schools.com/sql/) and can be combined with `AND`, `NOT` etc. This example would select only those students who had matriculated at Magdalen College: +For example, you can easily tweak and refine which records are mapped by changing the definition query in the properties of your geocoded layer (Right-click on `GeocodedAlumni` in the Layers Panel and select `Layer Properties>General>Provider Feature Filter>Query Builder`). You can use the less than or greater than operators to define years and see if trends change over time, or use the [SQL LIKE](https://www.w3schools.com/sql/sql_like.asp) statement to query the ‘details’ column to filter particular colleges – did they tend to attract students from particular counties? These queries use standard [SQL language](https://www.w3schools.com/sql/) and can be combined with `AND`, `NOT` etc. This example would select only those students who had matriculated at Magdalen Hall: ``` "Details" LIKE '%Magdalen Hall%' diff --git a/en/lessons/geoparsing-text-with-edinburgh.md index 5a7299921c..7dadcea5ba 100755 --- a/en/lessons/geoparsing-text-with-edinburgh.md +++ b/en/lessons/geoparsing-text-with-edinburgh.md @@ -33,7 +33,7 @@ The Geoparser works best on running text, as it considers locations in context f In December 2015, the Edinburgh Geoparser was released under the University of Edinburgh’s GPL license to be used by other researchers in the field of text mining and natural language processing as well as scholars who are interested in geoparsing text. More information on its documentation, publications using it and how to download it can be found [here](https://www.ltg.ed.ac.uk/software/geoparser/). -A simple online demo of the vanilla Edinburgh Geoparser can be tried out [here](http://jekyll.inf.ed.ac.uk/geoparser.html). It provides only the visual interface to the Geoparser output after uploading a text file and selecting a gazetteer. The demo is otherwise not configurable and should only be used to try out small examples and not for geo-parsing a large number of files. +A simple online demo of the vanilla Edinburgh Geoparser can be tried out [here](https://jekyll.inf.ed.ac.uk/geoparser.html).
It provides only the visual interface to the Geoparser output after uploading a text file and selecting a gazetteer. The demo is otherwise not configurable and should only be used to try out small examples and not for geo-parsing a large number of files. The following lesson explains how the Edinburgh Geoparser works under the hood and contains information on: @@ -151,11 +151,11 @@ It takes the stdout from the first command and runs the Geoparser with the follo * `-t` specifies the format of your input.  Text input (`plain`) is recommended for geo-parsing. - * `-g` specifies the gazetteer that should be queried.  In the above example, the gazetteer selected is [GeoNames](http://www.geonames.org/) (`geonames`), a large global gazetteer.  You can also specify other gazetteers, for example the DEEP gazetteer of historical placenames in England (`deep`) or the Pleiades+ gazetteer of ancient places (`plplus`).  For more information on the types of gazetteers offered as part of the distribution see the Geoparser documentation [here](http://groups.inf.ed.ac.uk/geoparser/documentation/v1.3/html/gaz.html). + * `-g` specifies the gazetteer that should be queried.  In the above example, the gazetteer selected is [GeoNames](https://www.geonames.org/) (`geonames`), a large global gazetteer.  You can also specify other gazetteers, for example the DEEP gazetteer of historical placenames in England (`deep`) or the Pleiades+ gazetteer of ancient places (`plplus`).  For more information on the types of gazetteers offered as part of the distribution see the Geoparser documentation [here](https://groups.inf.ed.ac.uk/geoparser/documentation/v1.3/html/gaz.html). * `-o` specifies two pieces of information, the output directory (`../out`) which is located within the `geoparser-1.3` directory and a prefix for the output file name (in this case `172172`, the same prefix as that of the input file name). Once the command is run and the Geoparser is finished, the result files appear in the output directory (`../out`) starting with the specified prefix. -When running the Geoparser, the specified text file is going through a series of processing steps which are combined into one pipeline.  It is first [tokenised](https://en.wikipedia.org/wiki/Lexical_analysis#Tokenization_), [part-of-speech-tagged](https://en.wikipedia.org/wiki/Part-of-speech_tagging) and [lemmatised](https://en.wikipedia.org/wiki/Lemmatisation). After these initial steps, [named entity recognition](https://en.wikipedia.org/wiki/Named-entity_recognition) is performed to identify location and person names as well as dates.  It was found that identifying location and person names in parallel helps to distinguish some ambiguous cases (like the string "Lewis" which could refer to a first name or the Scottish island) and where their context helps to distinguish between them.  The extracted locations are then resolved to latitude/longitude coordinate pairs.  The text is then further processed by identifying syntactic phrases (chunking) and temporal relations.  The latter two steps are not very relevant to this lesson and will therefore not be explained in detail.  Finally, visualisations are created to be able to inspect the file and the Geoparser output using a map interface in a browser.  For more information on each of the sub-components of the Geoparser, see the documentation [here](http://groups.inf.ed.ac.uk/geoparser/documentation/v1.3/html/pipeline.html). 
+When running the Geoparser, the specified text file goes through a series of processing steps which are combined into one pipeline.  It is first [tokenised](https://en.wikipedia.org/wiki/Lexical_analysis#Tokenization), [part-of-speech-tagged](https://en.wikipedia.org/wiki/Part-of-speech_tagging) and [lemmatised](https://en.wikipedia.org/wiki/Lemmatisation). After these initial steps, [named entity recognition](https://en.wikipedia.org/wiki/Named-entity_recognition) is performed to identify location and person names as well as dates.  It was found that identifying location and person names in parallel helps to distinguish some ambiguous cases (like the string "Lewis", which could refer to a first name or the Scottish island) where context helps to distinguish between them.  The extracted locations are then resolved to latitude/longitude coordinate pairs.  The text is then further processed by identifying syntactic phrases (chunking) and temporal relations.  The latter two steps are not very relevant to this lesson and will therefore not be explained in detail.  Finally, visualisations are created to be able to inspect the file and the Geoparser output using a map interface in a browser.  For more information on each of the sub-components of the Geoparser, see the documentation [here](https://groups.inf.ed.ac.uk/geoparser/documentation/v1.3/html/pipeline.html). Note that when using the Geoparser in combination with the GeoNames gazetteer, some historical place names will not be identified as they are missing from the gazetteer. Also, the Geoparser team can provide additional pre-processing to improve the quality of optical-character recognised output (e.g. to fix soft-hyphen splitting or to deal with the long “s” character). Those scripts are not distributed with the standard distribution but are available on request. @@ -219,9 +219,9 @@ where * `W`(est) `N`(orth) `E`(ast) `S`(outh) are decimal degrees * `score` is the same as for option `-l`. -You can grab the coordinates of a bounding box for a particular area using this online [BoundingBox](http://boundingbox.klokantech.com) tool. For example, a bounding box for Canada is `[W:-141.002701, N:83.110619, E:-52.620201, S:41.681019]` (see Figure 5) +You can grab the coordinates of a bounding box for a particular area using this online [BoundingBox](https://boundingbox.klokantech.com) tool. For example, a bounding box for Canada is `[W:-141.002701, N:83.110619, E:-52.620201, S:41.681019]` (see Figure 5). -{% include figure.html filename="geoparser_figure03.png" caption="Figure 5: Bounding box for Canada drawn on [BoundingBox](http://boundingbox.klokantech.com)." %} +{% include figure.html filename="geoparser_figure03.png" caption="Figure 5: Bounding box for Canada drawn on [BoundingBox](https://boundingbox.klokantech.com)." %} To specify this bounding box using the previous example, go back to the scripts directory and run the following command: @@ -289,7 +289,7 @@ If the document date is not specified all temporal expressions will be interpret ### Geo-parsing Multiple Text Files -Now that you know how to geo-parse one file, you may want to do the same thing for a set of documents all at once. You can download a simple shell script which geo-parses multiple files [here](http://groups.inf.ed.ac.uk/geoparser/scripts/run-multiple-files.sh).
Please refer to the [Geoparser workshop](http://homepages.inf.ed.ac.uk/balex/publications/geoparser-workshop.pdf) slides for more information on how to make this script executable, run and it and adapt it to your needs. +Now that you know how to geo-parse one file, you may want to do the same thing for a set of documents all at once. You can download a simple shell script which geo-parses multiple files [here](https://groups.inf.ed.ac.uk/geoparser/scripts/run-multiple-files.sh). Please refer to the [Geoparser workshop](https://homepages.inf.ed.ac.uk/balex/publications/geoparser-workshop.pdf) slides for more information on how to make this script executable, run it, and adapt it to your needs. ### Extracting Geo-Resolution Output to TSV @@ -367,18 +367,18 @@ The lesson is also available in workshop form. If you're interested in running The Geoparser team also welcomes suggestions for future collaboration to tailor the Geoparser to different needs. Please get in touch if you have ideas about how it could be applied. -In the past the Geoparser was used to identify place names for different purposes and in different types of data (e.g. Grover et al., 2010 and Alex et al., 2015). For example, it was adapted to perform fine-grained geo-parsing for literature set in Edinburgh ([Palimpsest](http://palimpsest.blogs.edina.ac.uk/)) presented in the [LitLong](http://litlong.org/) interface. It was used to geo-parse -* volumes of the Survey of English Place Names ([DEEP](http://web.archive.org/web/20170722115758/http://englishplacenames.cerch.kcl.ac.uk/), see Grover and Tobin, 2014), -* large historical collections related to commodity trading in the 19th century British Empire ([Trading Consequences](http://tradingconsequences.blogs.edina.ac.uk/)) and -* 19th century British newspapers by [Prof. Ian Gregory](http://www.lancaster.ac.uk/staff/gregoryi/)’s group at Lancaster University. +In the past the Geoparser was used to identify place names for different purposes and in different types of data (e.g. Grover et al., 2010 and Alex et al., 2015). For example, it was adapted to perform fine-grained geo-parsing for literature set in Edinburgh ([Palimpsest](https://palimpsest.blogs.edina.ac.uk/)) presented in the [LitLong](https://litlong.org/) interface. It was used to geo-parse +* volumes of the Survey of English Place Names ([DEEP](https://web.archive.org/web/20170722115758/https://englishplacenames.cerch.kcl.ac.uk/), see Grover and Tobin, 2014), +* large historical collections related to commodity trading in the 19th century British Empire ([Trading Consequences](https://tradingconsequences.blogs.edina.ac.uk/)) and +* 19th century British newspapers by [Prof. Ian Gregory](https://www.lancaster.ac.uk/staff/gregoryi/)’s group at Lancaster University. -The Geoparser was also adapted to the ancient world for the [Google Ancient Places](https://googleancientplaces.wordpress.com/) project (e.g. see Isaksen et al., 2011), with its [GapVis](http://nrabinowitz.github.io/gapvis/)  interface. More recently, the Geoparser was used to geo-parse Twitter user profile locations (Alex et al, 2016) and the mass digitised text, including the Gazetteers of Scotland (Filgueira et al., 2020) and Encyclopaedia Britannica (Filgueira et al., 2021) +The Geoparser was also adapted to the ancient world for the [Google Ancient Places](https://googleancientplaces.wordpress.com/) project (e.g. see Isaksen et al., 2011), with its [GapVis](https://nrabinowitz.github.io/gapvis/) interface.
More recently, the Geoparser was used to geo-parse Twitter user profile locations (Alex et al., 2016) and mass-digitised texts, including the Gazetteers of Scotland (Filgueira et al., 2020) and the Encyclopaedia Britannica (Filgueira et al., 2021). ## References -Beatrice Alex, Clare Llewellyn, Claire Grover, Jon Oberlander and Richard Tobin (2016). Homing in on Twitter users: Evaluating an Enhanced Geoparser for User Profile Locations. 2016. In the Proceedings of the 10th Language Resources and Evaluation Conference (LREC), 23-28 May 2016. [[pdf](http://www.lrec-conf.org/proceedings/lrec2016/pdf/129_Paper.pdf)] +Beatrice Alex, Clare Llewellyn, Claire Grover, Jon Oberlander and Richard Tobin (2016). Homing in on Twitter users: Evaluating an Enhanced Geoparser for User Profile Locations. In the Proceedings of the 10th Language Resources and Evaluation Conference (LREC), 23-28 May 2016. [[pdf](https://www.lrec-conf.org/proceedings/lrec2016/pdf/129_Paper.pdf)] -Beatrice Alex, Kate Byrne, Claire Grover and Richard Tobin (2015). Adapting the Edinburgh Geoparser for Historical Georeferencing. International Journal for Humanities and Arts Computing, 9(1), pp. 15-35, March 2015.[[pdf](http://www.euppublishing.com/doi/pdfplus/10.3366/ijhac.2015.0136)] +Beatrice Alex, Kate Byrne, Claire Grover and Richard Tobin (2015). Adapting the Edinburgh Geoparser for Historical Georeferencing. International Journal for Humanities and Arts Computing, 9(1), pp. 15-35, March 2015. [[pdf](https://www.euppublishing.com/doi/pdfplus/10.3366/ijhac.2015.0136)] Rosa Filgueira, Claire Grover, Vasilios Karaiskos, Beatrice Alex, Sarah Van Eyndhoven, Lisa Gotthard, and Melissa Terras (2021). Extending defoe for the efficient analysis of historical texts at scale. In 2021 IEEE 17th International Conference on eScience (eScience), pp. 21-29. @@ -386,6 +386,6 @@ Rosa Filgueira, Claire Grover, Melissa Terras, and Beatrice Alex (2020). Geopars Claire Grover and Richard Tobin (2014). A Gazetteer and Georeferencing for Historical English Documents. In Proceedings of LaTeCH 2014 at EACL 2014. Gothenburg, Sweden. [[pdf](https://doi.org/10.3115/v1/W14-0617)] -Claire Grover, Richard Tobin, Kate Byrne, Matthew Woollard, James Reid, Stuart Dunn, and Julian Ball (2010). Use of the Edinburgh Geoparser for georeferencing digitised historical collections. Philosophical Transactions of the Royal Society A. [[pdf](http://homepages.inf.ed.ac.uk/grover/papers/PTRS-A-2010-Grover-3875-89.pdf)] +Claire Grover, Richard Tobin, Kate Byrne, Matthew Woollard, James Reid, Stuart Dunn, and Julian Ball (2010). Use of the Edinburgh Geoparser for georeferencing digitised historical collections. Philosophical Transactions of the Royal Society A. [[pdf](https://homepages.inf.ed.ac.uk/grover/papers/PTRS-A-2010-Grover-3875-89.pdf)] Leif Isaksen, Elton Barker, Eric C. Kansa, Kate Byrne (2012). GAP: A NeoGeo Approach to Classical Resources. Leonardo 45 (1): 82–83. [[pdf](https://direct.mit.edu/leon/article/45/1/82/46956/GAP-A-NeoGeo-Approach-to-Classical-Resources#.U48IuXWx15Q)] diff --git a/en/lessons/georeferencing-qgis.md index 14a1848f01..daf2558dfa 100755 --- a/en/lessons/georeferencing-qgis.md +++ b/en/lessons/georeferencing-qgis.md @@ -57,7 +57,7 @@ Entering control points in a GIS is easy, but behind the scenes, georeferencing uses complex transformation and compression processes.
These are used to correct the distortions and inaccuracies found in many historical maps and stretch the maps so that they fit geographic -coordinates. In cartography this is known as [rubber-sheeting](http://en.wikipedia.org/wiki/Rubbersheeting) because +coordinates. In cartography this is known as [rubber-sheeting](https://en.wikipedia.org/wiki/Rubbersheeting) because it treats the map as if it were made of rubber and the control points as if they were tacks 'pinning' the historical document to a three dimensional surface like the globe. @@ -78,7 +78,7 @@ GDAL and check the box beside it, and click OK. - At this point, you need to shut down and relaunch QGIS. For the purposes of this example, and to keep things as simple as possible, don't reload your existing project but instead start a new project. -- Set up the [Coordinate Reference System](http://en.wikipedia.org/wiki/Spatial_reference_system) (CRS) correctly (see +- Set up the [Coordinate Reference System](https://en.wikipedia.org/wiki/Spatial_reference_system) (CRS) correctly (see [Installing QGIS 2.0 and adding Layers](/lessons/qgis-layers) for a reminder) - Save this new project (under File menu, select Save Project) and call it 'georeferencing.' @@ -102,7 +102,7 @@ referred to as 'Lots' in PEI. Hence the file name - Navigate to the link below in your web browser and download the file. - + - After downloading the file called 'lot\_township\_polygon', move it into a folder that you can find later and unzip the file. (Remember @@ -179,7 +179,7 @@ Some tips for choosing control points: made. - Check that your control points did not change location over time. Roads were often re-routed, and even houses and other buildings were - moved, especially [in Atlantic Canada](http://books.google.ca/books?id=TqCNZYXWXAUC&dq=tilting&source=gbs_navlinks_s)! + moved, especially [in Atlantic Canada](https://books.google.ca/books?id=TqCNZYXWXAUC&dq=tilting&source=gbs_navlinks_s)! *Add your first control point:* @@ -247,13 +247,13 @@ compress the image. Most of these settings can be left as default: linear transformation type, nearest neighbour resampling method, and LZW compression. (The -[world file](http://en.wikipedia.org/wiki/World_file) is not necessary, unless you want to georeference the +[world file](https://en.wikipedia.org/wiki/World_file) is not necessary, unless you want to georeference the same image again in another GIS or if someone else needs to georeference the image and does not have access to your GIS data, coordinate reference system, etc.) The target SRS is not important, but you could use this feature to give the new raster a different reference system. -- Assign a folder for your new georeferenced raster file. [Tif](http://en.wikipedia.org/wiki/Tagged_Image_File_Format) is +- Assign a folder for your new georeferenced raster file. [Tif](https://en.wikipedia.org/wiki/Tagged_Image_File_Format) is the default format for rasters georeferenced in QGIS. - Be aware that a Tif file is going to be much larger than your original map, even with LZW compression, so make sure you have adequate space if you are @@ -323,4 +323,4 @@ over a DEM (digital elevation model) to give it a hillshade terrain or 3D effect and perform a 'fly-over' of PEI homes in the nineteenth century. 
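To make the world file mentioned above less abstract: it is a plain-text sidecar of just six lines (for a Tif, usually given a `.tfw` extension) recording the pixel size, rotation terms, and the map coordinates of the upper-left pixel. A hypothetical example, with made-up values rather than ones taken from this map, might look like:

```
2.0
0.0
0.0
-2.0
439000.0
5212000.0
```

Read line by line: each pixel is 2 map units wide (line 1), there is no rotation (lines 2 and 3), each pixel is 2 units tall, negative because image rows count downward (line 4), and the centre of the upper-left pixel sits at easting 439000 and northing 5212000 (lines 5 and 6).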
-*This lesson is part of the [Geospatial Historian](http://geospatialhistorian.wordpress.com/).* +*This lesson is part of the [Geospatial Historian](https://geospatialhistorian.wordpress.com/).* diff --git a/en/lessons/geospatial-data-analysis.md index e4513d7797..be88dae82a 100644 --- a/en/lessons/geospatial-data-analysis.md +++ b/en/lessons/geospatial-data-analysis.md @@ -35,7 +35,7 @@ This tutorial will introduce scholars to some of these techniques for processing Specifically, this tutorial is going to use a membership list--with addresses--from a para-religious organization in America (PTL Ministries) and downloadable geographic data to assess population characteristics that could provide insights into an organization that is often characterized as more rural and less wealthy, alongside a host of other characteristics. The tutorial will then visualize and analyze this data to assess possible insights. This process will provide the basic tools and understandings that will allow scholars to assess other events and organizations that have geographic data. From this, you should be able to discover or challenge understandings of historical events using geospatial analysis. ## Pre-requisites -The work for this lesson will be done in R and R Studio, an open source statistical package used by data scientists, statisticians and other researchers. We are using R, because it is a widely-used open source tool that will allow us to both visualize and analyze our data using a multitude of methods that can be expanded upon quite easily. Some background knowledge of the software and statistics will be helpful. For introductions to R, I recommend the [r-basics](/lessons/r-basics-with-tabular-data) tutorial and the more comprehensive [Computational Historical Thinking](https://dh-r.lincolnmullen.com) as starting points. There are many other services such as this [MOOC](https://www.coursera.org/learn/r-programming) and [DataCamp](https://www.datacamp.com/) that can introduce beginners to R's broader functionality. [UCLA](http://www.ats.ucla.edu/stat/r/default.htm) also has a nice introduction.[^1] While this tutorial will attempt to step through the entire process in R, basic knowledge of R is needed. The tutorial also assumes users will have some knowledge about the event you are observing which you will use later as a means to test and contest assumptions. +The work for this lesson will be done in R and R Studio, an open source statistical package used by data scientists, statisticians and other researchers. We are using R because it is a widely-used open source tool that will allow us to both visualize and analyze our data using a multitude of methods that can be expanded upon quite easily. Some background knowledge of the software and statistics will be helpful. For introductions to R, I recommend the [r-basics](/lessons/r-basics-with-tabular-data) tutorial and the more comprehensive [Computational Historical Thinking](https://dh-r.lincolnmullen.com) as starting points. There are many other services such as this [MOOC](https://www.coursera.org/learn/r-programming) and [DataCamp](https://www.datacamp.com/) that can introduce beginners to R's broader functionality. [UCLA](https://www.ats.ucla.edu/stat/r/default.htm) also has a nice introduction.[^1] While this tutorial will attempt to step through the entire process in R, basic knowledge of R is needed.
The tutorial also assumes you will have some knowledge about the event you are observing, which you will use later as a means to test and contest assumptions. ## Lesson Goals @@ -119,7 +119,7 @@ The number of variables in `County_Aggregate_Data` should now increase as all of The next step is to merge our list with our `SpatialDataFrame` so we can perform our analysis. While we are using a membership list, it can be any list that is geographic in nature. For example, you may have a list of events that happened during a particular time period; or a list of places an individual chooses to visit. This type of data will come in two basic formats. The first is information such as locations, addresses, or incident locations--which will be converted to geographic coordinates. The second will be a table that lists the same information alongside the county (or geographic region) where it occurred. We can handle either. ## Geocoding -In the first case we have raw addresses of the members of our organization which will necessitate some additional steps. The address will need be transformed into geographical points in a process called [geocoding](https://en.wikipedia.org/wiki/Geocoding). This will create geographic points--from addresses--that can be linked to spatial regions in our downloaded census data so that we can analyze it to help us discover trends related to geographic location of these addresses. R can do some of this work but if you have a large number of addresses, you will need to use an external service because the free services R uses (such as google) will cap how many address you can geocode in a day. One popular outside service is hosted by [Texas A&M Geocoding Services](http://geoservices.tamu.edu/Services/Geocode/) and can handle large batches at a reasonable price. In the end, our address will be transformed into a list of latitudes and longitudes. This is the data R needs. +In the first case we have raw addresses of the members of our organization, which will necessitate some additional steps. The addresses will need to be transformed into geographical points in a process called [geocoding](https://en.wikipedia.org/wiki/Geocoding). This will create geographic points--from addresses--that can be linked to spatial regions in our downloaded census data so that we can analyze it to help us discover trends related to geographic location of these addresses. R can do some of this work but if you have a large number of addresses, you will need to use an external service because the free services R uses (such as Google) will cap how many addresses you can geocode in a day. One popular outside service is hosted by [Texas A&M Geocoding Services](https://geoservices.tamu.edu/Services/Geocode/) and can handle large batches at a reasonable price. In the end, our addresses will be transformed into a list of latitudes and longitudes. This is the data R needs. If you have fewer than 2,500 addresses, this can be handled in R using Google's geocoder. In R, you must first gather the addresses from whatever dataset you have, and then transform them.
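One rough sketch of that R route, with a hypothetical file and column name (note that recent releases of ggmap also require registering a Google API key first):

```r
# A hedged sketch, not the lesson's exact code: turning address strings
# into longitude/latitude pairs with ggmap's geocode()
library(ggmap)
# register_google(key = "YOUR-API-KEY")   # required by recent ggmap releases

members <- read.csv("members.csv", as.is = TRUE)   # hypothetical file with an 'address' column
coords <- geocode(members$address, output = "latlon", source = "google")
members$lon <- coords$lon
members$lat <- coords$lat
```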
In our example, the data has already been geocoded, but below is an example of the commands used when processing a list of addresses and turning them into a list of geographic coordinates: @@ -174,7 +174,7 @@ Now we have a large dataframe called `County_Aggregate_Data` which has our count ```r religion <- read.csv("./data/Religion/Churches.csv", as.is=TRUE) ``` -Depending on the state of the data you may need to do some data transformations in order to merge it back with the DataFrame. For complex transformations, see tutorials in R on working with data such as [Data Wrangling and Management in R tutorial](/en/lessons/data-wrangling-and-management-in-r) [data transforms](http://r4ds.had.co.nz/transform.html). In essence, you need to have a common field in both datasets to merge upon. Often this is a geographic id for the county and state represented by `GEOID`. It could also be the unique FIPS Code given by the US Census. Below I am using state and county `GEOID`. In this example, we are converting one data frame's common fields to numeric so that they match the variable type of the other dataframe: +Depending on the state of the data you may need to do some data transformations in order to merge it back with the DataFrame. For complex transformations, see tutorials in R on working with data such as the [Data Wrangling and Management in R](/en/lessons/data-wrangling-and-management-in-r) tutorial or [data transforms](https://r4ds.had.co.nz/transform.html). In essence, you need to have a common field in both datasets to merge upon. Often this is a geographic id for the county and state represented by `GEOID`. It could also be the unique FIPS Code given by the US Census. Below I am using state and county `GEOID`. In this example, we are converting one data frame's common fields to numeric so that they match the variable type of the other dataframe: ```r religion$STATEFP <- religion$STATE @@ -192,7 +192,7 @@ This will bring in all additional fields into our `SpatialDataFrame`. Now we have a large `SpatialDataFrame` called `County_Aggregate_Data` which has our geocoded count data, our external count data and our census data by county. It is now time to begin to look at the data distribution and assess if everything appears correct and is in a format that will allow for some visualization and data analysis. We have some inherent complexity to our data because it is considered "count data." As such, we should be cognizant that our data is not measuring individuals directly but rather relationships between counties. We are attempting to discover if counties with certain traits lead to higher membership in our datasets. These realities can help us gather some assumptions on the individuals in these regions. ## Visualizing -Because we are analyzing geospatial data, it is often best to begin with geographic visuals. There are many options here, but I find it easiest to start with the qtm function from the TMAP library which creates [choropleth](https://en.wikipedia.org/wiki/Choropleth_map) maps simply. We could also use [GGPlot2][(http://strimas.com/r/tidy-sf/](http://web.archive.org/web/20190922234254/http://strimas.com/r/tidy-sf/)) which which should be installed using the development version. +Because we are analyzing geospatial data, it is often best to begin with geographic visuals. There are many options here, but I find it easiest to start with the `qtm` function from the TMAP library, which creates [choropleth](https://en.wikipedia.org/wiki/Choropleth_map) maps simply.
We could also use [GGPlot2](https://web.archive.org/web/20190922234254/https://strimas.com/r/tidy-sf/), which should be installed using the development version. Now, we are going to prepare the map and look at some census data. First on our list should be membership numbers relative to population (relative membership distribution). One of the most commonly used and clearest ways to display this information is by number of members per 10,000 people. We will then do the math to create a relative population variable (number of members per 10,000 people). We do this because we have to ensure we are taking into account the variability of populations within the census regions that we are analyzing; otherwise we will get misleading visualizations in densely populated counties that represent general population trends rather than variable relationships. If we did not take this step, we would undoubtedly see a map that highlights urban areas rather than areas where membership is strongest. @@ -298,7 +298,7 @@ Through this process, we have gathered and transformed geospatial data into a us ## Other Models and Visualizations -There are many other models and visualizations available that can bring insight but they also add some complexity which demand further statistical understandings. For example, You can also create more complex scatterplots that can provide further insights. [Plot.ly](https://plot.ly/r/) offers interactive scatter plots that can be customized and shared.[^8]. While statistical modeling usually focuses on a particular model's predictive insight, well-fit models also provide insight into the data they represent. In particular, the Poisson regression is frequently used to create [models of count data](http://www.theanalysisfactor.com/regression-models-for-count-data/) which is how population data is often represented. [Geographically Weighted Regressions](https://rstudio-pubs-static.s3.amazonaws.com/44975_0342ec49f925426fa16ebcdc28210118.html) also have particular advantages with this type of data. But assessing fit has some complexity. [Decision trees](hhttps://www.analyticsvidhya.com/blog/2016/04/complete-tutorial-tree-based-modeling-scratch-in-python/) could also be useful for historical data because they give an understandable graphical representation of the the leading factors that caused inclusion in a group or list. Principal component analysis, [correspondence analysis](/en/lessons/correspondence-analysis-in-R) and other clustering methods can also be helpful, especially when there is limited knowledge or insight into the event being analyzed yet there is an abundance of data associated with the event. I recommend background reading or discussions with a data scientist or statistician when exploring some of these modeling options as understanding the configuration and parameters of the individual models is essential to ensuring the results are trustworthy and significant. +There are many other models and visualizations available that can bring insight, but they also add some complexity which demands further statistical understanding. For example, you can also create more complex scatterplots that can provide further insights. [Plot.ly](https://plot.ly/r/) offers interactive scatter plots that can be customized and shared.[^8] While statistical modeling usually focuses on a particular model's predictive insight, well-fit models also provide insight into the data they represent.
In particular, the Poisson regression is frequently used to create [models of count data](https://www.theanalysisfactor.com/regression-models-for-count-data/), which is how population data is often represented. [Geographically Weighted Regressions](https://rstudio-pubs-static.s3.amazonaws.com/44975_0342ec49f925426fa16ebcdc28210118.html) also have particular advantages with this type of data. But assessing fit has some complexity. [Decision trees](https://www.analyticsvidhya.com/blog/2016/04/complete-tutorial-tree-based-modeling-scratch-in-python/) could also be useful for historical data because they give an understandable graphical representation of the leading factors that caused inclusion in a group or list. Principal component analysis, [correspondence analysis](/en/lessons/correspondence-analysis-in-R) and other clustering methods can also be helpful, especially when there is limited knowledge or insight into the event being analyzed yet there is an abundance of data associated with the event. I recommend background reading or discussions with a data scientist or statistician when exploring some of these modeling options as understanding the configuration and parameters of the individual models is essential to ensuring the results are trustworthy and significant. @@ -312,13 +312,13 @@ There are many other models and visualizations available that can bring insight [^3]: This is often leveraged in the field of public health. See for example, [Spatial Analysis and Correlates of County-Level Diabetes Prevalence](https://www.cdc.gov/pcd/issues/2015/14_0404.htm). Other fields such as criminal justice also rely on similar analytics although criminal justice tends to look at smaller census areas within regions. See, for example, `https://www.ncjrs.gov/pdffiles1/nij/grants/204432.pdf` -[^4]: Count data typically has large numbers of zero values which can add some complexity that will not be covered here. There are more complex ways to minimize this using more complex regression models. See, for example [Regression Models with Count Data](https://stats.idre.ucla.edu/stata/seminars/regression-models-with-count-data/). For general description of what normal distributions, which work well without modification look like see normal [distributions](http://www.statisticshowto.com/probability-and-statistics/normal-distributions/) +[^4]: Count data typically has large numbers of zero values which can add some complexity that will not be covered here. There are more complex ways to minimize this using more complex regression models. See, for example, [Regression Models with Count Data](https://stats.idre.ucla.edu/stata/seminars/regression-models-with-count-data/). For a general description of what normal distributions (which work well without modification) look like, see [normal distributions](https://www.statisticshowto.com/probability-and-statistics/normal-distributions/) -[^5]: There are different strategies to dealing with this type of data. See for example, [The Excess-zero Problem in Soil Animal Count Data](http://www.sciencedirect.com/science/article/pii/S0031405608000073) or [Data Transformations](http://www.biostathandbook.com/transformation.html). +[^5]: There are different strategies for dealing with this type of data. See, for example, [The Excess-zero Problem in Soil Animal Count Data](https://www.sciencedirect.com/science/article/pii/S0031405608000073) or [Data Transformations](https://www.biostathandbook.com/transformation.html).
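To make those count-data points concrete, here is a minimal sketch of fitting the two kinds of model discussed above in R; the `counties` data frame and its columns are illustrative, not this lesson's data:

```r
# A hedged sketch: count models for county-level membership data.
# 'counties' with columns 'members', 'population' and 'income' is illustrative.
library(MASS)

# Poisson regression, with log(population) as an offset so we model rates
poisson_fit <- glm(members ~ income + offset(log(population)),
                   family = poisson, data = counties)

# Negative binomial regression relaxes the Poisson assumption that the
# variance equals the mean, which helps with overdispersed counts
nb_fit <- glm.nb(members ~ income + offset(log(population)), data = counties)
summary(nb_fit)
```

Comparing the two fits (for example with `AIC()`) is one quick check on whether overdispersion matters for your data.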
-[^6]: For details on ggmap and and integration with Google Maps or other maps services see the [ggmap overview](http://stat405.had.co.nz/ggmap.pdf). For another broader discussions on google map making that utilizes a few of the libraries in this tutorial see [R and Google Map Making](https://rpubs.com/nickbearman/r-google-map-making). For a discussion of the sf library and it relationship to sp see [Simple Features for R](https://cran.r-project.org/web/packages/sf/vignettes/sf1.html). While sp has been the library spatial analysis library of choice, it is being superseded by sf. +[^6]: For details on ggmap and its integration with Google Maps or other map services, see the [ggmap overview](https://stat405.had.co.nz/ggmap.pdf). For a broader discussion of Google map making that utilizes a few of the libraries in this tutorial, see [R and Google Map Making](https://rpubs.com/nickbearman/r-google-map-making). For a discussion of the sf library and its relationship to sp, see [Simple Features for R](https://cran.r-project.org/web/packages/sf/vignettes/sf1.html). While sp has been the spatial analysis library of choice, it is being superseded by sf. -[^7]: We are setting Coordinate Reference System(CRS) to EPSG 4326 which is the most common mapping system used int the U.S. It is used by Google which is the origins of our data. EPSG 3857 is also used by google. For more on CRS see [Coordinate Reference Systems & Spatial Projections](https://www.earthdatascience.org/courses/earth-analytics/spatial-data-r/intro-to-coordinate-reference-systems/). Also see [coordinate systems reference in R](http://web.archive.org/web/20200225021219/https://www.nceas.ucsb.edu/~frazier/RSpatialGuides/OverviewCoordinateReferenceSystems.pdf). +[^7]: We are setting the Coordinate Reference System (CRS) to EPSG 4326, which is the most common mapping system used in the U.S. It is used by Google, which is the origin of our data. EPSG 3857 is also used by Google. For more on CRS, see [Coordinate Reference Systems & Spatial Projections](https://www.earthdatascience.org/courses/earth-analytics/spatial-data-r/intro-to-coordinate-reference-systems/). Also see [coordinate systems reference in R](https://web.archive.org/web/20200225021219/https://www.nceas.ucsb.edu/~frazier/RSpatialGuides/OverviewCoordinateReferenceSystems.pdf). [^8]: These plots are a bit more complex and require an extra library, but they have some advantages. They work well with complex datasets because they have the ability to model more than two relationships by altering the color or size of the data points (we did this earlier on the choropleths by altering font size). Moreover, they are interactive, which allows you to explore extra information about data points after the plot is created without wrecking the visual makeup of the plot. Here is an example that looks at the relationship between income and membership but also adds urban status to the visual using color. I am also adjusting point size based on population so I can take a look at more populated areas alongside the other data: diff --git a/en/lessons/getting-started-with-markdown.md index 2902de8d04..e2a0488e68 100755 --- a/en/lessons/getting-started-with-markdown.md +++ b/en/lessons/getting-started-with-markdown.md @@ -34,11 +34,11 @@ Since Programming Historian lessons are submitted as Markdown files, I have incl ## What is Markdown?
-Developed in 2004 by [John Gruber](http://daringfireball.net/projects/markdown/ "Markdown on Daring Fireball"), Markdown refers to both (1) a way of formatting text files, as well as (2) a Perl utility to convert Markdown files into HTML. In this lesson, we'll focus on the first part and learn to write files using the Markdown syntax. +Developed in 2004 by [John Gruber](https://daringfireball.net/projects/markdown/ "Markdown on Daring Fireball"), Markdown refers to both (1) a way of formatting text files, as well as (2) a Perl utility to convert Markdown files into HTML. In this lesson, we'll focus on the first part and learn to write files using the Markdown syntax. Plain text files have many advantages over other formats. For one, they are readable on virtually all devices. They have also withstood the test of time better than other file types -- if you've ever tried to open a document saved in a legacy word processor format, you'll be familiar with the compatibility challenges involved. -By following Markdown syntax, you'll be able to produce files that are both legible in plain text and ready to be styled on other platforms. Many blogging engines, static site generators, and sites like [GitHub](http://github.com "GitHub") also support Markdown, and will render these files into HTML for display on the web. Additionally, tools like Pandoc can convert files into and out of Markdown. For more on Pandoc, visit the lesson on [Sustainable authorship in plain text using Pandoc and Markdown](/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown) by Dennis Tenen and Grant Wythoff. +By following Markdown syntax, you'll be able to produce files that are both legible in plain text and ready to be styled on other platforms. Many blogging engines, static site generators, and sites like [GitHub](https://github.com "GitHub") also support Markdown, and will render these files into HTML for display on the web. Additionally, tools like Pandoc can convert files into and out of Markdown. For more on Pandoc, visit the lesson on [Sustainable authorship in plain text using Pandoc and Markdown](/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown) by Dennis Tenen and Grant Wythoff. ## Markdown Syntax Markdown files are saved with the extension `.md`, and can be opened in a text editor such as TextEdit, Notepad, Sublime Text, or Vim. Many websites and publishing platforms also offer web-based editors and/or extensions for entering text using Markdown syntax. diff --git a/en/lessons/getting-started-with-mysql-using-r.md b/en/lessons/getting-started-with-mysql-using-r.md index 56d9e53009..ace8ef4ec9 100755 --- a/en/lessons/getting-started-with-mysql-using-r.md +++ b/en/lessons/getting-started-with-mysql-using-r.md @@ -29,7 +29,7 @@ R can perform analysis and data storage without the use of a relational database - When data is stored in a relational database already. - Working with the data of different entities that are related to one another. An example would be a database of soldiers of two different armies that fought a battle where we wanted to know what squad, platoon, company and brigade each soldier was part of. -A further short discussion of this is on [Jason A. French's blog](http://www.jason-french.com/blog/2014/07/03/using-r-with-mysql-databases/)[^2]. +A further short discussion of this is on [Jason A. French's blog](https://www.jason-french.com/blog/2014/07/03/using-r-with-mysql-databases/)[^2]. 
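As a preview of what working with MySQL from R looks like, here is a minimal sketch of opening a connection with the RMySQL package; the database name and credentials are placeholders rather than the values used later in the lesson:

```r
# A hedged sketch: opening and closing a MySQL connection from R
library(RMySQL)

con <- dbConnect(MySQL(),
                 user     = "example_user",       # placeholder credentials
                 password = "example_password",
                 dbname   = "example_database",
                 host     = "localhost")
dbListTables(con)   # list the tables in the database
dbDisconnect(con)
```

The same kind of connection object is what the lesson's later insert and query commands act on.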
By the end of this lesson you will be able to install a database system on your computer, create a database table, store information in the table and then query the data. At the conclusion of the lesson we'll use a query of the database to make a graph. @@ -48,7 +48,7 @@ MySQL is a relational database used to store and query information. This lesson - Store records to the table. - Query the table. -In this tutorial you will make a database of newspaper stories that contain words from a search of a newspaper archive. The program will store the title, date published and URL of each story in a database. We'll use another program to query the database and look for historically significant patterns. Sample data will be provided from the [Welsh Newspapers Online](http://newspapers.library.wales) newspaper archive. We are working toward having a list of stories we can query for information. At the end of the lesson, we will run a query to generate a graph of the number of newspaper stories in the database to see if there is a pattern that is significant. +In this tutorial you will make a database of newspaper stories that contain words from a search of a newspaper archive. The program will store the title, date published and URL of each story in a database. We'll use another program to query the database and look for historically significant patterns. Sample data will be provided from the [Welsh Newspapers Online](https://newspapers.library.wales) newspaper archive. We are working toward having a list of stories we can query for information. At the end of the lesson, we will run a query to generate a graph of the number of newspaper stories in the database to see if there is a pattern that is significant. # Required Software R, R Studio, MySQL Server and MySQL Workbench are the pieces of software required for this lesson. Notes on installing these software packages are below. @@ -179,7 +179,7 @@ SET PASSWORD=PASSWORD('your_new_password_you_just_wrote_down_in_step_3.5'); 3.6. Restart the machine. After restarting the machine you may need to repeat step *3.3 Start the MySQL server* above. ###### MySQL Workbench downloads -Click on this link: [http://dev.mysql.com/downloads/workbench/](http://dev.mysql.com/downloads/workbench/). Scroll down and click to **Select Operating System** that matches your computer. If necessary, **Select OS Version**. Once you have done that click the blue **Download** button. On the download page, scroll down, you have the option of starting the download by clicking **No thanks, just start my download.** +Click on this link: [https://dev.mysql.com/downloads/workbench/](https://dev.mysql.com/downloads/workbench/). Scroll down and click to **Select Operating System** that matches your computer. If necessary, **Select OS Version**. Once you have done that, click the blue **Download** button. On the download page, scroll down; you have the option of starting the download by clicking **No thanks, just start my download.** Once the file is downloaded, double click on the downloaded file to install it. Once the installation of MySQL Workbench is done, as per the instructions on the screen, drag the icon to the Applications folder on the left. (See below) @@ -408,7 +408,7 @@ You have successfully connected to the database using a configuration file. # Storing data in a table with SQL -In this section of the lesson we'll create a SQL statement to insert a row of data into the database table about this [newspaper story](http://newspapers.library.wales/view/4121281/4121288/94/).
We'll insert the record first in MySQL workbench and later we'll do it in R. +In this section of the lesson we'll create a SQL statement to insert a row of data into the database table about this [newspaper story](https://newspapers.library.wales/view/4121281/4121288/94/). We'll insert the record first in MySQL Workbench and later we'll do it in R. 1. In MySQL Workbench, click the icon labelled SQL+ to create a new SQL tab for executing queries. 2. Paste this statement below into the query window. This will insert a record into the table. @@ -438,7 +438,7 @@ LEFT(RTRIM('http://newspapers.library.wales/view/4121281/4121288/94/'),99), | search_term_used) | " | | VALUES('THE LOST LUSITANIA.', | The value to be inserted into the story_title field | | '1915-05-21', | story_date_published field | -| LEFT(RTRIM('http://newspapers.library.wales/view/4121281/4121288/94/'),99), | story_url field. This field is a VARCHAR(99) so it has a maximum length of 99 characters. Inserting a URL longer than 99 characters would cause an error and so two functions are used to control for that. RTRIM() trims trailing spaces to the right of the URL. LEFT(value,99) returns only the leftmost 99 characters of the trimmed URL. This URL is much shorter than that and so these functions are here for an example only. | +| LEFT(RTRIM('https://newspapers.library.wales/view/4121281/4121288/94/'),99), | story_url field. This field is a VARCHAR(99) so it has a maximum length of 99 characters. Inserting a URL longer than 99 characters would cause an error and so two functions are used to control for that. RTRIM() trims trailing spaces to the right of the URL. LEFT(value,99) returns only the leftmost 99 characters of the trimmed URL. This URL is much shorter than that and so these functions are here for an example only. | | 'German+Submarine'); | search_term_used field | @@ -837,10 +837,10 @@ Below is what the plot should look like: # Going further with MySQL -If you wanted to put a database on a website, using MySQL as the database and the PHP language to build the pages of the site is one way to do this. An example of this type of website is one I built to [search issues of the Equity newspaper](http://www.jeffblackadar.ca/graham_fellowship/corpus_entities_equity/). Larry Ullman's book *PHP and MySQL for Dynamic Web Sites* covers how to set up and connect to a database using MySQL and PHP in a hacker resistant way. +If you wanted to put a database on a website, using MySQL as the database and the PHP language to build the pages of the site is one way to do this. An example of this type of website is one I built to [search issues of the Equity newspaper](https://www.jeffblackadar.ca/graham_fellowship/corpus_entities_equity/). Larry Ullman's book *PHP and MySQL for Dynamic Web Sites* covers how to set up and connect to a database using MySQL and PHP in a hacker-resistant way. For examples of using SQL to sort and group data as well as perform calculations, see: -[MySQL by Examples for Beginners](http://web.archive.org/web/20171228130133/https://www.ntu.edu.sg/home/ehchua/programming/sql/MySQL_Beginner.html) or MySQL's [Examples of Common Queries](https://dev.mysql.com/doc/refman/5.7/en/examples.html). +[MySQL by Examples for Beginners](https://web.archive.org/web/20171228130133/https://www.ntu.edu.sg/home/ehchua/programming/sql/MySQL_Beginner.html) or MySQL's [Examples of Common Queries](https://dev.mysql.com/doc/refman/5.7/en/examples.html).
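In that spirit, a small sketch of the kind of grouping query this lesson builds toward, run from R through DBI; the connection object `con` is assumed, and the table name echoes the fields described above but is illustrative rather than the lesson's exact schema:

```r
# A hedged sketch: counting stories per month with SQL run from R
library(DBI)

results <- dbGetQuery(con, "
  SELECT YEAR(story_date_published)  AS story_year,
         MONTH(story_date_published) AS story_month,
         COUNT(*)                    AS number_of_stories
  FROM tbl_newspaper_search_results
  GROUP BY story_year, story_month
  ORDER BY story_year, story_month;")
head(results)
```

A result like this can be passed straight to R's plotting functions, which is how a graph of stories over time is produced at the end of the lesson.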
# Conclusion @@ -850,7 +850,7 @@ I hope that you now have the knowledge to set up a database table, connect to it # Credits -I completed this lesson thanks to the support of the [George Garth Graham Undergraduate Digital History Research Fellowship](http://grahamresearchfellow.org/). +I completed this lesson thanks to the support of the [George Garth Graham Undergraduate Digital History Research Fellowship](https://grahamresearchfellow.org/). Thank you to Dr. Amanda Visconti for her guidance and support during the preparation of this lesson. @@ -862,7 +862,7 @@ Ullman, L. 2005. *PHP and MySQL for Dynamic Web Sites, 2nd ed.* Berkeley, Calif: [^1]: Lincoln Mullen, "Natural Language Processing," RPubs, [https://rpubs.com/lmullen/nlp-chapter](https://rpubs.com/lmullen/nlp-chapter). -[^2]: Jason A. French, "Using R With MySQL Databases," blog (3 July 2014), [http://www.jason-french.com/blog/2014/07/03/using-r-with-mysql-databases/](http://www.jason-french.com/blog/2014/07/03/using-r-with-mysql-databases/). +[^2]: Jason A. French, "Using R With MySQL Databases," blog (3 July 2014), [https://www.jason-french.com/blog/2014/07/03/using-r-with-mysql-databases/](https://www.jason-french.com/blog/2014/07/03/using-r-with-mysql-databases/). [^3]: Taylor Arnold and Lauren Tilton, "Basic Text Processing in R," Programming Historian (27 March 2017), [/lessons/basic-text-processing-in-r](/lessons/basic-text-processing-in-r). diff --git a/en/lessons/googlemaps-googleearth.md b/en/lessons/googlemaps-googleearth.md index d91672e3c2..1e1e27e79d 100755 --- a/en/lessons/googlemaps-googleearth.md +++ b/en/lessons/googlemaps-googleearth.md @@ -525,7 +525,7 @@ your work!** [geo22]: /images/googlemaps-googleearth/geo22.png [geo23]: /images/googlemaps-googleearth/geo23.png [geo24]: /images/googlemaps-googleearth/geo24.png - [www.davidrumsey.com]: http://www.davidrumsey.com/ + [www.davidrumsey.com]: https://www.davidrumsey.com/ [geo25]: /images/googlemaps-googleearth/geo25.png [geo26]: /images/googlemaps-googleearth/geo26.png [Georeferencing in QGIS 2.0]: /lessons/georeferencing-qgis @@ -540,11 +540,11 @@ your work!** [geo33]: /images/googlemaps-googleearth/geo33.png [geo34]: /images/googlemaps-googleearth/geo34.png [geo35]: /images/googlemaps-googleearth/geo35.png - [Mobile Mapping and Historical GIS in the Field]: http://niche-canada.org/2011/12/14/mobile-mapping-and-historical-gis-in-the-field/ + [Mobile Mapping and Historical GIS in the Field]: https://niche-canada.org/2011/12/14/mobile-mapping-and-historical-gis-in-the-field/ "Mobile Mapping and Historical GIS in the Field" [geo36]: /images/googlemaps-googleearth/geo36.png [geo37]: /images/googlemaps-googleearth/geo37.png [geo38]: /images/googlemaps-googleearth/geo38.png [geo39]: /images/googlemaps-googleearth/geo39.png [geo40]: /images/googlemaps-googleearth/geo40.png - [Geospatial Historian]: http://geospatialhistorian.wordpress.com/ + [Geospatial Historian]: https://geospatialhistorian.wordpress.com/ diff --git a/en/lessons/gravity-model.md b/en/lessons/gravity-model.md index d47f5f1906..9974567352 100644 --- a/en/lessons/gravity-model.md +++ b/en/lessons/gravity-model.md @@ -110,7 +110,7 @@ While gravity models can be used in a range of different migration and trade stu 1 - Adam Crymble, Adam Dennett, Tim Hitchcock, "Modelling regional imbalances in English plebeian migration to late eighteenth-century London", *Economic History Review*, 71, 3 (2018), pp. 747-771: (Paywall until July 2019). 
-2 - Adam Crymble, Louise Falcini, Tim Hitchcock, "Vagrant Lives: 14,789 Vagrants Processed by the County of Middlesex, 1777-1786", *Journal of Open Humanities Data*, vol. 1, no. 1 (2015), . +2 - Adam Crymble, Louise Falcini, Tim Hitchcock, "Vagrant Lives: 14,789 Vagrants Processed by the County of Middlesex, 1777-1786", *Journal of Open Humanities Data*, vol. 1, no. 1 (2015), . The Vagrancy Act of 1744 gave communities in England and Wales the right to expel outsiders back from whence they came. This was an important right because welfare was distributed locally at the time, and it was paid for by local taxes with the intention of supporting local people. That meant that a large influx of poor outsiders could financially cripple communities that attracted a lot of migration (such as those in London). This restriction on internal migration was only really used against the poor, and constables and local magistrates had tremendous powers of discretion over who they labelled a "vagrant" and who they left alone. As of the time of writing, a version of this law is still on the books in England, and it is still used by the police to arrest people who are begging or who they otherwise feel need to be removed from a situation. People in the late eighteenth century who were arrested under the 1744 act are therefore evidence of internal migration between the various counties of England and London. The question is: were any counties sending more or fewer vagrants to London than we would expect? @@ -120,9 +120,9 @@ A sample of the primary sources that detail these individuals' journeys can be s {% include figure.html filename="figure3.jpg" caption="Figure 3: A sample list of vagrants expelled from Middlesex. 'Middlesex Sessions Papers - Justices' Working Documents', (January 1778), *London Lives, 1690-1800*, LMSMPS50677PS506770118 (www.londonlives.org, version 2.0, 18 August 2018), London Metropolitan Archives." %} -As part of the "[Vagrant Lives](http://www.migrants.adamcrymble.org/the-project/)" project, the original vagrancy lists were converted into a scholarly dataset and published as: +As part of the "[Vagrant Lives](https://www.migrants.adamcrymble.org/the-project/)" project, the original vagrancy lists were converted into a scholarly dataset and published as: -* Adam Crymble, Louise Falcini, Tim Hitchcock, "Vagrant Lives: 14,789 Vagrants Processed by the County of Middlesex, 1777-1786", *Journal of Open Humanities Data*, vol. 1, no. 1 (2015), . +* Adam Crymble, Louise Falcini, Tim Hitchcock, "Vagrant Lives: 14,789 Vagrants Processed by the County of Middlesex, 1777-1786", *Journal of Open Humanities Data*, vol. 1, no. 1 (2015), . Readers are invited to download and explore this [published dataset](https://zenodo.org/record/1217600) and its documentation to understand the types of primary sources being modelled in this example. @@ -239,7 +239,7 @@ In probability statistics, there are a number of different [probability distribu As it happens, our vagrants are best suited to a negative binomial distribution. The reasons for this are that they represent count data (1, 2, 53 vagrants) that must be whole numbers (no 0.5 vagrants) and cannot be negative (no -9 vagrants). Earlier gravity modelling conducted in the 1980s tended to use a [Poisson Distribution](https://en.wikipedia.org/wiki/Poisson_distribution) for modelling human migration. 
The best approach for gravity models is still a point of academic debate, with some scholars opting for a Negative Binomial approach, and others sticking with the Poisson distribution.[^7] It is possible that another probability distribution entirely is most appropriate for your own data. If you were modelling trade surpluses or deficits (which could be + or -), your data may not follow a negative binomial distribution, and the author recommends speaking to a statistician about the most appropriate option.

-What this means for us in this example is that the formula changes slightly. In particular, we no longer solve for $y$, but for the [natural logarithm](https://en.wikipedia.org/wiki/Natural_logarithm) ($ln$) of the [population mean](http://www.statisticshowto.com/population-mean/) ($μ$). You can read more about this type of formula in Michael L. Zwilling's work[^8].
+What this means for us in this example is that the formula changes slightly. In particular, we no longer solve for $y$, but for the [natural logarithm](https://en.wikipedia.org/wiki/Natural_logarithm) ($ln$) of the [population mean](https://www.statisticshowto.com/population-mean/) ($μ$). You can read more about this type of formula in Michael L. Zwilling's work[^8].

**Multivariate Regression Model:**

@@ -801,7 +801,7 @@ With thanks to Angela Kedgley, Sarah Lloyd, Tim Hitchcock, Joe Cozens, Katrina N

[^5]: For English speakers, the author recommends Eugene O'Loughlin, 'How To...Perform Simple Linear Regression by Hand', *YouTube* (23 December 2015): .
[^6]: "Chapter 326: Negative Binomial Regression", *NCSS Stats Software* (n.d.):
[^7]: Flowerdew, R. and Aitkin, M., ‘A method of fitting the gravity model based on the Poisson distribution’, *Journal of Regional Science*, 22 (1982), pp. 191–202; Flowerdew, R. and Lovett, A., ‘Fitting constrained Poisson regression models to interurban migration flows’, *Geographical Analysis*, 20 (1988), pp. 297–307; Congdon, P., ‘Approaches to modeling overdispersion in the analysis of migration’, *Environment and Planning* A, 25 (1993), pp. 1481–510; Flowerdew, R., ‘Modelling migration with Poisson regression’, in J. Stillwell, O. Duke-Williams, and A. Dennett, eds., *Technologies for migration and commuting analysis: spatial interaction data applications* (Hershey, Pa., 2010), pp. 261–79.
-[^8]: Michael L. Zwilling, "Negative Binomial Regression", *The Mathematica Journal*, vol. 15 (2013): .
+[^8]: Michael L. Zwilling, "Negative Binomial Regression", *The Mathematica Journal*, vol. 15 (2013): .
[^9]: Crymble, A, A. Dennett, and T. Hitchcock, "Modelling regional imbalances in English plebeian migration to late eighteenth-century London", *Economic History Review*, vol. 71, no. 3 (2018), 747-771.
[^10]: For example, see: Grigg, D.B. "E.G. Ravenstein and the 'laws of migration'", *Journal of Historical Geography*, vol. 3, no. 1 (1977), pp. 44-54.
[^11]: Crymble, A, A. Dennett, and T. Hitchcock, "Modelling regional imbalances in English plebeian migration to late eighteenth-century London", *Economic History Review*, vol. 71, no. 3 (2018), 753-754.
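To make the move from $y$ to $ln(μ)$ concrete, here is a minimal sketch of a negative binomial regression, written in Python with the statsmodels library rather than in the lesson's own setup; the variable names and the synthetic data below are invented purely for illustration and are not the lesson's dataset:

```python
# A minimal, illustrative sketch (not the lesson's model or data):
# a negative binomial GLM predicts ln(mu), the log of the expected count.
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(42)
n = 200
log_population = np.log(rng.uniform(10_000, 200_000, n))  # invented predictor
log_distance = np.log(rng.uniform(20, 300, n))            # invented predictor

# Synthetic counts built so that ln(mu) is linear in the predictors
mu = np.exp(-4 + 1.0 * log_population - 0.8 * log_distance)
counts = rng.poisson(mu)  # whole, non-negative numbers, like counts of migrants

X = sm.add_constant(np.column_stack([log_population, log_distance]))
result = sm.GLM(counts, X, family=sm.families.NegativeBinomial()).fit()

print(result.params)          # fitted coefficients, on the ln(mu) scale
print(result.predict(X)[:5])  # predicted mu: the inverse link (exp) is applied
```

The point of the sketch is the link function: the coefficients combine linearly to give $ln(μ)$, so predicted counts are recovered by exponentiating, which is exactly why the formula above solves for the logarithm of the population mean rather than for $y$ itself.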
diff --git a/en/lessons/image-classification-neural-networks.md b/en/lessons/image-classification-neural-networks.md
index 8cddec5da0..3a8c857c04 100644
--- a/en/lessons/image-classification-neural-networks.md
+++ b/en/lessons/image-classification-neural-networks.md
@@ -53,7 +53,7 @@ Machine learning can be divided into two forms: supervised and unsupervised lear

For this tutorial, we will download a dataset of paintings from [ArtUK](https://artuk.org/), which provides access to works that meet the UK's requirements for "[public ownership](https://artuk.org/footer/faq)." Approximately [80% of the UK's publicly owned art is not on display](https://artuk.org/about/provide-free-digital-access-to-the-uks-art). ArtUK combats this by providing the general public access to these materials.

-The ArtUK website allows you to view artworks by [topic](https://artuk.org/discover/topics), and we will use these topics to train our image classifier. You can [download a `.zip` file containing the images here](/assets/image-classification-neural-networks/dataset.zip). Save the `.zip` file in your `projects` folder and unzip it. Inside, you will find a folder called "dataset" with two additional folders: `training` and `testing`. Once you have downloaded all the files, go ahead and launch a live server on the `projects` folder. In most cases, you can view the server using the localhost address of "http://127.0.0.1".
+The ArtUK website allows you to view artworks by [topic](https://artuk.org/discover/topics), and we will use these topics to train our image classifier. You can [download a `.zip` file containing the images here](/assets/image-classification-neural-networks/dataset.zip). Save the `.zip` file in your `projects` folder and unzip it. Inside, you will find a folder called "dataset" with two additional folders: `training` and `testing`. Once you have downloaded all the files, go ahead and launch a live server on the `projects` folder. In most cases, you can view the server using the localhost address of `http://127.0.0.1`.

# Understanding Neural Networks

diff --git a/en/lessons/installing-omeka.md b/en/lessons/installing-omeka.md
index 66aee7add7..99de98f806 100755
--- a/en/lessons/installing-omeka.md
+++ b/en/lessons/installing-omeka.md
@@ -28,7 +28,7 @@ doi: 10.46430/phen0052

## Introduction

-[Omeka.net](http://omeka.net), as described in [the previous lesson](up-and-running-with-omeka.html), is a useful service for Omeka beginners, but there are a few reasons why you might want to install your own copy of Omeka. Reasons include:
+[Omeka.net](https://omeka.net), as described in [the previous lesson](up-and-running-with-omeka.html), is a useful service for Omeka beginners, but there are a few reasons why you might want to install your own copy of Omeka. Reasons include:

* **Upgrades**. By installing Omeka yourself, you can use the latest versions of Omeka as soon as they're released, without having to wait for Omeka.net to upgrade their system.
* **Plugins and themes**. You can install any plugin or theme you want, without being restricted to those provided by Omeka.net.

@@ -41,20 +41,20 @@ In this tutorial, we'll be entering a few commands on the command line. This tut

## Step 1: Set Up Your Host

-First, sign up for an account with a hosting provider that gives you SSH access. There are two main types of hosting providers: VPS and shared. A VPS host gives you root access, which means you have more control over the server, but your storage space is often limited.
For small archives of 20GB or less, this is the best solution, but for large archives, shared hosting plans might be better suited. [DigitalOcean](https://www.digitalocean.com/signup/) is an easy-to-use and inexpensive VPS host, and [Amazon Web Services](http://aws.amazon.com/free/) (AWS) hosts similar virtual servers on their Elastic Computing (EC2) platform, geared more toward advanced users. Both [HostGator](http://www.hostgator.com/) and [DreamHost](http://www.dreamhost.com) offer inexpensive shared hosting with unlimited storage. +First, sign up for an account with a hosting provider that gives you SSH access. There are two main types of hosting providers: VPS and shared. A VPS host gives you root access, which means you have more control over the server, but your storage space is often limited. For small archives of 20GB or less, this is the best solution, but for large archives, shared hosting plans might be better suited. [DigitalOcean](https://www.digitalocean.com/signup/) is an easy-to-use and inexpensive VPS host, and [Amazon Web Services](https://aws.amazon.com/free/) (AWS) hosts similar virtual servers on their Elastic Computing (EC2) platform, geared more toward advanced users. Both [HostGator](https://www.hostgator.com/) and [DreamHost](https://www.dreamhost.com) offer inexpensive shared hosting with unlimited storage. -If you open an account with a VPS provider, you'll first want to create a virtual server with their interface. (If you’re using shared hosting, this is already done for you.) On DigitalOcean, VPS instances are called "droplets," and you can create one by simply logging in and clicking "Create Droplet." On AWS EC2, a VPS is called an "instance," and you can create one by logging into your EC2 console and clicking "Launch Instance." In both cases, **choose an Ubuntu system** to install, since we'll be running Ubuntu Linux commands below. For more detailed help with these steps, check out Digital Ocean's guide [How To Create Your First DigitalOcean Droplet Virtual Server](https://web.archive.org/web/20170608220025/https://www.digitalocean.com/community/tutorials/how-to-create-your-first-digitalocean-droplet-virtual-server), and Amazon's guide [Launch an Amazon EC2 Instance](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-launch-instance_linux.html). +If you open an account with a VPS provider, you'll first want to create a virtual server with their interface. (If you’re using shared hosting, this is already done for you.) On DigitalOcean, VPS instances are called "droplets," and you can create one by simply logging in and clicking "Create Droplet." On AWS EC2, a VPS is called an "instance," and you can create one by logging into your EC2 console and clicking "Launch Instance." In both cases, **choose an Ubuntu system** to install, since we'll be running Ubuntu Linux commands below. For more detailed help with these steps, check out Digital Ocean's guide [How To Create Your First DigitalOcean Droplet Virtual Server](https://web.archive.org/web/20170608220025/https://www.digitalocean.com/community/tutorials/how-to-create-your-first-digitalocean-droplet-virtual-server), and Amazon's guide [Launch an Amazon EC2 Instance](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-launch-instance_linux.html). Now that you have a running server, connect to it with an SSH client. This is sometimes as simple as opening a terminal and typing `ssh user@hostname`, where `user` is the username provided by your VPS and `hostname` is your server address. 
Consult your host's documentation for instructions for logging on via SSH. Here is a sampling of guides for VPS hosts: * [Digital Ocean: How To Connect To Your Droplet with SSH](https://www.digitalocean.com/docs/droplets/how-to/connect-with-ssh) - * [Amazon Web Services: Connecting to Your Linux Instance Using SSH](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AccessingInstancesLinux.html) + * [Amazon Web Services: Connecting to Your Linux Instance Using SSH](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AccessingInstancesLinux.html) * [Google Cloud: Connecting to Your Linux Instance](https://cloud.google.com/compute/docs/instances/connecting-to-instance) And here are a few guides for shared hosts: * [DreamHost Wiki: SSH](https://help.dreamhost.com/hc/en-us/articles/216041267-SSH-overview) - * [HostGator: How Do I Get and Use SSH Access?](http://support.hostgator.com/articles/hosting-guide/lets-get-started/how-do-i-get-and-use-ssh-access) + * [HostGator: How Do I Get and Use SSH Access?](https://support.hostgator.com/articles/hosting-guide/lets-get-started/how-do-i-get-and-use-ssh-access) When you're connected, you should see a prompt that looks roughly like this: @@ -139,7 +139,7 @@ Now let's download Omeka directly to the server. This will allow us to avoid the If you get a permissions error here on a VPS, make sure you're logged in as the root user with `su root`. Now let's download Omeka with command `wget` like this: - wget http://omeka.org/files/omeka-2.7.zip + wget https://omeka.org/files/omeka-2.7.zip Now let’s first make sure we have the `unzip` command: diff --git a/en/lessons/installing-python-modules-pip.md b/en/lessons/installing-python-modules-pip.md index e65290805a..c2c0c2c11d 100755 --- a/en/lessons/installing-python-modules-pip.md +++ b/en/lessons/installing-python-modules-pip.md @@ -154,6 +154,6 @@ python -m pip install XXX Happy installing! [pip]: https://pip.pypa.io/en/stable/ - [curl command]: http://www.thegeekstuff.com/2012/04/curl-examples/ + [curl command]: https://www.thegeekstuff.com/2012/04/curl-examples/ [here]: https://bootstrap.pypa.io/get-pip.py - [StackOverflow page]: http://stackoverflow.com/questions/4750806/how-to-install-pip-on-windows + [StackOverflow page]: https://stackoverflow.com/questions/4750806/how-to-install-pip-on-windows diff --git a/en/lessons/interactive-text-games-using-twine.md b/en/lessons/interactive-text-games-using-twine.md index f79ede873f..124b1f6fbe 100644 --- a/en/lessons/interactive-text-games-using-twine.md +++ b/en/lessons/interactive-text-games-using-twine.md @@ -81,7 +81,7 @@ An important first step in creating a meaningful and inclusive environment for g * Only certain types of people play games (students may feel that the games they play, such as mobile games, do not qualify as “real” games) * That games easily create social change by automatically sparking empathy -Playing and analyzing a game is a helpful way to challenge these assumptions. Games with strong rhetorical arguments are a good place to start. I begin many of my game units with Zoe Quinn’s Twine game [*Depression Quest*](http://www.depressionquest.com/), in which you play as someone living with depression. +Playing and analyzing a game is a helpful way to challenge these assumptions. Games with strong rhetorical arguments are a good place to start. I begin many of my game units with Zoe Quinn’s Twine game [*Depression Quest*](https://www.depressionquest.com/), in which you play as someone living with depression. 
{% include figure.html filename="Figure1.jpg" alt="A 'passage' from the game Depression Quest. At the top, a scenario is described. It is written in black lettering. Below, there are four options for the player's next action, preceded by the prompt 'Do you...'. The first option is written in red, and is struck-through indicating that it is unavailable to choose. The following three options are written in blue, indicating that these options are available" caption="Figure 1. Example from *Depression Quest*" %}

@@ -128,7 +128,7 @@ In the game I walk through below, the player character will move through her fir

You can create a successful Twine game using limited technical elements. However, there are some basics of game design that can turn a technically simple game into a narratively complex game. The game we will make is technically straightforward–it involves narrative and basic, text-based choices that the player can make. However, we will work to create choices that connect to the game's rhetorical goals.

## Creating Your First Story
-To create your first game, which Twine will refer to as a "story", go to [Twine](http://twinery.org/) and click "Use it online." If it is your first time using Twine, there will be a basic introduction. Once you have read or skipped this introduction, Twine will take you to your story list. At first, this area will be largely empty. It will populate as you create more stories. To create your first story, click "+Story."
+To create your first game, which Twine will refer to as a "story", go to [Twine](https://twinery.org/) and click "Use it online." If it is your first time using Twine, there will be a basic introduction. Once you have read or skipped this introduction, Twine will take you to your story list. At first, this area will be largely empty. It will populate as you create more stories. To create your first story, click "+Story."

{% include figure.html filename="Figure2.jpg" caption="Figure 2. Getting Started with the Browser Version of Twine" %}

@@ -404,7 +404,7 @@ If you would like to play with Twine or potentially integrate it into a course, 

### Twine Games

* [*A Witch’s Word*](https://rainbowstarbird.itch.io/a-witchs-word) by RainbowStarbird
-* [*Depression Quest*](http://www.depressionquest.com/) by Zoe Quinn
+* [*Depression Quest*](https://www.depressionquest.com/) by Zoe Quinn
* [*Play Smarter Not Harder: Developing Your Scholarly Meta*](https://perma.cc/W2PK-FCQT) by Jason Helms
* [*Queers in Love at the End of the World*](https://w.itch.io/end-of-the-world) by Anna Anthropy
* [*September 7th, 2020*](https://perma.cc/GP6X-RARD) by Cait S. Kirby
diff --git a/en/lessons/interrogating-national-narrative-gpt.md b/en/lessons/interrogating-national-narrative-gpt.md
index 121e943179..5e499e7229 100644
--- a/en/lessons/interrogating-national-narrative-gpt.md
+++ b/en/lessons/interrogating-national-narrative-gpt.md
@@ -387,7 +387,7 @@ The use of generated text as an analytical tool is relatively novel, as is the a

[^2]: David Tarditi, Sidd Puri, and Jose Oglesby, "Accelerator: Using data parallelism to program GPUs for general-purpose uses," *Operating Systems Review* 40, (2006): 325-326. [https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/tr-2005-184.pdf](https://perma.cc/QDX9-33R6).
[^3]: Shawn Graham, *An Enchantment of Digital Archaeology: Raising the Dead with Agent-Based Models, Archaeogaming, and Artificial Intelligence* (New York: Berghahn Books, 2020), 118.
[^4]: Emily M.
Bender and Alexander Koller, "Climbing towards NLU: On Meaning, Form, and Understanding in the Age of Data," (paper presented at Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, Online, July 5 2020): 5187. [https://doi.org/10.18653/v1/2020.acl-main.463](https://doi.org/10.18653/v1/2020.acl-main.463). -[^5]: Kari Kraus, "Conjectural Criticism: Computing Past and Future Texts," *Digital Humanities Quarterly* 3, no. 4 (2009). [http://www.digitalhumanities.org/dhq/vol/3/4/000069/000069.html](https://perma.cc/C7D7-H7WY). +[^5]: Kari Kraus, "Conjectural Criticism: Computing Past and Future Texts," *Digital Humanities Quarterly* 3, no. 4 (2009). [https://www.digitalhumanities.org/dhq/vol/3/4/000069/000069.html](https://perma.cc/C7D7-H7WY). [^6]: Alexandra Borchardt, Felix M. Simon, and Diego Bironzo, *Interested but not Engaged: How Europe’s Media Cover Brexit,* (Oxford: Reuters Institute for the Study of Journalism, 2018), 23, [https://reutersinstitute.politics.ox.ac.uk/sites/default/files/2018-06/How%20Europe%27s%20Media%20Cover%20Brexit.pdf](https://perma.cc/8S2H-9ZDV). [^7]: Satnam Virdee & Brendan McGeever, "Racism, Crisis, Brexit," *Ethnic and Racial Studies* 40, no. 10 (July 2017): 1807, [https://doi.org/10.1080/01419870.2017.1361544](https://doi.org/10.1080/01419870.2017.1361544). [^8]: Blair E Williams, "A Tale of Two Women: A Comparative Gendered Media Analysis of UK Prime Ministers Margaret Thatcher and Theresa May", *Parliamentary Affairs* 74, no. 2 (April 2021): 408, [https://doi.org/10.1093/pa/gsaa008](https://doi.org/10.1093/pa/gsaa008). diff --git a/en/lessons/intro-to-bash.md b/en/lessons/intro-to-bash.md index 10d27284bf..68f811b11b 100755 --- a/en/lessons/intro-to-bash.md +++ b/en/lessons/intro-to-bash.md @@ -37,11 +37,11 @@ Many of the lessons at the *Programming Historian* require you to enter commands {% include figure.html filename="en-or-intro-to-bash-01.png" caption="Figure 1. GUI of Ian Milligan's Computer" %} -Command-line interfaces have advantages for computer users who need more precision in their work -- such as digital historians. They allow for more detail when running some programs, as you can add modifiers to specify *exactly* how you want your program to run. Furthermore, they can be easily automated through [scripts](http://www.tldp.org/LDP/Bash-Beginners-Guide/html/chap_01.html), which are essentially recipes of text-based commands. +Command-line interfaces have advantages for computer users who need more precision in their work -- such as digital historians. They allow for more detail when running some programs, as you can add modifiers to specify *exactly* how you want your program to run. Furthermore, they can be easily automated through [scripts](https://www.tldp.org/LDP/Bash-Beginners-Guide/html/chap_01.html), which are essentially recipes of text-based commands. -There are two main command-line interfaces, or 'shells,' that many digital historians use. On OS X or many Linux installations, the shell is known as `bash`, or the 'Bourne-again shell.' For users on Windows-based systems, the command-line interface is by default `MS-DOS-based`, which uses different commands and [syntax](http://en.wikipedia.org/wiki/Syntax), but can often achieve similar tasks. This tutorial provides a basic introduction to the `bash` terminal, and Windows users can follow along by installing popular shells such as [Cygwin](https://www.cygwin.com/) or Git Bash (see below). 
+There are two main command-line interfaces, or 'shells,' that many digital historians use. On OS X or many Linux installations, the shell is known as `bash`, or the 'Bourne-again shell.' For users on Windows-based systems, the command-line interface is by default `MS-DOS-based`, which uses different commands and [syntax](https://en.wikipedia.org/wiki/Syntax), but can often achieve similar tasks. This tutorial provides a basic introduction to the `bash` terminal, and Windows users can follow along by installing popular shells such as [Cygwin](https://www.cygwin.com/) or Git Bash (see below). -This lesson uses a **[Unix shell](http://en.wikipedia.org/wiki/Unix_shell)**, which is a command-line interpreter that provides a user interface for the [Unix](http://en.wikipedia.org/wiki/Unix) operating system and for Unix-like systems. This lesson will cover a small number of basic commands. By the end of this tutorial you will be able to navigate through your file system and find files, open them, perform basic data manipulation tasks such as combining and copying files, as well as both reading them and making relatively simple edits. These commands constitute the building blocks upon which more complex commands can be constructed to fit your research data or project. Readers wanting a reference guide that goes beyond this lesson are recommended to read Deborah S. Ray and Eric J. Ray, *Unix and Linux: Visual Quickstart Guide*, 4th edition (2009). +This lesson uses a **[Unix shell](https://en.wikipedia.org/wiki/Unix_shell)**, which is a command-line interpreter that provides a user interface for the [Unix](https://en.wikipedia.org/wiki/Unix) operating system and for Unix-like systems. This lesson will cover a small number of basic commands. By the end of this tutorial you will be able to navigate through your file system and find files, open them, perform basic data manipulation tasks such as combining and copying files, as well as both reading them and making relatively simple edits. These commands constitute the building blocks upon which more complex commands can be constructed to fit your research data or project. Readers wanting a reference guide that goes beyond this lesson are recommended to read Deborah S. Ray and Eric J. Ray, *Unix and Linux: Visual Quickstart Guide*, 4th edition (2009). ## Windows Only: Installing Git Bash @@ -59,7 +59,7 @@ When you run it, you will see this window. {% include figure.html filename="en-or-intro-to-bash-03.png" caption="Figure 3. A blank terminal screen on our OS X workstation" %} -You might want to change the default visual appearance of the terminal, as eyes can strain at repeatedly looking at black text on a white background. In the default OS X application, you can open the 'Settings' menu in 'Preferences' under Terminal. Click on the 'Settings' tab and change it to a new colour scheme. We personally prefer something with a bit less contrast between background and foreground, as you'll be staring at this a great deal. 'Novel' is a soothing one as is the popular [Solarized](http://ethanschoonover.com/solarized) suite of colour palettes. For Windows users, a similar effect can be achieved using the Git Bash `Properties` tab. To reach this, right-click anywhere in the top bar and select `Properties`. +You might want to change the default visual appearance of the terminal, as eyes can strain at repeatedly looking at black text on a white background. In the default OS X application, you can open the 'Settings' menu in 'Preferences' under Terminal. 
Click on the 'Settings' tab and change it to a new colour scheme. We personally prefer something with a bit less contrast between background and foreground, as you'll be staring at this a great deal. 'Novel' is a soothing one as is the popular [Solarized](https://ethanschoonover.com/solarized) suite of colour palettes. For Windows users, a similar effect can be achieved using the Git Bash `Properties` tab. To reach this, right-click anywhere in the top bar and select `Properties`. {% include figure.html filename="en-or-intro-to-bash-04.png" caption="Figure 4. The Settings Screen on the OS X Terminal Shell Application" %} @@ -161,7 +161,7 @@ This moves us 'up' one directory, putting us back in `/Users/ianmilligan1/`. If will bring you right back to the home directory, right where you started. -Try exploring: visit your documents directory, your pictures, folders you might have on your desktop. Get used to moving in and out of directories. Imagine that you are navigating a [tree structure](http://en.wikipedia.org/wiki/Tree_structure). If you're on the desktop, you won't be able to `cd documents` as it is a 'child' of your home directory, whereas your Desktop is a 'sibling' of the Documents folder. To get to a sibling, you have to go back to the common parent. To do this, you will have to back up to your home directory (`cd ..`) and then go forward again to `cd documents`. +Try exploring: visit your documents directory, your pictures, folders you might have on your desktop. Get used to moving in and out of directories. Imagine that you are navigating a [tree structure](https://en.wikipedia.org/wiki/Tree_structure). If you're on the desktop, you won't be able to `cd documents` as it is a 'child' of your home directory, whereas your Desktop is a 'sibling' of the Documents folder. To get to a sibling, you have to go back to the common parent. To do this, you will have to back up to your home directory (`cd ..`) and then go forward again to `cd documents`. Being able to navigate your file system using the bash shell is very important for many of the lessons at the *Programming Historian*. As you become more comfortable, you'll soon find yourself skipping directly to the directory that you want. In our case, from anywhere on our system, you could type @@ -185,7 +185,7 @@ in Windows. That command will open up your GUI at the current directory. Make su ## Interacting with Files -As well as navigating directories, you can interact with files on the command line: you can read them, open them, run them, and even edit them, often without ever having to leave the interface. There is some debate over why one would do this. The primary reason is the seamless experience of working on the command line: you never have to pick up your mouse or touch your track pad, and, although it has a steep learning curve it can eventually become a sole writing environment. Furthermore, many programs require you to use the command line to operate with them. Since you'll be using programs on the command line, it can often be quicker to make small edits without switching into a separate program. For some of these arguments, see Jon Beltran de Heredia's ["Why, oh WHY, do those #?@! nutheads use vi?"](http://www.viemu.com/a-why-vi-vim.html). +As well as navigating directories, you can interact with files on the command line: you can read them, open them, run them, and even edit them, often without ever having to leave the interface. There is some debate over why one would do this. 
The primary reason is the seamless experience of working on the command line: you never have to pick up your mouse or touch your track pad, and, although it has a steep learning curve, it can eventually become a sole writing environment. Furthermore, many programs require you to use the command line to operate with them. Since you'll be using programs on the command line, it can often be quicker to make small edits without switching into a separate program. For some of these arguments, see Jon Beltran de Heredia's ["Why, oh WHY, do those #?@! nutheads use vi?"](https://www.viemu.com/a-why-vi-vim.html).

Here are a few basic ways to interact with files.

@@ -197,7 +197,7 @@ This creates a directory named, you guessed it, 'ProgHist-Text.' In general, it'

But wait! There's a trick to make things a bit quicker. Go up one directory (`cd ..` - which will take you back to the Desktop). To navigate to the `ProgHist-Text` directory you could type `cd ProgHist-Text`. Alternatively, you could type `cd Prog` and then hit tab. You will notice that the interface completes the line to `cd ProgHist-Text`. **Hitting tab at any time within the shell will prompt it to attempt to auto-complete the line based on the files or sub-directories in the current directory. This is case sensitive, however (i.e. in the previous example, `cd prog` would not auto complete to `ProgHist-Text`). Where two or more files have the same characters, the auto-complete will only fill up to the first point of difference. We would encourage using this method throughout the lesson to see how it behaves.**

-Now you need to find a basic text file to help us with the example. Why don't you use a book that you know is long, such as Leo Tolstoy's epic *War and Peace*. The text file is availiable via [Project Gutenberg](http://www.gutenberg.org/ebooks/2600). If you have already installed [wget](/lessons/applied-archival-downloading-with-wget), you can just type
+Now you need to find a basic text file to help us with the example. Why don't you use a book that you know is long, such as Leo Tolstoy's epic *War and Peace*. The text file is available via [Project Gutenberg](https://www.gutenberg.org/ebooks/2600). If you have already installed [wget](/lessons/applied-archival-downloading-with-wget), you can just type

`wget http://www.gutenberg.org/files/2600/2600-0.txt`

@@ -267,7 +267,7 @@ and hit enter, a combination of all the .txt files in the current directory are

## Editing Text Files Directly on the Command Line

-If you want to read a file in its entirety without leaving the command line, you can fire up [vim](http://en.wikipedia.org/wiki/Vim_%28text_editor%29). Vim is a very powerful text editor, which is perfect for using with programs such as [Pandoc](https://pandoc.org/) to do word processing, or for editing your code without having to switch to another program. Best of all, it comes included with bash on both OS X and Windows. Vim has a fairly steep learning curve, so we will just touch on a few minor points.
+If you want to read a file in its entirety without leaving the command line, you can fire up [vim](https://en.wikipedia.org/wiki/Vim_%28text_editor%29). Vim is a very powerful text editor, which is perfect for using with programs such as [Pandoc](https://pandoc.org/) to do word processing, or for editing your code without having to switch to another program. Best of all, it comes included with bash on both OS X and Windows. Vim has a fairly steep learning curve, so we will just touch on a few minor points.
Type @@ -277,7 +277,7 @@ You should see vim come to life before you, a command-line based text editor. {% include figure.html filename="en-or-intro-to-bash-06.png" caption="Figure 6. Vim" %} -If you really want to get into Vim, there is a [good Vim guide](http://vimdoc.sourceforge.net/htmldoc/quickref.html) available. +If you really want to get into Vim, there is a [good Vim guide](https://vimdoc.sourceforge.net/htmldoc/quickref.html) available. Using Vim to read files is relatively simple. You can use the arrow keys to navigate around and could theoretically read *War and Peace* through the command line (one should get an achievement for doing that). Some quick basic navigational commands are as follows: diff --git a/en/lessons/intro-to-linked-data.md b/en/lessons/intro-to-linked-data.md index a25d82a149..4554eb5748 100755 --- a/en/lessons/intro-to-linked-data.md +++ b/en/lessons/intro-to-linked-data.md @@ -47,7 +47,7 @@ If you need to learn how to explore LOD using the query language [SPARQL](https: In order to provide readers with a solid grounding in the basic principles of LOD, this tutorial will not be able to offer a comprehensive coverage of all LOD concepts. The following two LOD concepts will *not* be the focus of this lesson: 1. The [semantic web](https://en.wikipedia.org/wiki/Semantic_Web) and [semantic reasoning](https://en.wikipedia.org/wiki/Semantic_reasoner) of [datasets](https://en.wikipedia.org/wiki/Data_set). A semantic reasoner would deduce that George VI is the brother or half-brother of Edward VIII, given the fact that a) Edward VIII is the son of George V and b) George VI is the son of George V. This tutorial does not focus on this type of task. -2. Creating and uploading linked open datasets to the [linked data cloud](http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/). Sharing your LOD is an important principle, which is encouraged below. However, the practicalities of contributing your LOD to the linked data cloud are beyond the scope of this lesson. Some resources that can help you get started with this task are available at the end of this tutorial. +2. Creating and uploading linked open datasets to the [linked data cloud](https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/). Sharing your LOD is an important principle, which is encouraged below. However, the practicalities of contributing your LOD to the linked data cloud are beyond the scope of this lesson. Some resources that can help you get started with this task are available at the end of this tutorial. ## Linked open data: what is it? @@ -71,7 +71,7 @@ Using the model above in which each person is represented by a unique number, le person=64183282 -And let's make Jack Straw described by the *[Oxford Dictionary of National Biography](http://www.oxforddnb.com)* as 'the enigmatic rebel leader', number `33059614`, making his attribute-value pair look like this: +And let's make Jack Straw described by the *[Oxford Dictionary of National Biography](https://www.oxforddnb.com)* as 'the enigmatic rebel leader', number `33059614`, making his attribute-value pair look like this: person=33059614 @@ -81,7 +81,7 @@ The attribute-value pairs can also store information about other types of entiti place=2655524 -At this point you might be thinking, "that's what a library catalogue does". 
It's true that the key idea here is that of the [authority file](https://en.wikipedia.org/wiki/Authority_control), which is central in library science (an authority file is a definitive list of terms which can be used in a particular context, for example when cataloguing a book). In both of the examples outlined above, we have used authority files to assign the numbers (the unique ids) to the Jacks and to Blackburn. The numbers we used for the two Jack Straws come from the [Virtual International Authority File](https://viaf.org) (VIAF), which is maintained by a consortium of libraries worldwide to try to address the problem of the myriad ways in which the same person might be referred to. The unique identifier we used for the Blackburn constituency came from [GeoNames](http://www.geonames.org/), a free geographical database.
+At this point you might be thinking, "that's what a library catalogue does". It's true that the key idea here is that of the [authority file](https://en.wikipedia.org/wiki/Authority_control), which is central in library science (an authority file is a definitive list of terms which can be used in a particular context, for example when cataloguing a book). In both of the examples outlined above, we have used authority files to assign the numbers (the unique ids) to the Jacks and to Blackburn. The numbers we used for the two Jack Straws come from the [Virtual International Authority File](https://viaf.org) (VIAF), which is maintained by a consortium of libraries worldwide to try to address the problem of the myriad ways in which the same person might be referred to. The unique identifier we used for the Blackburn constituency came from [GeoNames](https://www.geonames.org/), a free geographical database.

But let's try to be more precise about what we mean by Blackburn in this instance. Jack Straw represented the parliamentary constituency (an area represented by a single member of parliament) of Blackburn, which has changed its boundaries over time. The '[Digging Into Linked Parliamentary Data](https://repository.jisc.ac.uk/6544/)' (Dilipad) project (on which I worked), produced unique identifiers for party affiliations and constituencies for each member of parliament. In this example, Jack Straw represented the constituency known as 'Blackburn' in its post-1955 incarnation:

@@ -127,19 +127,19 @@ In the previous section we used two different numbers to identify our two differ

The problem is that around the world there are many databases that contain people with these numbers, and they're probably all different people. Outside of our immediate context these numbers don't identify unique individuals. Let's try to fix that. Here are these same identifiers but as URIs:

-    http://viaf.org/viaf/64183282/
+    https://viaf.org/viaf/64183282/

-    http://viaf.org/viaf/33059614/
+    https://viaf.org/viaf/33059614/

Just as the unique number disambiguated our two Jack Straws, the full URI above helps us disambiguate between all of the different authority files out there. In this case, it's clear that we are using VIAF as our authority file. You have already seen this form of disambiguation many times on the web. There are many websites round the world with pages called `/home` or `/faq`. But there is no confusion because the [domain](https://en.wikipedia.org/wiki/Domain_name) (the first part of the [Uniform Resource Locator](https://en.wikipedia.org/wiki/Uniform_Resource_Locator) (URL) - eg.
`bbc.co.uk`) is unique and thus all pages that are part of that domain are unique from other `/faq` pages on other websites. In the address `http://www.bbc.co.uk/faqs` it is the `bbc.co.uk` part which makes the subsequent pages unique. This is so obvious to people who use the web all the time that they don't think about it. You probably also know that if you want to start a website called `bbc.co.uk` you can't, because that name has already been registered with the appropriate authority, which is the [Domain Name System](https://en.wikipedia.org/wiki/Domain_Name_System). The registration guarantees uniqueness. URIs also have to be unique.

While the examples above look like URLs, it is also possible to construct a URI that looks nothing like a URL. We have many ways of uniquely identifying people and things and we rarely think or worry about it. Barcodes, passport numbers, and even your postal address are all designed to be unique. Mobile phone numbers are frequently put up as shop signs precisely because they are unique. All of these could be used as URIs.

-When we wanted to create URIs for the entities described by the '[Tobias](http://www.history.ac.uk/projects/digital/tobias)' project, we chose a URL-like structure, and chose to use our institutional webspace, setting aside `data.history.ac.uk/tobias-project/` as a place dedicated to hosting these URIs. By putting it at `data.history.ac.uk` rather than `history.ac.uk`, there was a clear separation between URIs and the pages of the website. For example, one of the URIs from the Tobias project was http://data.history.ac.uk/tobias-project/person/15601. While the format of the abovementioned URIs is the same as a URL, they do not link to web pages (try pasting it of them into a web browser). Many people new to LOD find this confusing. All URLs are URIs but not all URIs are URLs. A URI can describe anything at all, whereas URL describes the location of something on the web. So a URL tells you the location of a web page or a file or something similar. Just as the International Standard Book Number, or [ISBN](https://www.iso.org/standard/36563.html) `978-0-1-873354-6` uniquely identifies a hardback edition of _Baptism, Brotherhood and Belief in Reformation Germany_ by Kat Hill, but doesn't tell you where to get a copy. For that you would need something like a library [shelfmark](https://en.wikipedia.org/wiki/Accession_number_(library_science)), which gives you an exact location on a shelf of a specific library.
+When we wanted to create URIs for the entities described by the '[Tobias](https://www.history.ac.uk/projects/digital/tobias)' project, we chose a URL-like structure, and chose to use our institutional webspace, setting aside `data.history.ac.uk/tobias-project/` as a place dedicated to hosting these URIs. By putting it at `data.history.ac.uk` rather than `history.ac.uk`, there was a clear separation between URIs and the pages of the website. For example, one of the URIs from the Tobias project was https://data.history.ac.uk/tobias-project/person/15601. While the format of the abovementioned URIs is the same as a URL, they do not link to web pages (try pasting one of them into a web browser). Many people new to LOD find this confusing. All URLs are URIs but not all URIs are URLs. A URI can describe anything at all, whereas URL describes the location of something on the web. So a URL tells you the location of a web page or a file or something similar.
A URI just does the job of identifying something. Just as the International Standard Book Number, or [ISBN](https://www.iso.org/standard/36563.html) `978-0-1-873354-6` uniquely identifies a hardback edition of _Baptism, Brotherhood and Belief in Reformation Germany_ by Kat Hill, but doesn't tell you where to get a copy. For that you would need something like a library [shelfmark](https://en.wikipedia.org/wiki/Accession_number_(library_science)), which gives you an exact location on a shelf of a specific library. There is a little bit of jargon around URIs. People talk about whether they are, or are not, [dereferenceable](https://en.wikipedia.org/wiki/Reference_(computer_science)). That just means *can it be turned from an abstract reference into something else?* For example, if you paste a URI into the address bar of a browser, will it return something? The VIAF URI for historian Simon Schama is: - http://viaf.org/viaf/46784579 + https://viaf.org/viaf/46784579 If you put that into the browser you will get back a web page about Simon Schama which contains structured data about him and his publishing history. This is very handy - for one thing, it's not obvious from the URI who or even what is being referred to. Similarly, if we treated a mobile phone number (with international code) as the URI for a person then it should be dereferenceable. Someone might answer the phone, and it might even be Schama. @@ -166,9 +166,9 @@ We're making up examples simply for the purposes of illustration, but if you wan An ontology is more flexible because it is non-hierarchical. It aims to represent the fluidity of the real world, where things can be related to each other in more complex ways than are represented by a hierarchical tree-like structure. Instead, an ontology is more like a spider's web. -Whatever you are looking to represent with LOD, we suggest that you find an existing vocabulary and use it, rather than try to write your own. The main page here has [a list of some of the most popular vocabularies](http://semanticweb.org/wiki/Main_Page.html). +Whatever you are looking to represent with LOD, we suggest that you find an existing vocabulary and use it, rather than try to write your own. The main page here has [a list of some of the most popular vocabularies](https://semanticweb.org/wiki/Main_Page.html). -Since our example above focuses on pianists, it would be a good idea to find an appropriate ontology rather than create our own system. In fact there is [an ontology for music](http://web.archive.org/web/20170715094229/http://www.musicontology.com/). As well as a well-developed specification it also has some useful examples of its use. You can have a look at the [Getting started pages](http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html) to get a sense of how you might use that particular ontology. +Since our example above focuses on pianists, it would be a good idea to find an appropriate ontology rather than create our own system. In fact there is [an ontology for music](https://web.archive.org/web/20170715094229/https://www.musicontology.com/). As well as a well-developed specification it also has some useful examples of its use. You can have a look at the [Getting started pages](https://web.archive.org/web/20170718143925/https://musicontology.com/docs/getting-started.html) to get a sense of how you might use that particular ontology. Unfortunately I can't find anything that describes the relationship between a teacher and a pupil in the Music Ontology. 
But the ontology is published openly, so I can use it to describe other features of music and then create my own extension. If I then publish my extension openly, others can use it if they wish and it may become a standard. While the Music Ontology project does not have the relationship I need, the [Linked Jazz project](https://linkedjazz.org/) allows use of 'mentorOf', which sounds like it would work nicely in our case. While this is not an ideal solution, it is one that makes an effort to use what is already out there.

@@ -214,19 +214,19 @@ Recognising what serialisation you are looking at means that you can then choose

Turtle uses aliases or shortcuts known as [prefixes](https://www.w3.org/TeamSubmission/turtle/#sec-tutorial), which saves us having to write out full URIs every time. Let's go back to the URI we invented in the previous section:

-    http://data.history.ac.uk/tobias-project/person/15601
+    https://data.history.ac.uk/tobias-project/person/15601

We don't want to type this out every time we refer to this person (Jack Straw, you'll remember). So we just have to announce our shortcut:

-    @prefix toby: .
+    @prefix toby: .

Then Jack is `toby:15601`, which replaces the long URI and is easier on the eye. I have chosen 'toby', but could just as easily have chosen any string of letters.

Let's now move from Jack Straw to William Shakespeare and use Turtle to describe some stuff about his works. We'll need to decide on the authority files to use, a process which, as mentioned above, is best gleaned from looking at other LOD sets. Here we'll use [Dublin Core](https://en.wikipedia.org/wiki/Dublin_Core), a library [metadata](https://en.wikipedia.org/wiki/Metadata) standard, as one of our prefixes, the [Library of Congress Control Number](https://en.wikipedia.org/wiki/Library_of_Congress_Control_Number) authority file for another, and the last one (VIAF) should be familiar to you. Together these three authority files provide unique identifiers for all of the entities I plan to use in this example:

-    @prefix lccn: .
-    @prefix dc: .
-    @prefix viaf: .
+    @prefix lccn: .
+    @prefix dc: .
+    @prefix viaf: .

    lccn:n82011242 dc:creator viaf:96994048 .

@@ -236,9 +236,9 @@ In the above example, lccn:n82011242 represents Macbeth; dc:creator links Macbet

Turtle also allows you to list triples without bothering to repeat each URI when you've only just used it. Let's add the date when scholars think Macbeth was written, using the Dublin Core attribute-value pair: `dc:created 'YYYY'`:

-    @prefix lccn: .
-    @prefix dc: .
-    @prefix viaf: .
+    @prefix lccn: .
+    @prefix dc: .
+    @prefix viaf: .

    lccn:n82011242 dc:creator viaf:96994048 ;
    dc:created "1606" .

@@ -257,11 +257,11 @@ You can use a semicolon if the subject is the same but the predicate and object

Here we're saying that Shakespeare (96994048) and John Fletcher (12323361) were both the creators of the work *The Two Noble Kinsmen*.

-When we looked at ontologies earlier I suggested you have a look at the examples from [the Music Ontology](http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html). I hope they didn't put you off. Have a look again now.
This is still complicated stuff, but do they make more sense now? One of the most approachable ontologies is Friend of a Friend, or [FOAF](https://en.wikipedia.org/wiki/FOAF_(ontology)). This is designed to describe people, and is perhaps for that reason, fairly intuitive. If, for example, you want to write to tell me that this course is the best thing you've ever read, here is my email address expressed as triples in FOAF: - @prefix foaf: . + @prefix foaf: . :"Jonathan Blaney" foaf:mbox . @@ -286,30 +286,30 @@ The RDF/XML format has the same basic information as Turtle, but is written very Let's move on to a different example to show how RDF/XML combines triples and, at the same time, introduce [Simple Knowledge Organization System](https://en.wikipedia.org/wiki/Simple_Knowledge_Organization_System) (SKOS), which is designed for encoding thesauri or taxonomies. - + Abdication -Here we are saying that the SKOS concept `21250`, abdication, has a preferred label of "abdication". The way it works is that the subject element (including the abdication part, which is an attribute value in XML terms) has the predicate and object nested inside it. The nested element is the predicate and [the leaf node](https://en.wikipedia.org/wiki/Tree_(data_structure)#Terminology), is the object. This example is taken from a project to publish a [thesaurus of British and Irish History](http://www.history.ac.uk/projects/digital/tobias). +Here we are saying that the SKOS concept `21250`, abdication, has a preferred label of "abdication". The way it works is that the subject element (including the abdication part, which is an attribute value in XML terms) has the predicate and object nested inside it. The nested element is the predicate and [the leaf node](https://en.wikipedia.org/wiki/Tree_(data_structure)#Terminology), is the object. This example is taken from a project to publish a [thesaurus of British and Irish History](https://www.history.ac.uk/projects/digital/tobias). Just as with Turtle, we can add more triples. So let's declare that the narrower term in our subject hierarchy, one down from *Abdication* is going to be *Abdication crisis (1936)*. - + Abdication - - + + Remember how predicates and objects are nested inside the subject? Here we've done that twice with the same subject, so we can make this less verbose by nesting both sets of predicates and objects inside the one subject: - + Abdication - + -If you're familiar with XML this will be like mother's milk to you. If you're not you might prefer a format like Turtle. But the advantage here is that in creating my RDF/XML you can use the usual tools available with XML, like dedicated XML editors and parsers, to check that your RDF/XML is correctly formatted. If you're not an XML person I recommend Turtle, for which you can use an [online tool](http://www.easyrdf.org/converter) to check your syntax is correct. +If you're familiar with XML this will be like mother's milk to you. If you're not you might prefer a format like Turtle. But the advantage here is that in creating my RDF/XML you can use the usual tools available with XML, like dedicated XML editors and parsers, to check that your RDF/XML is correctly formatted. If you're not an XML person I recommend Turtle, for which you can use an [online tool](https://www.easyrdf.org/converter) to check your syntax is correct. 
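If you would rather check your Turtle with code than with an online converter, one option (not part of this lesson) is the Python rdflib library, which raises an error on malformed syntax. The triples below mirror the Macbeth example above; the full namespace URIs are supplied here as illustrative stand-ins for the Dublin Core, LCCN and VIAF prefixes discussed earlier:

```python
# A small sketch (not from the lesson): parsing Turtle with rdflib.
# The prefix URIs below are illustrative stand-ins for the authority
# files discussed above, written out in full so the snippet is self-contained.
from rdflib import Graph

turtle_data = """
@prefix lccn: <https://lccn.loc.gov/> .
@prefix dc:   <http://purl.org/dc/terms/> .
@prefix viaf: <https://viaf.org/viaf/> .

lccn:n82011242 dc:creator viaf:96994048 ;
    dc:created "1606" .
"""

g = Graph()
g.parse(data=turtle_data, format="turtle")  # raises an exception on bad Turtle

for subject, predicate, obj in g:  # print each parsed triple
    print(subject, predicate, obj)
print(f"{len(g)} triples parsed")
```

If the string parses, the Turtle is well formed; deliberately removing the final full stop and re-running is a quick way to see the kind of error a syntax checker reports.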
## Querying RDF with SPARQL

@@ -321,7 +321,7 @@ As I mentioned at the beginning, *Programming Historian* has [a complete lesson]

We're going to run our SPARQL queries on [DBpedia](https://en.wikipedia.org/wiki/DBpedia), which is a huge LOD set derived from Wikipedia. As well as being full of information that is very difficult to find through the usual Wikipedia interface, it has several SPARQL "end points" - interfaces where you can type in SPARQL queries and get results from DBpedia's triples.

-The SPARQL query end point I use is called [snorql](http://dbpedia.org/snorql/). These end points occasionally seem to go offline, so if that should be the case, try searching for *dbpedia sparql* and you should find a similar replacement.
+The SPARQL query end point I use is called [snorql](https://dbpedia.org/snorql/). These end points occasionally seem to go offline, so if that should be the case, try searching for *dbpedia sparql* and you should find a similar replacement.

If you go to the snorql URL above you will see at first that a number of prefixes have already been declared for us, which is handy. You'll recognise some of the prefixes now too.

@@ -366,7 +366,7 @@ Back to the results for the query I ran a moment ago:

I can see a long list in the column labelled _c_. These are all the attributes Roper has in *DBpedia* and will help us to find other people with these attributes. For example I can see ```http://dbpedia.org/class/yago/Historian110177150```. Can I use this to get a list of historians? I'm going to put this into my query but in third place (because that's where it was when I found it in the Lyndal Roper results). My query looks like this:

    SELECT * WHERE {
    ?historian_name ?predicate
    }

I've made a small change here. If this query works at all then I expect my historians to be in the first column, because 'historian' doesn't look like it could be a predicate: it doesn't function like a verb in a sentence; so I'm going to call my first results column 'historian_name' and my second (which I don't know anything about) 'predicate'.

Run the query. Does it work for you? I get a big list of historians.

{% include figure.html filename="en-or-intro-to-linked-data-05.png" caption="Figure 5. Historians, according to DBpedia." %}

So this works for creating lists, which is useful, but it would be much more powerful to combine lists, to get intersections of sets. I found a couple more things that might be interesting to query in Lyndal Roper's DBpedia attributes: and . It's very easy to combine these by asking for a variable to be returned (in our case this is `?name`) and then using that in multiple lines of a query. Note as well the space and full point at the end of the first line beginning with `?name`:

    SELECT ?name WHERE {
-    ?name ?b .
-    ?name ?b
+    ?name ?b .
+    ?name ?b
    }

It works! I get five results. At the time of writing, there are five British, women historians in *DBpedia*...
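If you would rather run such queries from a script than from snorql, one common route (outside the scope of this lesson) is the Python SPARQLWrapper library. The sketch below reuses the yago Historian class URI found in the Lyndal Roper results; the endpoint address and the result handling are ordinary SPARQLWrapper usage, not something the lesson prescribes:

```python
# A minimal sketch (not from the lesson): querying DBpedia's public
# SPARQL endpoint from Python. The class URI is the one spotted in the
# Lyndal Roper attributes above; the LIMIT just keeps the output short.
from SPARQLWrapper import SPARQLWrapper, JSON

sparql = SPARQLWrapper("https://dbpedia.org/sparql")
sparql.setQuery("""
    SELECT ?historian_name ?predicate WHERE {
        ?historian_name ?predicate <http://dbpedia.org/class/yago/Historian110177150>
    } LIMIT 10
""")
sparql.setReturnFormat(JSON)

results = sparql.query().convert()
for row in results["results"]["bindings"]:
    print(row["historian_name"]["value"])
```

Each binding in the JSON response corresponds to one row of the table snorql would display, so the same intersection queries shown above can be combined and post-processed programmatically.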
@@ -398,15 +398,15 @@ However, despite its inconsistencies, *DBpedia* is a great place to learn SPARQL

* Dean Allemang and James Hendler, *Semantic Web for the Working Ontologist*, 2nd edn, Elsevier, 2011
* Tim Berners-Lee [Linked Data](https://www.w3.org/DesignIssues/LinkedData.html)
* Bob DuCharme, *Learning SPARQL*, O'Reilly, 2011
-* [Bob DuCharme's blog](http://www.snee.com/bobdc.blog/) is also worth reading
+* [Bob DuCharme's blog](https://www.snee.com/bobdc.blog/) is also worth reading
* Richard Gartner, *Metadata: Shaping Knowledge from Antiquity to the Semantic Web*, Springer, 2016
* Seth van Hooland and Ruben Verborgh, *Linked Data for Libraries, Archives and Museums*, 2015
* Matthew Lincoln ['Using SPARQL to access Linked Open Data'](/lessons/graph-databases-and-SPARQL)
-* [Linked Data guides and tutorials](http://linkeddata.org/guides-and-tutorials)
+* [Linked Data guides and tutorials](https://linkeddata.org/guides-and-tutorials)
* Dominic Oldman, Martin Doerr and Stefan Gradmann, 'Zen and the Art of Linked Data: New Strategies for a Semantic Web of Humanist Knowledge', in *A New Companion to Digital Humanities*, edited by Susan Schreibman et al.
-* Max Schmachtenberg, Christian Bizer and Heiko Paulheim, [State of the LOD Cloud 2017](http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/)
+* Max Schmachtenberg, Christian Bizer and Heiko Paulheim, [State of the LOD Cloud 2017](https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/)
* David Wood, Marsha Zaidman and Luke Ruth, *Linked Data: Structured data on the Web*, Manning, 2014

## Acknowledgements

-I'd like to thank my two peer reviewers, Matthew Lincoln and Terhi Nurmikko-Fuller, and my editor, Adam Crymble, for generously spending time helping me to improve this course with numerous suggestions, clarification and corrections. This tutorial is based on one written as part of the 'Thesaurus of British and Irish History as SKOS' [(Tobias) project](http://www.history.ac.uk/projects/digital/tobias), funded by the [AHRC](http://www.ahrc.ac.uk/). It has been revised for the *Programming Historian*.
+I'd like to thank my two peer reviewers, Matthew Lincoln and Terhi Nurmikko-Fuller, and my editor, Adam Crymble, for generously spending time helping me to improve this course with numerous suggestions, clarifications and corrections. This tutorial is based on one written as part of the 'Thesaurus of British and Irish History as SKOS' [(Tobias) project](https://www.history.ac.uk/projects/digital/tobias), funded by the [AHRC](https://www.ahrc.ac.uk/). It has been revised for the *Programming Historian*.
diff --git a/en/lessons/intro-to-twitterbots.md b/en/lessons/intro-to-twitterbots.md
index 088234f506..f5a50a5b41 100755
--- a/en/lessons/intro-to-twitterbots.md
+++ b/en/lessons/intro-to-twitterbots.md
@@ -31,10 +31,10 @@ Access to Twitter’s API has recently changed. The Free Tier no longer allows u

# An Introduction to Twitter Bots with Tracery

-This lesson explains how to create simple twitterbots using the [Tracery generative grammar](http://tracery.io) and the [Cheap Bots Done Quick](http://cheapbotsdonequick.com/) service. Tracery exists in multiple languages and can be integrated into websites, games, bots. You may fork it [on github here](https://github.com/galaxykate/tracery/tree/tracery2).
+This lesson explains how to create simple twitterbots using the [Tracery generative grammar](https://tracery.io) and the [Cheap Bots Done Quick](https://cheapbotsdonequick.com/) service.
Tracery exists in multiple languages and can be integrated into websites, games, and bots. You may fork it [on github here](https://github.com/galaxykate/tracery/tree/tracery2). ## Why bots? -Strictly speaking, a twitter bot is a piece of software for automated controlling a Twitter account. When thousands of these are created and are tweeting more or less the same message, they have the ability to shape discourse on Twitter which then can influence other media discourses. Bots of this kind [can even be seen as credible sources of information](http://www.sciencedirect.com/science/article/pii/S0747563213003129). Projects such as [Documenting the Now](http://www.docnow.io/) are creating tools to allow researchers to create and query archives of social media around current events - and which will naturally contain many bot-generated posts. In this tutorial, I want to demonstrate how one can build a simple twitterbot so that, knowing how they operate, historians may more easily spot the bots in our archives - and perhaps counter with bots of their own. +Strictly speaking, a twitter bot is a piece of software for automatically controlling a Twitter account. When thousands of these are created and are tweeting more or less the same message, they have the ability to shape discourse on Twitter which can then influence other media discourses. Bots of this kind [can even be seen as credible sources of information](https://www.sciencedirect.com/science/article/pii/S0747563213003129). Projects such as [Documenting the Now](https://www.docnow.io/) are creating tools to allow researchers to create and query archives of social media around current events - and which will naturally contain many bot-generated posts. In this tutorial, I want to demonstrate how one can build a simple twitterbot so that, knowing how they operate, historians may more easily spot the bots in our archives - and perhaps counter with bots of their own. But I believe also that there is space in digital history and the digital humanities more generally for creative, expressive, artistic work. I believe that there is space for programming historians to use the affordances of digital media to create _things_ that could not otherwise exist to move us, to inspire us, to challenge us. There is room for satire; there is room for comment. With Mark Sample, I believe that there is a need for '[bots of conviction](https://medium.com/@samplereality/a-protest-bot-is-a-bot-so-specific-you-cant-mistake-it-for-bullshit-90fe10b7fbaa)'. @@ -76,11 +76,11 @@ Some suggestions to get you thinking, from individuals on Twitter who responded > @electricarchaeo A bot imagining the reactions of Afghans, Iraqis, Syrians, Yemenis, when their family members are killed by drone attacks. — Cory Taylor (@CoryTaylor_) April 22, 2017 -Given that so much historical data is expressed on the web as [JSON](http://json.org/), a bit of digging should find you data that you can actually fold into your bot. +Given that so much historical data is expressed on the web as [JSON](https://json.org/), a bit of digging should find you data that you can actually fold into your bot. My method is that of the bricoleur, the person who adapts and pastes together the bits and bobs of code that he finds; in truth, most programming functions this way. There are many packages available that will interface with Twitter's API, in various languages. There is little 'programming' in this lesson in the sense of writing bots in (for instance) Python.
In this introductory lesson, I will show you how to build bots that tell stories, that write poetry, that do wonderful things using Tracery.io as our _generative grammar_, in conjunction with the Cheap Bots Done Quick service to host the bot. For more tutorials on building and hosting Twitter bots with other services, see [the Botwiki tutorial list](https://botwiki.org/tutorials/twitterbots/). -My most successful bot has been [@tinyarchae](http://twitter.com/tinyarchae), a bot that tweets scenes from a horrible dsyfunctional archaeological excavation project. Every archaeological project deals with problems of sexism, abuse, and bad faith; @tinyarchae pushes the stuff of conference whispers to a ridiculous extreme. It is a caricature that contains a kernel of uncomfortable truth. Other bots I have built glitch [archaeological photography](https://twitter.com/archaeoglitch); one is actually useful, in that it is [tweeting out new journal articles in archaeology](https://twitter.com/botarchaeo) and so serves as a research assistant. (For more thoughts on the role bots play in public archaeology, see this [keynote](https://electricarchaeology.ca/2017/04/27/bots-of-archaeology-machines-writing-public-archaeology/) from the [Public Archaeology Twitter Conference](http://web.archive.org/web/20180131161516/https://publicarchaeologyconference.wordpress.com/)). +My most successful bot has been [@tinyarchae](https://twitter.com/tinyarchae), a bot that tweets scenes from a horrible dysfunctional archaeological excavation project. Every archaeological project deals with problems of sexism, abuse, and bad faith; @tinyarchae pushes the stuff of conference whispers to a ridiculous extreme. It is a caricature that contains a kernel of uncomfortable truth. Other bots I have built glitch [archaeological photography](https://twitter.com/archaeoglitch); one is actually useful, in that it is [tweeting out new journal articles in archaeology](https://twitter.com/botarchaeo) and so serves as a research assistant. (For more thoughts on the role bots play in public archaeology, see this [keynote](https://electricarchaeology.ca/2017/04/27/bots-of-archaeology-machines-writing-public-archaeology/) from the [Public Archaeology Twitter Conference](https://web.archive.org/web/20180131161516/https://publicarchaeologyconference.wordpress.com/)). # Planning: What will your bot do? @@ -90,7 +90,7 @@ We begin with pad and paper. As a child in elementary school, one activity we of and students would fill in the blanks appropriately. It was silly; and it was fun. Twitterbots are to madlibs what sports cars are to horses and wagons. The blanks that we might fill in could be values in SVG vector graphics. They could be numbers in numeric file names (and thus tweet random links to an open database, say). They could be, yes, even nouns and adverbs. Since Twitterbots live on the web, the building blocks that we put together can be more than text (although, for the time being, text will be easiest to work with). -We are going to start by sketching out a _replacement grammar_. The conventions of this grammar were developed by Kate Compton ([@galaxykate](https://twitter.com/galaxykate) on Twitter); it's called [Tracery.io](http://tracery.io). It can be used as a javascript library in webpages, in games, and in bots. A replacement grammar works rather similarly to the madlibs you might remember as a child. +We are going to start by sketching out a _replacement grammar_.
The conventions of this grammar were developed by Kate Compton ([@galaxykate](https://twitter.com/galaxykate) on Twitter); it's called [Tracery.io](https://tracery.io). It can be used as a JavaScript library in webpages, in games, and in bots. A replacement grammar works rather similarly to the madlibs you might remember as a child. *In order to make it clear what the _grammar_ is doing, we are going to _not_ create a history bot for the time being. I want to make it clear what the grammar does, and so we will build something surreal to surface how that grammar works.* @@ -202,9 +202,9 @@ Remember that your bot will be appearing in other people's timelines. The potent You can plumb a bot into your own, current, account, but you probably don't want a bot tweeting _as_ you or _for_ you. In which case, set up a new Twitter account. When you set up a new Twitter account, Twitter will want an email address. You can use a brand new email address, or, if you have a Gmail account, you can use the `+tag` trick, i.e. instead of 'johndoe' at gmail, you use `johndoe+twitterbot` at gmail. Twitter will accept that as a distinct email from your usual email. -Normally, when one is building a Twitterbot, one has to create an app on twitter (at [apps.twitter.com](http://apps.twitter.com)), obtain the consumer secret and key, and the access token and key. Then you have to program in authentication so that Twitter knows that the program trying to access the platform is permitted. +Normally, when one is building a Twitterbot, one has to create an app on Twitter (at [apps.twitter.com](https://apps.twitter.com)), obtain the consumer secret and key, and the access token and key. Then you have to program in authentication so that Twitter knows that the program trying to access the platform is permitted. -Fortunately, we do not have to do that, since George Buckenham created the bot hosting site '[Cheap Bots Done Quick](http://cheapbotsdonequick.com/)'. (That website also shows the JSON source grammar for a number of different bots, which can serve as inspiration). Once you've created your bot's Twitter account - and you are logged in to Twitter as the bot account- go to Cheap Bots Done Quick and hit the 'sign in with Twitter' button. The site will redirect you to Twitter to approve authorization, and then bring you back to Cheap Bots Done Quick. +Fortunately, we do not have to do that, since George Buckenham created the bot hosting site '[Cheap Bots Done Quick](https://cheapbotsdonequick.com/)'. (That website also shows the JSON source grammar for a number of different bots, which can serve as inspiration). Once you've created your bot's Twitter account - and you are logged in to Twitter as the bot account - go to Cheap Bots Done Quick and hit the 'sign in with Twitter' button. The site will redirect you to Twitter to approve authorization, and then bring you back to Cheap Bots Done Quick. The JSON that describes your bot can be written or pasted into the main white box. Take the JSON from the editor and paste it into the main white box. If there are any errors in your JSON, the output box at the bottom will turn red and the site will try to give you an indication of where things have gone wrong. In most cases, this will be because of an errant comma or quotation mark. If you hit the refresh button to the right of the output box (NOT the browser refresh button!), the site will generate new text from your grammar.
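If it helps to see the replacement mechanism spelled out, here is a toy version of a Tracery-style grammar in a few lines of Python. This is my own illustration of the general idea, not Tracery's actual code (a Python port, pytracery, exists if you want the real library):

```python
# A toy replacement grammar in the spirit of Tracery: every #symbol# in a
# rule is replaced by a random expansion of that symbol, recursively.
# Illustrative only; real Tracery adds modifiers, actions, and more.
import random
import re

grammar = {
    "origin": ["#creature# #action# the #place#."],
    "creature": ["owl", "archaeologist", "ghost"],
    "action": ["haunts", "excavates", "admires"],
    "place": ["trench", "archive", "museum"],
}

def expand(symbol, rules):
    """Pick one expansion for `symbol`, then expand any #nested# symbols."""
    rule = random.choice(rules[symbol])
    return re.sub(r"#(\w+)#", lambda m: expand(m.group(1), rules), rule)

print(expand("origin", grammar))  # e.g. "ghost excavates the archive."
```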
@@ -224,7 +224,7 @@ As Cheap Bots Done Quick is a service provided by George Buckenham out of a spir > If you create a bot I deem abusive or otherwise unpleasant (for example, @mentioning people who have not consented, posting insults or using slurs) I will take it down -Other pointers for good bot citizenship are provided by Darius Kazemi, one of the great bot artists, are discussed [here](http://tinysubversions.com/2013/03/basic-twitter-bot-etiquette/). +Other pointers for good bot citizenship, provided by Darius Kazemi, one of the great bot artists, are discussed [here](https://tinysubversions.com/2013/03/basic-twitter-bot-etiquette/). # Going further with Tracery Many bots are a good deal more complicated than what we have described here, but it is enough to get you started. Some surprisingly effective bots can be created using Tracery. @@ -249,7 +249,7 @@ The modifiers `.capitalize` and `.s` are added inside the `#` of the symbol they ## Use Emoji -Emoji can be used to great effect in Twitterbots. You can copy and paste emoji directly into the Cheap Bots Done Quick editor; each emoji should be within quotation marks as any other rule would be. Use [this list](http://unicode.org/emoji/charts/full-emoji-list.html) to find the emoji you wish to use, and make sure to copy and paste the emoji from the Twitter column to ensure that your emoji will display. +Emoji can be used to great effect in Twitterbots. You can copy and paste emoji directly into the Cheap Bots Done Quick editor; each emoji should be within quotation marks as any other rule would be. Use [this list](https://unicode.org/emoji/charts/full-emoji-list.html) to find the emoji you wish to use, and make sure to copy and paste the emoji from the Twitter column to ensure that your emoji will display. ## Reusing Generated Symbols with Actions @@ -273,7 +273,7 @@ This feature probably would not be used much in the case of a Twitterbot, but if } ``` -Another, slightly more complex example is example number 5 at Kate Compton's own tutorial site at [http://www.crystalcodepalace.com/traceryTut.html](http://www.crystalcodepalace.com/traceryTut.html): +Another, slightly more complex example is example number 5 at Kate Compton's own tutorial site at [https://www.crystalcodepalace.com/traceryTut.html](https://www.crystalcodepalace.com/traceryTut.html): ```JSON { @@ -291,7 +291,7 @@ Tracery reads the origin, and before it gets to the `story` symbol it sees an ac ## Responding to mentions in Cheap Bots Done Quick -[Cheap Bots Done Quick](http://cheapbotsdonequick.com/) has a beta feature that allows your bot to respond to mentions. (Warning: if you create two bots, and one mentions the other, the ensuing 'conversation' can run for a very long time indeed; there is a 5% chance in any exchange that the bot won't respond, thus breaking the conversation). +[Cheap Bots Done Quick](https://cheapbotsdonequick.com/) has a beta feature that allows your bot to respond to mentions. (Warning: if you create two bots, and one mentions the other, the ensuing 'conversation' can run for a very long time indeed; there is a 5% chance in any exchange that the bot won't respond, thus breaking the conversation). To set up a response pattern, click at the bottom of the page to set the button to 'reply to tweets'. In the JSON editing box that appears, you set up the pattern for phrases that your bot will respond to.
For instance, some of what @tinyarchae watches for: @@ -318,7 +318,7 @@ At the very bottom of the page, you can test your mentions by writing a sample t {% include figure.html filename="bot-lesson-response.png" caption="Testing your bot's response" %} ## SVG graphics -Since SVG is a text format that describes the geometry of a vector graphic, Tracery can be used to create rather artistic work - the [Tiny Space Adventure](https://twitter.com/TinyAdv) bot draws a starfield, a spaceship, and a plot. Its grammar [may be viewed here](https://pastebin.com/YYtZnzZ0). The key issue with generating svg with Tracery is to get the components correct. The source code for the [softlandscapes bot](http://cheapbotsdonequick.com/source/softlandscapes) can be a useful model. This bot begins by defining the critical text that marks out SVG: +Since SVG is a text format that describes the geometry of a vector graphic, Tracery can be used to create rather artistic work - the [Tiny Space Adventure](https://twitter.com/TinyAdv) bot draws a starfield, a spaceship, and a plot. Its grammar [may be viewed here](https://pastebin.com/YYtZnzZ0). The key issue with generating svg with Tracery is to get the components correct. The source code for the [softlandscapes bot](https://cheapbotsdonequick.com/source/softlandscapes) can be a useful model. This bot begins by defining the critical text that marks out SVG: ``` "origin2": ["#preface##defs##bg##mountains##clouds##ending#"], @@ -337,11 +337,11 @@ Working with SVG can be tricky, as things like backslashes, line endings, quotat Bots that generate SVG are beyond the scope of this lesson, but careful study of existing bots should help you on your way. ## Music -Strictly speaking, this is no longer about bots, but since music can be notated in text, one can use Tracery to compose music and then use other libraries to convert this notation into Midi files - see [http://www.codingblocks.net/videos/generating-music-in-javascript/](http://www.codingblocks.net/videos/generating-music-in-javascript/) and my [own experiment](https://electricarchaeology.ca/2017/04/07/tracery-continues-to-be-awesome/). +Strictly speaking, this is no longer about bots, but since music can be notated in text, one can use Tracery to compose music and then use other libraries to convert this notation into Midi files - see [https://www.codingblocks.net/videos/generating-music-in-javascript/](https://www.codingblocks.net/videos/generating-music-in-javascript/) and my [own experiment](https://electricarchaeology.ca/2017/04/07/tracery-continues-to-be-awesome/). # Other Bot Tutorials -- Zach Whalen [How to make a Twitter Bot with Google Spreadsheets](http://www.zachwhalen.net/posts/how-to-make-a-twitter-bot-with-google-spreadsheets-version-04/) +- Zach Whalen [How to make a Twitter Bot with Google Spreadsheets](https://www.zachwhalen.net/posts/how-to-make-a-twitter-bot-with-google-spreadsheets-version-04/) - Casey Bergman, Keeping Up With the Scientific Literature using Twitterbots: The FlyPapers Experiment https://caseybergman.wordpress.com/2014/02/24/keeping-up-with-the-scientific-literature-using-twitterbots-the-flypapers-experiment/ also https://github.com/roblanf/phypapers ; in essence this method collects the RSS feed from journal articles, and then uses a service such as [Dlvr.it](https://dlvrit.com/) to push the links to a Twitter account. - Discontinued: Stefan Bohacek has posted the code templates for a number of different kinds of bots at the code remixing site Glitch.com. 
If you visit his page, you will see a list of different kinds of bots; click on the 'remix' button and then study the readme button carefully. Glitch requires a login via a Github or Facebook account. - Finally, I would suggest joining the BotMakers Slack group to find more tutorials, like-minded individuals, and further resources: [Sign up here](https://botmakers.org) diff --git a/en/lessons/introduction-and-installation.md b/en/lessons/introduction-and-installation.md index 158b561cea..9a0fff5add 100755 --- a/en/lessons/introduction-and-installation.md +++ b/en/lessons/introduction-and-installation.md @@ -57,7 +57,7 @@ The programming language we will use in this series of lessons is Python, a free, open source language. Unless otherwise noted, we will be using **Python 3** throughout. Version 2 is no longer officially supported, but you might still find it used in older projects or lessons. -[Python 3 has a few differences in formatting](http://sebastianraschka.com/Articles/2014_python_2_3_key_diff.html) (think grammar rules), so beware if you find examples online that still use Python 2. They might not run under current versions of Python. +[Python 3 has a few differences in formatting](https://sebastianraschka.com/Articles/2014_python_2_3_key_diff.html) (think grammar rules), so beware if you find examples online that still use Python 2. They might not run under current versions of Python. Backup Your Work! ----------------- @@ -86,10 +86,10 @@ doesn't work on your platform, please let us know. - [Windows Python Installation][] - [Linux Python Installation][] - [Python programming language]: http://www.python.org/ - [Beautiful Soup HTML/XML parser]: http://www.crummy.com/software/BeautifulSoup/ - [Komodo Edit]: http://www.activestate.com/komodo-edit - [Python Editors]: http://wiki.python.org/moin/PythonEditors/ + [Python programming language]: https://www.python.org/ + [Beautiful Soup HTML/XML parser]: https://www.crummy.com/software/BeautifulSoup/ + [Komodo Edit]: https://www.activestate.com/komodo-edit + [Python Editors]: https://wiki.python.org/moin/PythonEditors/ [Jungle Disk]: https://www.jungledisk.com/ [Dropbox]: https://www.dropbox.com/home [Viewing HTML Files]: /lessons/viewing-html-files diff --git a/en/lessons/introduction-to-ffmpeg.md b/en/lessons/introduction-to-ffmpeg.md index 53dedf3f11..8bd2124ff9 100644 --- a/en/lessons/introduction-to-ffmpeg.md +++ b/en/lessons/introduction-to-ffmpeg.md @@ -75,7 +75,7 @@ to install FFmpeg and ensure it remains in the most up-to-date version. Homebrew Windows users can use the package manager [Chocolatey](https://chocolatey.org/) to install and maintain FFmpeg. Reto Kromer's [Windows installation guide](https://avpres.net/FFmpeg/install_Windows.html) provides all the necessary information to use Chocolatey or to install the software from a build. ## For Linux Users -[Linuxbrew](http://linuxbrew.sh/), a program similar to Homebrew, can be used to +[Linuxbrew](https://linuxbrew.sh/), a program similar to Homebrew, can be used to install and maintain FFmpeg in Linux. Reto Kromer also provides a helpful [Linux installation guide](https://avpres.net/FFmpeg/install_Linux.html) that closely resembles the Mac OS installation. Your distribution of Linux may also have its [own package manager](https://www.linode.com/docs/tools-reference/linux-package-management/) already installed that includes FFmpeg packages. Depending on your distribution of Linux (Ubuntu, Fedora, Arch Linux, etc.)
these builds can vary, so using Linuxbrew could be useful to ensure that the build is the same regardless of which type of Linux you are using. @@ -111,7 +111,7 @@ that closely resembles the Mac OS installation. Your distribution of Linux may a * If you see something like `-bash: ffmpeg: command not found` then something has gone wrong. - * Note: If you are using a package manager it is unlikely that you will encounter this error message. However, if there is a problem after installing with a package manager, it is likely the issue is with the package manager itself as opposed to FFmpeg. Consult the Troubleshooting sections for [Homebrew](https://docs.brew.sh/Troubleshooting), [Chocolatey](https://chocolatey.org/docs/troubleshooting), or [Linuxbrew](http://linuxbrew.sh/) to ensure the package manager is functioning properly on your computer. If you are attempting to install without a package manager and see this error message, cross-reference your method with the FFmpeg Compilation Guide provided above. + * Note: If you are using a package manager it is unlikely that you will encounter this error message. However, if there is a problem after installing with a package manager, it is likely the issue is with the package manager itself as opposed to FFmpeg. Consult the Troubleshooting sections for [Homebrew](https://docs.brew.sh/Troubleshooting), [Chocolatey](https://chocolatey.org/docs/troubleshooting), or [Linuxbrew](https://linuxbrew.sh/) to ensure the package manager is functioning properly on your computer. If you are attempting to install without a package manager and see this error message, cross-reference your method with the FFmpeg Compilation Guide provided above. ## Using FFmpeg in a web browser (without installing) If you do not want to install FFmpeg on your computer but would like to become familiar with using it at the command-line, Brian Grinstead's [videoconverter.js](https://bgrins.github.io/videoconverter.js/demo/) provides a way to run FFmpeg commands and learn its basic functions in the web-browser of your choice. @@ -381,8 +381,8 @@ done ``` * `for file in *.m4v; do` = initiates the for loop. This first line basically tells FFmpeg: "for all files in this directory with the extension `.m4v`, perform the following command." - * The `*` is a Bash [wildcard](http://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm) attached to a given file-type and specifies them as the input files. - * The word `file` is an arbitrary [variable](http://tldp.org/HOWTO/Bash-Prog-Intro-HOWTO-5.html) which will represent each file as it runs through the loop. + * The `*` is a Bash [wildcard](https://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm) attached to a given file-type and specifies them as the input files. + * The word `file` is an arbitrary [variable](https://tldp.org/HOWTO/Bash-Prog-Intro-HOWTO-5.html) which will represent each file as it runs through the loop. * `ffprobe -f lavfi -i movie="$file",signalstats -show_entries frame=pkt_pts_time:frame_tags=lavfi.signalstats.HUEMED -print_format csv > "${file%.m4v}.csv"; done` = the same color metadata extraction command we ran on our two excerpts of *Destination Earth*, with some slight alterations to the syntax to account for its use across multiple files in a directory: * `"$file"` recalls each variable. The enclosing quotation marks ensure that the original filename is retained.
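Incidentally, the same batch job can be written in Python if you find the Bash syntax opaque. The sketch below is my own rough equivalent of the loop above (not part of the lesson or this patch), assuming `ffprobe` is on your PATH and the filenames contain no characters that need special escaping inside the `movie=` filter:

```python
# A rough Python equivalent of the Bash loop above: run the same ffprobe
# colour-metadata command on every .m4v file in the current directory,
# writing each result to a matching .csv file.
import glob
import subprocess
from pathlib import Path

for filename in glob.glob("*.m4v"):
    csv_path = Path(filename).with_suffix(".csv")
    command = [
        "ffprobe", "-f", "lavfi",
        "-i", f"movie={filename},signalstats",
        "-show_entries",
        "frame=pkt_pts_time:frame_tags=lavfi.signalstats.HUEMED",
        "-print_format", "csv",
    ]
    with open(csv_path, "w") as output:
        subprocess.run(command, stdout=output, check=True)
```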
diff --git a/en/lessons/introduction-to-populating-a-website-with-api-data.md b/en/lessons/introduction-to-populating-a-website-with-api-data.md index e929fd1601..74c660db98 100644 --- a/en/lessons/introduction-to-populating-a-website-with-api-data.md +++ b/en/lessons/introduction-to-populating-a-website-with-api-data.md @@ -30,7 +30,7 @@ doi: 10.46430/phen0086 This tutorial offers readers the possibility to quickly learn the basics of APIs without prior knowledge of programming, to start accessing a vast amount of data (often freely) available on the web. In particular, we learn basic [HTML](https://en.wikipedia.org/wiki/HTML) and [PHP](https://en.wikipedia.org/wiki/PHP) to build a simple website to display API query results of cultural heritage collections, using [Europeana API](https://pro.europeana.eu/resources/apis). As the technique is generic, we also create a short template to test it with [Harvard Art Museums API](https://www.harvardartmuseums.org/collections/api). In the tutorial, some other concepts such as [metadata](https://en.wikipedia.org/wiki/Metadata) and [web servers](https://en.wikipedia.org/wiki/Web_server) are explained to understand APIs in a broad context. -[PHP](http://php.net/) is a programming language especially suited for web development, while [HTML](https://en.wikipedia.org/wiki/HTML) is a markup language to create webpages and applications. The exampes in this lesson uses some basic programming, however it is usually also possible to use copy and paste when working with API at a basic level. +[PHP](https://php.net/) is a programming language especially suited for web development, while [HTML](https://en.wikipedia.org/wiki/HTML) is a markup language to create webpages and applications. The examples in this lesson use some basic programming; however, it is usually also possible to use copy and paste when working with APIs at a basic level. # Contents The tutorial consists of two parts. The first part provides the basic theory of APIs: @@ -117,7 +117,7 @@ Let’s have a closer look at what you typed into your browser URL box the examp {% include figure.html filename="website-api2.jpg" caption="Untidy JSON data structure (raw data) in Chrome" %} ## Understanding API Data (in JSON) -If your browser does not support a tidy [JSON](https://en.wikipedia.org/wiki/JSON) view (the latest Firefox should have a pre-installed JSON viewer), please copy and paste the entire data to an [online JSON viewer](http://jsonviewer.stack.hu/). It allows us to view the data more easily by expanding (+ button) and collapsing (- button) the data hierarchy. +If your browser does not support a tidy [JSON](https://en.wikipedia.org/wiki/JSON) view (the latest Firefox should have a pre-installed JSON viewer), please copy and paste the entire data to an [online JSON viewer](https://jsonviewer.stack.hu/). It allows us to view the data more easily by expanding (+ button) and collapsing (- button) the data hierarchy. {% include figure.html filename="website-api3.jpg" caption="Online JSON viewer" %} @@ -129,7 +129,7 @@ Now, if you look carefully at the first lines of the data, you may notice someth You read literally: `"apikey"` is your API key. Your API access is `success`ful. We can ignore what `requestNumber` is, but only the first `12` items (records) are returned (to avoid a flood of data) out of the `totalResults` of `1967431`. After that, you have actual data from the collection (i.e. the 12 items).
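As an aside for readers who would like to poke at the response programmatically before writing any PHP, here is a minimal sketch in Python (my own illustration, not part of the lesson or this patch). It assumes the `requests` library is installed, that the v2 search endpoint lives at `https://www.europeana.eu/api/v2/search.json`, and that `YOUR_API_KEY` is a placeholder for the key you registered:

```python
# Illustration only (the lesson itself uses PHP): fetch the same search
# results and inspect the fields described above.
import requests

response = requests.get(
    "https://www.europeana.eu/api/v2/search.json",
    params={"wskey": "YOUR_API_KEY", "query": "api"},  # placeholder key
)
data = response.json()

print(data["success"])        # True when the key is accepted
print(data["totalResults"])   # the full number of matching records
for item in data["items"]:    # only the first 12 records by default
    print(item.get("title"))
```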
-In order to organise the data, Europeana uses a particular format/structure, called [JSON (JavaScript Object Notation)](http://json.org/). The data are wrapped with curly brackets (which is called Object). It always starts with `{` and ends with `}`. Inside, the data are represented with pairs of strings. Each pair has two components separated by a colon (`:`). For instance, `"totalResults":1967341`. We call this format [name-value pair](https://en.wikipedia.org/wiki/Attribute%E2%80%93value_pair). In our case, the name is `"totalResults"` and `1967341` is the data value. If there are more than one pair, name-value pairs are separated by comma (`,`). To sum up, the simplest JSON data look like this: +In order to organise the data, Europeana uses a particular format/structure, called [JSON (JavaScript Object Notation)](https://json.org/). The data are wrapped with curly brackets (which is called Object). It always starts with `{` and ends with `}`. Inside, the data are represented with pairs of strings. Each pair has two components separated by a colon (`:`). For instance, `"totalResults":1967341`. We call this format [name-value pair](https://en.wikipedia.org/wiki/Attribute%E2%80%93value_pair). In our case, the name is `"totalResults"` and `1967341` is the data value. If there is more than one pair, name-value pairs are separated by a comma (`,`). To sum up, the simplest JSON data look like this: ``` { @@ -149,17 +149,17 @@ As there can be a long list of names in a record, let me explain some of the nam | id | Identifier of this item | /9200309/BibliographicResource_3000093757119_source | | country | Country of the data provider | Belgium | | dataProvider | Data provider of this item | Royal Library of Belgium | -| rights | Predefined rights statement (Creative Commons etc) | http://rightsstatements.org/vocab/InC/1.0/ | +| rights | Predefined rights statement (Creative Commons etc) | https://rightsstatements.org/vocab/InC/1.0/ | | title | Title of this item | Stand Not Upon The Order Of Your Going, But Go At Once Shakespeare Macbeth 3-4 Enlist Now | | edmPreview | URL of the preview of this item in Europeana | [https://www.europeana.eu/api/v2/thumbnail-by-url.json?uri=http%3A%2F%2Fuurl.kbr.be%2F1017835%2Fthumbs%2Fs&size=LARGE&type=IMAGE](https://www.europeana.eu/api/v2/thumbnail-by-url.json?uri=http%3A%2F%2Fuurl.kbr.be%2F1017835%2Fthumbs%2Fs&size=LARGE&type=IMAGE) | -| edmIsShownAt | URL (web page) of this item at the website of the data provider | [http://uurl.kbr.be/1017835](http://uurl.kbr.be/1017835) | +| edmIsShownAt | URL (web page) of this item at the website of the data provider | [https://uurl.kbr.be/1017835](https://uurl.kbr.be/1017835) | | edmIsShownBy | URL (media file) of this item at the website of the data provider | [https://www.rijksmuseum.nl/nl/collectie/RP-P-OB-84.508](https://www.rijksmuseum.nl/nl/collectie/RP-P-OB-84.508) | | type | The type of the item | IMAGE | -| guid | URL of the item page in Europeana | [http://www.europeana.eu/portal/record/90402/RP_P_OB_84_508.html](http://www.europeana.eu/portal/record/90402/RP_P_OB_84_508.html) | +| guid | URL of the item page in Europeana | [https://www.europeana.eu/portal/record/90402/RP_P_OB_84_508.html](https://www.europeana.eu/portal/record/90402/RP_P_OB_84_508.html) | It is outside of the scope of this tutorial to explain the data model of Europeana (Europeana Data Model: EDM), but a short explanation would be handy, because all records are based on it. It consists of different descriptions (i.e.
metadata) about cultural heritage items, including: -- [Dublin Core](http://dublincore.org/documents/dcmi-terms/) metadata to describe a cultural heritage object (stored in museums, libraries and archives). It includes the description of mostly physical aspects of the object such as title (Mona Lisa), creator (Leonardo da Vinci), size (77 cm × 53 cm), date (1503-1517?), place (France), owner (Louvre museum), and type (painting). In the Europeana API, it is often specified with prefix `dc`. +- [Dublin Core](https://dublincore.org/documents/dcmi-terms/) metadata to describe a cultural heritage object (stored in museums, libraries and archives). It includes the description of mostly physical aspects of the object such as title (Mona Lisa), creator (Leonardo da Vinci), size (77 cm × 53 cm), date (1503-1517?), place (France), owner (Louvre museum), and type (painting). In the Europeana API, it is often specified with prefix `dc`. - Metadata about digital versions of the physical object. It may include URLs where users can view the object (both at the Europeana website and external website), digital formats (jpg), and licensing information ([Creative Commons](https://en.wikipedia.org/wiki/Creative_Commons)). @@ -211,7 +211,7 @@ If you use Skype XAMPP may not work as Skype may use the same port (80 and 443). {% include figure.html filename="website-api4.jpg" caption="Click Start button for Apache Module, and it is started (User interface may look a bit different depending on your OS)" %} -{% include figure.html filename="website-api5.jpg" caption="Go to [http://localhost/dashboard](http://localhost/dashboard) in your browser to see if Apache is working" %} +{% include figure.html filename="website-api5.jpg" caption="Go to [https://localhost/dashboard](https://localhost/dashboard) in your browser to see if Apache is working" %} If you see the screens like above, everything should be OK. Go to the installation folder, and you will find an "htdocs" folder (for Mac OSX, /Applications/XAMPP/xamppfiles/htdocs). I suggest creating a shortcut on the desktop. We must use this folder to put all the necessary files to create our website, so it is best if it is conveniently located. Right now there are only default files in this folder that XAMPP has prepared for us, so let’s create a brand new PHP file. Inside the "htdocs" folder, create a new text file using your text editor and save it as `helloworld.php`. @@ -225,7 +225,7 @@ print 'Hello World'; ?> ``` -Open your web browser and type [http://localhost/helloworld.php](http://localhost/helloworld.php) in the address bar. When working on PHP code, I suggest keeping the browser open to the web page you are editing, so as soon as you save the file, you can see the outcome. +Open your web browser and type [https://localhost/helloworld.php](https://localhost/helloworld.php) in the address bar. When working on PHP code, I suggest keeping the browser open to the web page you are editing, so as soon as you save the file, you can see the outcome. You should see "Hello World" on a white background in your browser window. Congratulations. You have just made your first PHP program. PHP code should always start with `<?php` and end with `?>`. Just like JSON, those lines declare that the file is PHP. `print` means display the following code `'Hello World'` as text.
In PHP, you can use either `''` or `""` (single or double quotes) to indicate that the data type is a [string](https://en.wikipedia.org/wiki/String_(computer_science)) (text) (There are [other data types](https://www.w3schools.com/pHp/php_datatypes.asp) such as integer, Boolean, or array, but let’s focus on strings for now). @@ -514,13 +514,13 @@ The point is the API template can be reused and customized, therefore, the most If you can learn a bit of programming, you are no longer restricted by what a website offers by default. You are now free to build your own tool or system, for example, to select, filter, compare, process, analyse, visualise, and share data in new ways. So, what are you waiting for? Be brave and start your new project. ## Useful APIs -- [The New York Times](http://developer.nytimes.com/) +- [The New York Times](https://developer.nytimes.com/) - [The Digital Public Library of America](https://pro.dp.la/developers/api-codex) - [VIAF](https://www.oclc.org/developer/api/oclc-apis/viaf.en.html) -- [GeoNames](http://www.geonames.org/export/web-services.html) +- [GeoNames](https://www.geonames.org/export/web-services.html) - [Wikipedia](https://www.mediawiki.org/wiki/API:Main_page) - [The Open Library](https://openlibrary.org/developers/api) -- [List of useful APIs for museums](http://museum-api.pbworks.com/w/page/21933420/Museum%C2%A0APIs) +- [List of useful APIs for museums](https://museum-api.pbworks.com/w/page/21933420/Museum%C2%A0APIs) ## Author's Project Using APIs - [James Cook Dynamic Journal (JCDJ)](https://web.archive.org/web/20210414011922/https://jcdj.acdh-dev.oeaw.ac.at/)...Contextualisation of a book from The Open Library diff --git a/en/lessons/introduction-to-stylometry-with-python.md b/en/lessons/introduction-to-stylometry-with-python.md index 228f83276f..42bf81ff15 100755 --- a/en/lessons/introduction-to-stylometry-with-python.md +++ b/en/lessons/introduction-to-stylometry-with-python.md @@ -61,7 +61,7 @@ This tutorial uses both datasets and software that you will have to download and ### The Dataset ### -To work through this lesson, you will need to download and unzip the archive of the _Federalist Papers_ ([.zip](/assets/introduction-to-stylometry-with-python/stylometry-federalist.zip)) containing the 85 documents that we will use for our analysis. The archive also contains the [original Project Gutenberg ebook](http://www.gutenberg.org/cache/epub/1404/pg1404.txt) version of the _Federalist Papers_ from which these 85 documents have been extracted. When you unzip the archive, it will create a [directory](https://en.wikipedia.org/wiki/Directory_(computing)) called `data` in your current [working directory](https://en.wikipedia.org/wiki/Working_directory). Make sure that you stay in this current working directory and that you save all work here while completing the lesson. +To work through this lesson, you will need to download and unzip the archive of the _Federalist Papers_ ([.zip](/assets/introduction-to-stylometry-with-python/stylometry-federalist.zip)) containing the 85 documents that we will use for our analysis. The archive also contains the [original Project Gutenberg ebook](https://www.gutenberg.org/cache/epub/1404/pg1404.txt) version of the _Federalist Papers_ from which these 85 documents have been extracted. When you unzip the archive, it will create a [directory](https://en.wikipedia.org/wiki/Directory_(computing)) called `data` in your current [working directory](https://en.wikipedia.org/wiki/Working_directory). 
Make sure that you stay in this current working directory and that you save all work here while completing the lesson. ### The Software ### @@ -75,11 +75,11 @@ Some of these modules may not be pre-installed on your computer. Should you enco ## Some Notes about Language Independence -This tutorial applies stylometric analysis to a set of English-language texts using a Python library called `nltk`. Much of the functionality provided by the `nltk` works with other languages. As long as a language provides a clear way to distinguish word boundaries within a word, `nltk` should perform well. Languages such as Chinese for which there is no clear distinction between word boundaries may be problematic. I have used `nltk` with French texts without any trouble; other languages that use [diacritics](https://en.wikipedia.org/wiki/Diacritic), such as Spanish and German, should also work well with `nltk`. Please refer to [nltk's documentation](http://www.nltk.org/book/) for details. +This tutorial applies stylometric analysis to a set of English-language texts using a Python library called `nltk`. Much of the functionality provided by `nltk` works with other languages. As long as a language provides a clear way to distinguish word boundaries, `nltk` should perform well. Languages such as Chinese, where word boundaries are not clearly marked, may be problematic. I have used `nltk` with French texts without any trouble; other languages that use [diacritics](https://en.wikipedia.org/wiki/Diacritic), such as Spanish and German, should also work well with `nltk`. Please refer to [nltk's documentation](https://www.nltk.org/book/) for details. Only one of the tasks in this tutorial requires language-dependent code. To divide a text into a set of French or Spanish words, you will need to specify the appropriate language as a parameter to `nltk`'s [tokenizer](https://en.wikipedia.org/wiki/Lexical_analysis#Tokenization), which uses English as the default. This will be explained in the tutorial. -Finally, note that some linguistic tasks, such as [part-of-speech tagging](https://en.wikipedia.org/wiki/Part-of-speech_tagging), may not be supported by `nltk` in languages other than English. This tutorial does not cover part-of-speech tagging. Should you need it for your own projects, please refer to the [nltk documentation](http://www.nltk.org/book/) for advice. +Finally, note that some linguistic tasks, such as [part-of-speech tagging](https://en.wikipedia.org/wiki/Part-of-speech_tagging), may not be supported by `nltk` in languages other than English. This tutorial does not cover part-of-speech tagging. Should you need it for your own projects, please refer to the [nltk documentation](https://www.nltk.org/book/) for advice. # The *Federalist Papers* - Historical Context @@ -214,7 +214,7 @@ federalist_by_author_length_distributions[author] = nltk.FreqDist(token_lengths) federalist_by_author_length_distributions[author].plot(15,title=author) ``` -The '%matplotlib inline' declaration below 'import nltk' is required if your development environment is a [Jupyter Notebook](http://jupyter.org/), as it was for me while writing this tutorial; otherwise you may not see the graphs on your screen. If you work in [Jupyter Lab](http://jupyterlab.readthedocs.io/en/stable/getting_started/installation.html), please replace this clause with '%matplotlib ipympl'.
+The '%matplotlib inline' declaration below 'import nltk' is required if your development environment is a [Jupyter Notebook](https://jupyter.org/), as it was for me while writing this tutorial; otherwise you may not see the graphs on your screen. If you work in [Jupyter Lab](https://jupyterlab.readthedocs.io/en/stable/getting_started/installation.html), please replace this clause with '%matplotlib ipympl'. The first line in the code snippet above loads the *Natural Language Toolkit module (nltk)*, which contains an enormous number of useful functions and resources for text processing. We will barely touch its basics in this lesson; if you decide to explore text analysis in Python further, I strongly recommend that you start with [nltk's documentation](https://www.nltk.org/). @@ -332,7 +332,7 @@ However, chi-squared is still a coarse method. For one thing, words that appear In some languages, it may be useful to apply parts-of-speech tagging to the word tokens before counting them, so that the same word used as two different parts of speech may count as two different features. For example, in French, very common words like "la" and "le" serve both as articles (in which case they would translate into English as "the") and as pronouns ("it"). This lesson does not use part-of-speech tagging because it is rarely useful for stylometric analysis in contemporary English and because `nltk`'s default tagger does not support other languages very well. -Should you need to apply part-of-speech tagging to your own data, you may be able to download taggers for other languages, to work with a third-party tool like [Tree Tagger](http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/), or even to train your own tagger, but these techniques are far beyond the scope of the current lesson. +Should you need to apply part-of-speech tagging to your own data, you may be able to download taggers for other languages, to work with a third-party tool like [Tree Tagger](https://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/), or even to train your own tagger, but these techniques are far beyond the scope of the current lesson. # Third Stylometric Test: John Burrows' Delta Method (Advanced) @@ -622,7 +622,7 @@ Thanks to Stéfan Sinclair and Andrew Piper, in whose seminars at McGill Univers [^17]: Stefan Evert et al., "Understanding and explaining Delta measures for authorship attribution", _Digital Scholarship in the Humanities_, vol. 32, no. suppl_2 (2017), pp. ii4-ii16. -[^18]: José Calvo Tello, “Entendiendo Delta desde las Humanidades,” _Caracteres_, May 27 2016, http://revistacaracteres.net/revista/vol5n1mayo2016/entendiendo-delta/. +[^18]: José Calvo Tello, “Entendiendo Delta desde las Humanidades,” _Caracteres_, May 27 2016, https://revistacaracteres.net/revista/vol5n1mayo2016/entendiendo-delta/. [^19]: Javier de la Rosa and Juan Luis Suárez, “The Life of Lazarillo de Tormes and of His Machine Learning Adversities,” _Lemir_, vol. 20 (2016), pp. 373-438. diff --git a/en/lessons/json-and-jq.md b/en/lessons/json-and-jq.md index 578d8588ce..7fb2ad5af9 100755 --- a/en/lessons/json-and-jq.md +++ b/en/lessons/json-and-jq.md @@ -44,7 +44,7 @@ By the end of the lesson, you will understand how to combine basic operators to ## What is JSON? 
-[You may find a short and cogent primer on JSON here.](http://www.json.org/) +[You may find a short and cogent primer on JSON here.](https://www.json.org/) In brief, a JSON **object** is a series of key/value pairs, where **keys** are the names for the **values** they are paired with. For example, the tiny JSON object: @@ -219,7 +219,7 @@ If you want to access just the first (or the _n_-th) item in an array, put a dig ``` **IMPORTANT: you access the first element of an array with `0`, not `1`.** -This is because JavaScript, like quite a few other programming languages ([though not all!](http://stackoverflow.com/questions/3135325/why-do-vector-indices-in-r-start-with-1-instead-of-0)), [starts counting at 0](http://skillcrush.com/2013/01/17/why-programmers-start-counting-at-zero/). +This is because JavaScript, like quite a few other programming languages ([though not all!](https://stackoverflow.com/questions/3135325/why-do-vector-indices-in-r-start-with-1-instead-of-0)), [starts counting at 0](https://skillcrush.com/2013/01/17/why-programmers-start-counting-at-zero/). This filter returns just the first element of the `artObjects` array. `.artObjects[1]` would return the second, and so on. @@ -921,7 +921,7 @@ For fast processing of very large files, or of JSON lines spread across multiple ### Installation on OS X -The easiest way to install jq on OS X is to use the package management system [Homebrew](http://brew.sh/). +The easiest way to install jq on OS X is to use the package management system [Homebrew](https://brew.sh/). This system works via OS X's "Terminal" application, which gives you access to the Bash command line. [For an introduction to this system, see The Programming Historian's "Introduction to the Bash Command Line".](/lessons/intro-to-bash) @@ -974,5 +974,5 @@ If basic counting is all you need to do with your JSON data, then jq can help yo For more involved math, however, it would be more sensible to create table(s) with jq and then continue your analysis in Python, R, or even Excel. If you are working with deeply-nested JSON (that is, many objects within objects), or JSON where objects have inconsistent structure, you may need to use features not covered in this lesson, including [if-then-else statements](https://stedolan.github.io/jq/manual/#if-then-else), [recursion](https://stedolan.github.io/jq/manual/#Recursion), and [reduction](https://stedolan.github.io/jq/manual/#Reduce). -If you can't figure out the filter you need to go from your given input to your desired output, using the tag `jq` over at [StackOverflow](http://stackoverflow.com/questions/tagged/jq) can often get you a speedy answer. -Make sure that you try to [follow best practices when describing your problem](http://stackoverflow.com/help/how-to-ask) and provide a [reproducible example](http://stackoverflow.com/help/mcve). +If you can't figure out the filter you need to go from your given input to your desired output, using the tag `jq` over at [StackOverflow](https://stackoverflow.com/questions/tagged/jq) can often get you a speedy answer. +Make sure that you try to [follow best practices when describing your problem](https://stackoverflow.com/help/how-to-ask) and provide a [reproducible example](https://stackoverflow.com/help/mcve). 
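One way to internalise jq's zero-based indexing is to compare it with the same lookup in a general-purpose language. The snippet below is my own illustration in Python (not part of the lesson or this patch), using a two-record stand-in for the Rijksmuseum-style data discussed above:

```python
# jq's `.artObjects[0].id` written in Python: counting starts at 0 here too.
import json

raw = '{"artObjects": [{"id": "nl-SK-C-5"}, {"id": "nl-SK-A-3262"}]}'
data = json.loads(raw)

print(data["artObjects"][0]["id"])  # first element -> nl-SK-C-5
```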
diff --git a/en/lessons/jupyter-notebooks.md b/en/lessons/jupyter-notebooks.md index 1abcff34b8..2bc580295a 100644 --- a/en/lessons/jupyter-notebooks.md +++ b/en/lessons/jupyter-notebooks.md @@ -393,7 +393,7 @@ From experimenting with code to documenting workflows, from pedagogy to scholarl [^2]: Millman, KJ and Fernando Perez. 2014. "Developing open source scientific practice". In *Implementing Reproducible Research*, Ed. Victoria Stodden, Friedrich Leisch, and Roger D. Peng. https://osf.io/h9gsd/ -[^3]: Sinclair, Stéfan & Geoffrey Rockwell. 2013. "Voyant Notebooks: Literate Programming and Programming Literacy". Journal of Digital Humanities, Vol. 2, No. 3 Summer 2013. http://journalofdigitalhumanities.org/2-3/voyant-notebooks-literate-programming-and-programming-literacy/ +[^3]: Sinclair, Stéfan & Geoffrey Rockwell. 2013. "Voyant Notebooks: Literate Programming and Programming Literacy". Journal of Digital Humanities, Vol. 2, No. 3 Summer 2013. https://journalofdigitalhumanities.org/2-3/voyant-notebooks-literate-programming-and-programming-literacy/ [^4]: Haley Di Pressi, Stephanie Gorman, Miriam Posner, Raphael Sasayama, and Tori Schmitt, with contributions from Roderic Crooks, Megan Driscoll, Amy Earhart, Spencer Keralis, Tiffany Naiman, and Todd Presner. "A Student Collaborator's Bill of Rights". https://humtech.ucla.edu/news/a-student-collaborators-bill-of-rights/ diff --git a/en/lessons/linear-regression.md b/en/lessons/linear-regression.md index bf24cc0dd6..ec943e78f6 100644 --- a/en/lessons/linear-regression.md +++ b/en/lessons/linear-regression.md @@ -699,7 +699,7 @@ Now move on to [Logistic Regression analysis with scikit-learn](/en/lessons/logi [^9]: Ibid. -[^10]: The University of Texas at Austin. _Statistics Online Support: Variable Types_, [http://sites.utexas.edu/sos/variables/](https://perma.cc/GN36-BCPD). +[^10]: The University of Texas at Austin. _Statistics Online Support: Variable Types_, [https://sites.utexas.edu/sos/variables/](https://perma.cc/GN36-BCPD). [^11]: Jarausch, Konrad H., and Kenneth A. Hardy. _Quantitative Methods for Historians: A Guide to Research, Data, and Statistics_. 1991. UNC Press Books, 2016: 122. diff --git a/en/lessons/logistic-regression.md b/en/lessons/logistic-regression.md index 79f24d014d..82a77cb4f1 100644 --- a/en/lessons/logistic-regression.md +++ b/en/lessons/logistic-regression.md @@ -741,7 +741,7 @@ If you are not using Anaconda, you will need to cover the following dependencies 1. Install Python 3 (preferably Python 3.7 or later) 2. Recommended: install and run a virtual environment -3. Install the [scikit-learn library](http://scikit-learn.org/stable/install.html) and its dependencies +3. Install the [scikit-learn library](https://scikit-learn.org/stable/install.html) and its dependencies 4. Install [the Pandas library](https://pandas.pydata.org/docs/) 5. Install the [matplotlib](https://matplotlib.org/) and [seaborn](https://seaborn.pydata.org/) libraries 6. Install [Jupyter Notebook](https://jupyter.org/) and its dependencies diff --git a/en/lessons/mac-installation.md b/en/lessons/mac-installation.md index 2afaa06ac7..892f3e29d3 100755 --- a/en/lessons/mac-installation.md +++ b/en/lessons/mac-installation.md @@ -142,10 +142,10 @@ Now that you and your computer are up and running, we can move onto some more interesting tasks. If you are working through the Python lessons in order, we suggest you next try '[Understanding Web Pages and HTML][].' 
- [Time Machine]: http://support.apple.com/kb/ht1427 - [Python website]: http://www.python.org/ - [Beautiful Soup]: http://www.crummy.com/software/BeautifulSoup/ - [other text editing options]: http://wiki.python.org/moin/PythonEditors/ + [Time Machine]: https://support.apple.com/kb/ht1427 + [Python website]: https://www.python.org/ + [Beautiful Soup]: https://www.crummy.com/software/BeautifulSoup/ + [other text editing options]: https://wiki.python.org/moin/PythonEditors/ [BBEdit]: https://www.barebones.com/products/bbedit/ [Sublime Text website]: https://www.sublimetext.com/download [Understanding Web Pages and HTML]: /lessons/viewing-html-files diff --git a/en/lessons/mapping-with-python-leaflet.md b/en/lessons/mapping-with-python-leaflet.md index f93bdfcbee..39bc9b508e 100755 --- a/en/lessons/mapping-with-python-leaflet.md +++ b/en/lessons/mapping-with-python-leaflet.md @@ -34,18 +34,18 @@ In this lesson, you will learn how to create a web map based on that data. By t This lesson uses: -- [python](/lessons/?topic=python) ([pip](http://pip.readthedocs.org/en/stable/), [geopy](https://github.com/geopy/geopy), [pandas](http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe)) -- [leaflet](http://leafletjs.com/) -- [geojson.io (from mapbox)](http://geojson.io/) +- [python](/lessons/?topic=python) ([pip](https://pip.readthedocs.org/en/stable/), [geopy](https://github.com/geopy/geopy), [pandas](https://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe)) +- [leaflet](https://leafletjs.com/) +- [geojson.io (from mapbox)](https://geojson.io/) - [javascript](https://www.javascript.com/) and [jquery](https://jquery.com/) Optional: If you wish to follow along with pre-made scripts you can find them in [this directory](https://github.com/programminghistorian/jekyll/tree/gh-pages/assets/mapping-with-python-leaflet). To set up your working environment: 1. Create a directory for this project where you will keep all of your scripts and files that you will work from 2. If you have a text editor where you can work from the directory of your project, import that directory. You can use editors such as [TextWrangler](http://www.barebones.com/products/textwrangler/) for OS X, [Notepad++](https://notepad-plus-plus.org/) for Windows, or [Sublime Text](http://www.sublimetext.com/). +2. If you have a text editor where you can work from the directory of your project, import that directory. You can use editors such as [TextWrangler](https://www.barebones.com/products/textwrangler/) for OS X, [Notepad++](https://notepad-plus-plus.org/) for Windows, or [Sublime Text](https://www.sublimetext.com/). If you are using a code editor such as Sublime Text, to import the folder you could drag and drop the folder that you want to work from into your editor window. Once you've done that, the directory will appear on the left hand sidebar as your root folder. If you click on your folder, you'll be able to see the contents of your folder. Importing a folder allows you to easily work with the files in your project. If you need to work with multiple files and nested directories, this will make it easier to search through these files, switch between them while you're working and keep you organized. -3. (Optional) It is recommended to use a [Python virtual environment](http://docs.python-guide.org/en/latest/dev/virtualenvs/) to store the dependencies and versions required for your specific project. +3.
(Optional) It is recommended to use a [Python virtual environment](https://docs.python-guide.org/en/latest/dev/virtualenvs/) to store the dependencies and versions required for your specific project. ### Getting Data: Download the CSV @@ -55,7 +55,7 @@ We're going to start with a plain comma-separated values (CSV) data file and cre ```curl -O https://programminghistorian.org/assets/mapping-with-python-leaflet/census.csv``` -The original source of this data is from the [Greater London Authority London Datastore](http://data.london.gov.uk/dataset/historic-census-population). +The original source of this data is the [Greater London Authority London Datastore](https://data.london.gov.uk/dataset/historic-census-population). ## Geocoding with Python @@ -75,13 +75,13 @@ If you're familiar with _Programming Historian_, you might have already noticed [Geopy](https://github.com/geopy/geopy) is a Python library that gives you access to the various geocoding APIs. Geopy makes it easy for Python developers to locate the coordinates of addresses, cities, countries, and landmarks across the globe using third-party geocoders and other data sources. Geopy includes geocoders built by OpenStreetMap Nominatim, ESRI ArcGIS, Google Geocoding API (V3), Baidu Maps, Bing Maps API, Yahoo! PlaceFinder, Yandex, IGN France, GeoNames, NaviData, OpenMapQuest, What3Words, OpenCage, SmartyStreets, geocoder.us, and GeocodeFarm geocoder services. -[Pandas](http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe) is another python library that we will use. It's very popular library amongst scientists and mathematicians to manipulate and analyse data. +[Pandas](https://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe) is another Python library that we will use. It's a very popular library amongst scientists and mathematicians for manipulating and analysing data. -Finally, [Pip](http://pip.readthedocs.org/en/stable/) is a very useful package manager to help you install things like Geopy and Pandas! If you've [already installed Python](/lessons/introduction-and-installation) and [installed pip](/lessons/installing-python-modules-pip), type ```pip list``` to see if you already have the geopy and pandas packages installed. If you do not have pip installed, you can download [get-pip.py](https://bootstrap.pypa.io/get-pip.py), then from your command line go to the directory where get-pip.py is located and run +Finally, [Pip](https://pip.readthedocs.org/en/stable/) is a very useful package manager to help you install things like Geopy and Pandas! If you've [already installed Python](/lessons/introduction-and-installation) and [installed pip](/lessons/installing-python-modules-pip), type ```pip list``` to see if you already have the geopy and pandas packages installed. If you do not have pip installed, you can download [get-pip.py](https://bootstrap.pypa.io/get-pip.py), then from your command line go to the directory where get-pip.py is located and run ```python get-pip.py ``` -For the most up to date instructions, you can visit [pip's installation manual](http://pip.readthedocs.org/en/stable/installing/). +For the most up to date instructions, you can visit [pip's installation manual](https://pip.readthedocs.org/en/stable/installing/).
To install Geopy and Pandas, open your [command line (using this lesson as a guideline if necessary)](/lessons/intro-to-bash) and install the Geopy and Pandas libraries:

@@ -94,7 +94,7 @@
pip install pytz
pip install geopy
pip install pandas
```
-Note: We are installing numpy, python-dateutil, and pytz because pandas [requires them](http://pandas.pydata.org/pandas-docs/stable/install.html#dependencies).
+Note: We are installing numpy, python-dateutil, and pytz because pandas [requires them](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies).

For Windows, you may need to install [Microsoft Visual C++ Compiler for Python](https://wiki.python.org/moin/WindowsCompilers). Set the environmental variables to recognize python and pip from the command line:

@@ -136,7 +136,7 @@ def main():

We are first using pandas' pre-existing read_csv() function to open the CSV file. We pass the filepath to our data file in the first parameter, 'census-historic-population-borough.csv'. If it was in a folder called 'data', you would put 'data/census-historic-population-borough.csv'. The second parameter, ```index_col=None```, will number the rows to generate the index without using any column. If we use ```index_col=0```, it indexes the first column in your data as the row name. The third parameter, ```header=0```, indicates that there is a header row, which is the first line of the spreadsheet (Note: Python uses "0" instead of "1" to indicate the first value in an index). The fourth parameter ```sep=","``` is where you indicate the delimiter symbol that is used to split data into fields. Since we are using a comma-separated values data format, we need to indicate that we are using a comma to split our data.

-There are many other parameters you can use. A full list is available in the pandas documentation on the [read_csv() function](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html).
+There are many other parameters you can use. A full list is available in the pandas documentation on the [read_csv() function](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html).

Next, we anticipate that when we geocode the csv we will get points in the format of (latitude, longitude). If we only want the latitude value of the point in a csv column, we will define a function to isolate that value. The same can be done for our longitude value.

@@ -164,7 +164,7 @@ Next, select the geolocator you want to use. Here we're creating two geolocator

| request limit | 1 request/s or timeout |
| performance test on census data | 33.5s |

-You can also choose a different geolocator from the list found in [the geopy documentation](http://geopy.readthedocs.org/). GoogleV3 is a geocoder compatible with geopy, it is a reliable geolocator choice because of their large geographic data coverage. However, since July 2018 an API key is required, and you need to enable billing in Google Cloud to use it. For more information about choosing geolocators, you can follow the discussion in the [geopy repository on Github](https://github.com/geopy/geopy/issues/90).
+You can also choose a different geolocator from the list found in [the geopy documentation](https://geopy.readthedocs.org/). GoogleV3 is a geocoder compatible with geopy; it is a reliable geolocator choice because of its large geographic data coverage. However, since July 2018 an API key is required, and you need to enable billing in Google Cloud to use it. 
For more information about choosing geolocators, you can follow the discussion in the [geopy repository on Github](https://github.com/geopy/geopy/issues/90).

To use a geolocator, import it and assign a variable name (in this case we use the name geolocator):

@@ -186,7 +186,7 @@ def main():
     geolocator = Nominatim()
 ```

-Finally, using pandas you want to create a column in your spreadsheet called 'latitude'. The script will read the existing 'Area_Name' data column, run the geopy [geolocator](http://geopy.readthedocs.io/en/latest/#module-geopy.geocoders) on the column using pandas' [apply function](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.apply.html), and generate a latitude coordinate in that column. The same transformation will occur in the 'longitude' column. Once this is finished it will output a new CSV file with those two columns:
+Finally, using pandas you want to create a column in your spreadsheet called 'latitude'. The script will read the existing 'Area_Name' data column, run the geopy [geolocator](https://geopy.readthedocs.io/en/latest/#module-geopy.geocoders) on the column using pandas' [apply function](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.apply.html), and generate a latitude coordinate in that column. The same transformation will occur in the 'longitude' column. Once this is finished it will output a new CSV file with those two columns:

```python
import geopy

@@ -300,7 +300,7 @@ To address the timeout error, you could add the parameter ```timeout```, which s

Now that you have a spreadsheet full of coordinate data, we can convert the CSV spreadsheet into a format that web maps like, like GeoJSON. GeoJSON is a web mapping standard of JSON data. There are a couple of ways to make GeoJSON:

-The easiest, recommended way is to use a UI tool developed by Mapbox called [geojson.io](http://geojson.io). All you have to do is click and drag your csv file into the data window (the right side of the screen, next to the map), and it will automatically format your data into GeoJSON for you. You can select the 'GeoJSON' option under 'Save.' Save your GeoJSON file as `census.geojson`.
+The easiest, recommended way is to use a UI tool developed by Mapbox called [geojson.io](https://geojson.io). All you have to do is click and drag your csv file into the data window (the right side of the screen, next to the map), and it will automatically format your data into GeoJSON for you. You can select the 'GeoJSON' option under 'Save.' Save your GeoJSON file as `census.geojson`.

{% include figure.html filename="webmap-01-geojsonio.gif" caption="Drag and Drop GeoJSON creation!" %}

@@ -381,7 +381,7 @@ Which you can now run by using the command:

python geocoder-helpercolumn.py census_country.csv
```

-Turn your clean data into GeoJSON by saving it as `census.geojson` and test it out at [geojson.io](http://geojson.io). Remember, drag the new CSV you created (`census_country.csv` into the window to create that beautiful JSON). Do the results look better now? Good!
+Turn your clean data into GeoJSON by saving it as `census.geojson` and test it out at [geojson.io](https://geojson.io). Remember, drag the new CSV you created (`census_country.csv`) into the window to create that beautiful JSON. Do the results look better now? Good!

## Using Leaflet to Create a Web Map

@@ -397,9 +397,9 @@ SimpleHTTPServer is a Python module.
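A quick aside for Python 3 users, where SimpleHTTPServer was folded into the `http.server` module: the same local server can also be started from a short script. This is a sketch, not part of the lesson; the port matches the commands that follow.

```python
# Serve the current directory on port 8080 with Python 3's built-in
# http.server (the successor to Python 2's SimpleHTTPServer).
from http.server import HTTPServer, SimpleHTTPRequestHandler

server = HTTPServer(("localhost", 8080), SimpleHTTPRequestHandler)
server.serve_forever()
```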
If you want to change the server to port 8080, run ```python -m SimpleHTTPServer 8080``` or ```python3 -m http.server 8080``` (for Python 3)

-In your browser go to http://localhost:8080 and you should see the files you've been working with so far.
+In your browser go to http://localhost:8080 and you should see the files you've been working with so far.

-Now in your text editor open a new document and save it as an html file (mymap.html). If you want to do a quick test, copy and paste the text below, refresh your http://localhost:8080 and open the html file in your browser.
+Now in your text editor open a new document and save it as an html file (mymap.html). If you want to do a quick test, copy and paste the text below, refresh your http://localhost:8080 and open the html file in your browser.

```html

@@ -655,7 +655,7 @@ window.onload = function () {
 };
```

-What we've done here is edit the [onEachFeature function](http://leafletjs.com/SlavaUkraini/reference-1.2.0.html#geojson-oneachfeature), which gets called for each feature (in this case, each marker popup) to add additional information about each marker contained in our `census.geojson` data. To add attribute information from our `census.geojson` file, we use the convention `feature.properties.ATTRIBUTE_NAME` to access the population data. In this case, we are adding `feature.properties.Pop_2001`, `feature.properties.Pop_1981`, and `feature.properties.Pop_1801`, and adding a bit of styling with html for readability.
+What we've done here is edit the [onEachFeature function](https://leafletjs.com/SlavaUkraini/reference-1.2.0.html#geojson-oneachfeature), which gets called for each feature (in this case, each marker popup) to add additional information about each marker contained in our `census.geojson` data. To add attribute information from our `census.geojson` file, we use the convention `feature.properties.ATTRIBUTE_NAME` to access the population data. In this case, we are adding `feature.properties.Pop_2001`, `feature.properties.Pop_1981`, and `feature.properties.Pop_1801`, and adding a bit of styling with html for readability.

{% include figure.html filename="webmap-06-exercise02.jpg" caption="Exercise 02" %}

diff --git a/en/lessons/naive-bayesian.md b/en/lessons/naive-bayesian.md
index 13536eedc7..5c179d283b 100755
--- a/en/lessons/naive-bayesian.md
+++ b/en/lessons/naive-bayesian.md
@@ -93,7 +93,7 @@ not in the learner code itself).*

## The Old Bailey Digital Archive

-The [Old Bailey digital archive](http://www.oldbaileyonline.org/)
+The [Old Bailey digital archive](https://www.oldbaileyonline.org/)
 contains 197,745 criminal trials held at the Old Bailey, aka the
 Central Criminal Court in London. The trials were held between 1674 and
 1913, and since the archive provides the full transcript of each trial,
 many
@@ -1460,25 +1460,25 @@ big problem. Happy hunting!
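The learner code itself is not shown in this diff. Purely as an illustrative aside, the sketch below shows the log-space scoring step a naive Bayesian classifier performs; all names are assumptions rather than the lesson's code, and the link list that follows explains the Bayes' rule, logarithms, and priors involved.

```python
import math

def log_score(tokens, token_counts, total_count, vocabulary_size, prior):
    """Log-space naive Bayes score for one category (e.g. 'guilty').

    Summing log probabilities avoids the numeric underflow that comes
    from multiplying many small conditional probabilities together.
    """
    score = math.log(prior)
    for token in tokens:
        # Laplace ("add one") smoothing handles tokens unseen in training.
        p = (token_counts.get(token, 0) + 1) / (total_count + vocabulary_size)
        score += math.log(p)
    return score
```

Whichever category receives the highest score is the classifier's prediction for that trial.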
- [A Naive Bayesian in the Old Bailey]: http://digitalhistoryhacks.blogspot.com/2008/05/naive-bayesian-in-old-bailey-part-1.html - [Old Bailey digital archive]: http://www.oldbaileyonline.org/ + [A Naive Bayesian in the Old Bailey]: https://digitalhistoryhacks.blogspot.com/2008/05/naive-bayesian-in-old-bailey-part-1.html + [Old Bailey digital archive]: https://www.oldbaileyonline.org/ [A zip file of the scripts]: /assets/naive-bayesian/baileycode.zip [another zip file]: https://doi.org/10.5281/zenodo.13284 - [BeautifulSoup]: http://www.crummy.com/software/BeautifulSoup/ - [search interface]: http://www.oldbaileyonline.org/forms/formMain.jsp - [classification]: http://en.wikipedia.org/wiki/Statistical_classification - [clustering]: http://home.deib.polimi.it/matteucc/Clustering/tutorial_html/ - ["ff0000," the HTML code for red]: http://www.paulgraham.com/spam.html - [an explanation of Bayes' rule and conditional probabilities]: http://www.yudkowsky.net/rational/bayes + [BeautifulSoup]: https://www.crummy.com/software/BeautifulSoup/ + [search interface]: https://www.oldbaileyonline.org/forms/formMain.jsp + [classification]: https://en.wikipedia.org/wiki/Statistical_classification + [clustering]: https://home.deib.polimi.it/matteucc/Clustering/tutorial_html/ + ["ff0000," the HTML code for red]: https://www.paulgraham.com/spam.html + [an explanation of Bayes' rule and conditional probabilities]: https://www.yudkowsky.net/rational/bayes [topic modeling]: /lessons/topic-modeling-and-mallet - [logarithms]: http://betterexplained.com/articles/using-logs-in-the-real-world/ - [priors]: http://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introbayes_sect004.htm + [logarithms]: https://betterexplained.com/articles/using-logs-in-the-real-world/ + [priors]: https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introbayes_sect004.htm [Introduction to the Bash Command Line]: /lessons/intro-to-bash [Automated Downloading with wget]: /lessons/automated-downloading-with-wget [Understanding Regular Expressions]: /lessons/understanding-regular-expressions [Intro to Beautiful Soup]: /lessons/intro-to-beautiful-soup - [documentation for developers]: http://www.oldbaileyonline.org/static/DocAPI.jsp - [Old Bailey search page]: http://www.oldbaileyonline.org/forms/formMain.jsp - [pypy]: http://pypy.org/ - [Snowball Stemmer]: http://snowball.tartarus.org/ - [a more detailed explanation of TF-IDF]: http://stevenloria.com/finding-important-words-in-a-document-using-tf-idf/ + [documentation for developers]: https://www.oldbaileyonline.org/static/DocAPI.jsp + [Old Bailey search page]: https://www.oldbaileyonline.org/forms/formMain.jsp + [pypy]: https://pypy.org/ + [Snowball Stemmer]: https://snowball.tartarus.org/ + [a more detailed explanation of TF-IDF]: https://stevenloria.com/finding-important-words-in-a-document-using-tf-idf/ diff --git a/en/lessons/normalizing-data.md b/en/lessons/normalizing-data.md index 70ff98079b..e6a074f26b 100755 --- a/en/lessons/normalizing-data.md +++ b/en/lessons/normalizing-data.md @@ -259,11 +259,11 @@ to make sure you have the correct code. 
- python-lessons4.zip ([zip sync][]) [From HTML to a List of Words (2)]: /lessons/from-html-to-list-of-words-2 - [web page]: http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 + [web page]: https://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 [From HTML to a List of Words (1)]: /lessons/from-html-to-list-of-words-1 [Manipulating Strings in Python]: /lessons/manipulating-strings-in-python - [Unicode]: http://unicode.org/ - [Python support]: https://web.archive.org/web/20180502053841/http://www.diveintopython.net/xml_processing/unicode.html - [Dive into Python]: https://web.archive.org/web/20180416143856/http://www.diveintopython.net/regular_expressions/index.html + [Unicode]: https://unicode.org/ + [Python support]: https://web.archive.org/web/20180502053841/https://www.diveintopython.net/xml_processing/unicode.html + [Dive into Python]: https://web.archive.org/web/20180416143856/https://www.diveintopython.net/regular_expressions/index.html [zip]: /assets/python-lessons3.zip [zip sync]: /assets/python-lessons4.zip diff --git a/en/lessons/ocr-tutorial.md b/en/lessons/ocr-tutorial.md index c385e89f23..a136b03060 100755 --- a/en/lessons/ocr-tutorial.md +++ b/en/lessons/ocr-tutorial.md @@ -22,7 +22,7 @@ and Going through a text file line by line and correcting OCR errors one at a time is hugely error-prone, as any proof reader will tell you. If you are dealing with a narrative, a monograph, a diary, or something like that, a great deal of that kind of proofing will be unavoidable; however, if what you have is an ordered collection of primary source documents, a legal code say, or a cartulary, you are far better served by creating an ordered data structure out of it __first__. You will wind up with data that is useful in a variety of contexts, even before your army of street urchins starts correcting specific OCR typos. -This is where a scripting language like Python comes very much in handy. For our project we wanted to prepare some of the documents from a 12th century collection of *imbreviatura* from the Italian scribe known as [Giovanni Scriba](http://www.worldcat.org/oclc/17591390) so that they could be marked up by historians for subsequent NLP analysis or potentially for other purposes as well. The pages of the 1935 published edition look like this. +This is where a scripting language like Python comes very much in handy. For our project we wanted to prepare some of the documents from a 12th century collection of *imbreviatura* from the Italian scribe known as [Giovanni Scriba](https://www.worldcat.org/oclc/17591390) so that they could be marked up by historians for subsequent NLP analysis or potentially for other purposes as well. The pages of the 1935 published edition look like this. ![GS page 110](gs_pg110.png) @@ -89,7 +89,7 @@ You will note that some of this metadata is page-bound and some of it is charter IL CIRTOL.'RE DI G:OV.I\N( sca:FR 339 342 NI .\ßlO CHIAUDANO 9LtTTIA MORESCO -These strings are not regular enough to reliably find with regular expressions; however, if you know what the strings are supposed to look like, you can compose some kind of string similarity algorithm to test each string against an exemplar and measure the likelihood that it is a page header. Fortunately, I didn't have to compose such an algorithm, Vladimir Levenshtein did it for us in 1965 (see: ). 
A computer language can encode this algorithm in any number of ways, here's an effective Python function that will work for us:
+These strings are not regular enough to reliably find with regular expressions; however, if you know what the strings are supposed to look like, you can compose some kind of string similarity algorithm to test each string against an exemplar and measure the likelihood that it is a page header. Fortunately, I didn't have to compose such an algorithm; Vladimir Levenshtein did it for us in 1965 (see: ). A computer language can encode this algorithm in any number of ways; here's an effective Python function that will work for us:

```python

@@ -466,7 +466,7 @@ Print out our resulting dictionary using `pprint(charters)` and you'll see somet
 }
```

-Printing out your Python dictionary as a literal string is not a bad thing to do. For a text this size, the resulting file is perfectly manageable, can be mailed around usefully and read into a python repl session very simply using `eval()`, or pasted directly into a Python module file. On the other hand, if you want an even more reliable way to serialize it in an exclusively Python context, look into [`Pickle`](https://docs.python.org/2/library/pickle.html). If you need to move it to some other context, JavaScript for example, or some `RDF` triple stores, Python's [`json`](https://docs.python.org/2/library/json.html#module-json) module will translate effectively. If you have to get some kind of XML output, I will be very sorry for you, but the [`lxml`](http://lxml.de/) python module may ease the pain a little.
+Printing out your Python dictionary as a literal string is not a bad thing to do. For a text this size, the resulting file is perfectly manageable, can be mailed around usefully and read into a python repl session very simply using `eval()`, or pasted directly into a Python module file. On the other hand, if you want an even more reliable way to serialize it in an exclusively Python context, look into [`Pickle`](https://docs.python.org/2/library/pickle.html). If you need to move it to some other context, JavaScript for example, or some `RDF` triple stores, Python's [`json`](https://docs.python.org/2/library/json.html#module-json) module will translate effectively. If you have to get some kind of XML output, I will be very sorry for you, but the [`lxml`](https://lxml.de/) python module may ease the pain a little.

## Order from disorder, huzzah.
Now that we have an ordered data structure, we can do many things with it. As a very simple example, let's just print it out as HTML for display on a website:

@@ -525,7 +525,7 @@ fout.write("""""")

Drop the resulting file on a web browser, and you've got a nicely formatted electronic edition. Being able to do this with your, mostly uncorrected, OCR output is not a trivial advantage. If you're serious about creating a clean, error-free electronic edition of anything, you've got to do some serious proofreading. Having a source text formatted for reading is crucial; moreover, if your proofreader can change the font, spacing, color, layout, and so forth at will, you can increase their accuracy and productivity substantially. With this example in a modern web browser, tweaking those parameters with some simple css declarations is easy. Also, with some ordered HTML to work with, you might crowd-source the OCR error correction, instead of hiring that army of illiterate street urchins.
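The diff above elides the lesson's Levenshtein function, so here is a standard dynamic-programming version of the algorithm as an illustrative sketch. The exemplar running head in the usage lines is a hypothetical reconstruction, not taken from the lesson.

```python
def levenshtein(a, b):
    """Edit distance between a and b (insertions, deletions, substitutions),
    computed row by row with the classic dynamic-programming table.
    A textbook sketch, not the lesson's own function."""
    previous = list(range(len(b) + 1))
    for i, char_a in enumerate(a, start=1):
        current = [i]
        for j, char_b in enumerate(b, start=1):
            current.append(min(
                previous[j] + 1,                       # deletion
                current[j - 1] + 1,                    # insertion
                previous[j - 1] + (char_a != char_b),  # substitution
            ))
        previous = current
    return previous[-1]

# Score an OCR string against an exemplar header; values near 1.0 suggest
# the string is a page header. The exemplar is a hypothetical guess.
exemplar = "IL CARTOLARE DI GIOVANNI SCRIBA"
candidate = "IL CIRTOL.'RE DI G:OV.I\\N( sca:FR"
similarity = 1 - levenshtein(candidate, exemplar) / max(len(candidate), len(exemplar))
```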
-Beyond this though, there's lots you can do with an ordered data set, including feeding it back through a markup tool like the [brat](http://brat.nlplab.org) as we did for the ChartEx project. Domain experts can then start adding layers of semantic tagging even if you don't do any further OCR error correction.
+Beyond this though, there's lots you can do with an ordered data set, including feeding it back through a markup tool like the [brat](https://brat.nlplab.org) as we did for the ChartEx project. Domain experts can then start adding layers of semantic tagging even if you don't do any further OCR error correction.

The bits of code above are in no way a turn-key solution for cleaning arbitrary OCR output. There is no such magic wand. The Google approach to scanning the contents of research libraries threatens to drown us in an ocean of bad data. Worse, it elides a fundamental fact of digital scholarship: digital sources are hard to get. Reliable, flexible, and useful digital texts require careful redaction and persistent curation. Google, Amazon, Facebook, *et alia* do not have to concern themselves with the quality of their data, just its quantity. Historians, on the other hand, must care first for the integrity of their sources.

diff --git a/en/lessons/preserving-your-research-data.md b/en/lessons/preserving-your-research-data.md
index 93acf4d1ca..27c19e6472 100755
--- a/en/lessons/preserving-your-research-data.md
+++ b/en/lessons/preserving-your-research-data.md
@@ -165,7 +165,7 @@ documented), though existing schema such as [Markdown][] are available
(Markdown files are saved as .md). An excellent Markdown cheat sheet is
available on GitHub
) for those who wish to follow – or
adapt – this existing schema. Notepad++
- is recommended for Windows users, though
+ is recommended for Windows users, though
by no means essential, for working with .md files. Mac or Unix users
may find [Komodo Edit][] or [Text Wrangler][] helpful.

@@ -213,7 +213,7 @@ blogging services. WordPress URLs follow the format:

-   *website name*/*year(4 digits)*/*month (2 digits)*/*day (2
    digits)*/*words-of-title-separated-by-hyphens*
- 
+ 

A similar style is used by news agencies such as The Guardian
newspaper:

@@ -221,7 +221,7 @@ newspaper:

-   *website name*/*section subdivision*/*year (4 digits)*/*month (3
    characters)*/*day (2 digits)*/*words-describing-content-separated-by-hyphens*
- 
+ .

In archival catalogues, URLs structured by a single data element are
often used. The British Cartoon Archive structures its online archive
using the format:

-   *website name*/record/*reference number*
- 
+ 

And the Old Bailey Online uses the format:

-   *website name*/browse.jsp?ref=*reference number*
- 
+ 

What we learn from these examples is that a combination of semantic
description and data elements makes consistent and predictable data

@@ -415,11 +415,11 @@ blog (17 October 2013)

Hitchcock, Tim, 'Judging a book by its URLs', Historyonics blog (3
January 2014)
- 
+ 

Howard, Sharon, 'Unclean, unclean! 
What historians can do about sharing our messy research data', Early Modern Notes blog (18 May 2013)
- 
+ 

Noble, William Stafford, A Quick Guide to Organizing Computational
Biology Projects. PLoS Comput Biol 5(7): e1000424 (2009)

@@ -432,7 +432,7 @@ Information Management: Organising Humanities Material' (2011)

Pennock, Maureen, 'The Twelve Principles of Digital Preservation (and a
cartridge in a repository…)', British Library Collection Care blog (3
September 2013)
- 
+ 

Pritchard, Adam, 'Markdown Cheatsheet' (2013)

@@ -441,10 +441,10 @@ Rosenzweig, Roy, 'Scarcity or Abundance? Preserving the Past in a
Digital Era', The American Historical Review 108:3 (2003), 735-762.

UK Data Archive, 'Documenting your Data'
- 
+ 

-  [PRINCE2]: http://en.wikipedia.org/wiki/PRINCE2
-  [platform agnostic]: http://en.wikipedia.org/wiki/Cross-platform
-  [Markdown]: http://en.wikipedia.org/wiki/Markdown
-  [Komodo Edit]: http://komodoide.com/komodo-edit/
+  [PRINCE2]: https://en.wikipedia.org/wiki/PRINCE2
+  [platform agnostic]: https://en.wikipedia.org/wiki/Cross-platform
+  [Markdown]: https://en.wikipedia.org/wiki/Markdown
+  [Komodo Edit]: https://komodoide.com/komodo-edit/
   [Text Wrangler]: https://www.barebones.com/products/textwrangler/
diff --git a/en/lessons/qgis-layers.md b/en/lessons/qgis-layers.md
index 9800166857..5934cf5b89 100755
--- a/en/lessons/qgis-layers.md
+++ b/en/lessons/qgis-layers.md
@@ -102,11 +102,11 @@ making the downloads quick!

-   Navigate to the links below in your web browser and then download
    the following PEI shapefiles. We created the final two, so they
    will download directly:

-1. 
-2. 
-3. 
-4. 
-5. 
+1. 
+2. 
+3. 
+4. 
+5. 
 6. [PEI Highways][]
 7. [PEI Places][]

@@ -463,16 +463,16 @@ save your work!**

*This lesson is part of the [Geospatial Historian][].*

-  [QGIS Download page]: http://qgis.org/en/site/forusers/download.html
-  [KyngChaos Qgis download page]: http://www.kyngchaos.com/software/qgis
-  [Download Archive]: http://www.kyngchaos.com/software/archive
+  [QGIS Download page]: https://qgis.org/en/site/forusers/download.html
+  [KyngChaos Qgis download page]: https://www.kyngchaos.com/software/qgis
+  [Download Archive]: https://www.kyngchaos.com/software/archive
   [PEI Highways]: /assets/qgis-layers/PEI_highway.zip
   [PEI Places]: /assets/qgis-layers/PEI_placenames.zip
-  [Coordinate Reference System]: http://en.wikipedia.org/wiki/Spatial_reference_system
+  [Coordinate Reference System]: https://en.wikipedia.org/wiki/Spatial_reference_system
   [NRCan's website]: https://perma.cc/B4UW-R4FK
-  [Double Stereographic projection]: http://www.gov.pe.ca/gis/index.php3?number=77865&lang=E
-  [Tutorial: Working with Projections in QGIS]: http://web.archive.org/web/20180807132308/http://qgis.spatialthoughts.com/2012/04/tutorial-working-with-projections-in.html
-  [defined]: http://www.gislounge.com/geodatabases-explored-vector-and-raster-data/
-  [aerial photos]: http://en.wikipedia.org/wiki/Orthophoto
+  [Double Stereographic projection]: https://www.gov.pe.ca/gis/index.php3?number=77865&lang=E
+  [Tutorial: Working with Projections in QGIS]: https://web.archive.org/web/20180807132308/https://qgis.spatialthoughts.com/2012/04/tutorial-working-with-projections-in.html
+  [defined]: https://www.gislounge.com/geodatabases-explored-vector-and-raster-data/
+  [aerial photos]: https://en.wikipedia.org/wiki/Orthophoto
   [PEI_CumminsMap1927.tif]: /assets/qgis-layers/PEI_CumminsMap1927_compLZW.tif
-  [Geospatial Historian]: http://geospatialhistorian.wordpress.com/
+  [Geospatial Historian]: https://geospatialhistorian.wordpress.com/
diff --git a/en/lessons/r-basics-with-tabular-data.md b/en/lessons/r-basics-with-tabular-data.md index 51785fc39f..a1dc5f4a8e 100755 --- a/en/lessons/r-basics-with-tabular-data.md +++ b/en/lessons/r-basics-with-tabular-data.md @@ -534,10 +534,10 @@ For more information on R, visit the [R Manual](https://cran.r-project.org/doc/m There are also a number of other R tutorials online including: -* [R: A self-learn tutorial](http://web.archive.org/web/20191015004305/https://www.nceas.ucsb.edu/files/scicomp/Dloads/RProgramming/BestFirstRTutorial.pdf) - this tutorial goes through a series of functions and provides exercises to practice skills. +* [R: A self-learn tutorial](https://web.archive.org/web/20191015004305/https://www.nceas.ucsb.edu/files/scicomp/Dloads/RProgramming/BestFirstRTutorial.pdf) - this tutorial goes through a series of functions and provides exercises to practice skills. * [DataCamp Introduction to R](https://www.datacamp.com/courses/free-introduction-to-r) - this is a free online course that gives you feedback on your code to help identify errors and learn how to write code more efficiently. -Finally, a great resource for digital historians is Lincoln Mullen's [Digital History Methods in R](http://dh-r.lincolnmullen.com/). It is a draft of a book written specifically on how to use R for digital history work. +Finally, a great resource for digital historians is Lincoln Mullen's [Digital History Methods in R](https://dh-r.lincolnmullen.com/). It is a draft of a book written specifically on how to use R for digital history work. ## Endnotes diff --git a/en/lessons/research-data-with-unix.md b/en/lessons/research-data-with-unix.md index c3857210f2..fbfce2a313 100755 --- a/en/lessons/research-data-with-unix.md +++ b/en/lessons/research-data-with-unix.md @@ -40,7 +40,7 @@ _____ ## Software and setup -Windows users will need to install Git Bash. This can be installed by downloading the most recent installer at the [git for windows webpage](http://msysgit.github.io/). Instructions for installation are available at [Open Hatch](https://web.archive.org/web/20190318191709/https://openhatch.org/missions/windows-setup/install-git-bash). +Windows users will need to install Git Bash. This can be installed by downloading the most recent installer at the [git for windows webpage](https://msysgit.github.io/). Instructions for installation are available at [Open Hatch](https://web.archive.org/web/20190318191709/https://openhatch.org/missions/windows-setup/install-git-bash). OS X and Linux users will need to use their terminal shells to follow this lesson, as discussed in "[Introduction to the Bash Command Line](../lessons/intro-to-bash)." @@ -48,7 +48,7 @@ This lesson was written using Git Bash 1.9.0 and the Windows 7 operating system. The files used in this lesson are available on "[Figshare](https://doi.org/10.6084/m9.figshare.1172094)". The data contains the metadata for journal articles categorised under 'History' in the British Library ESTAR database. The data is shared under a CC0 copyright waiver. -Download the required files, save them to your computer, and unzip them. If you do not have default software installed to interact with .zip files, we recommend [7-zip](http://www.7-zip.org/) for this purpose. On Windows, we recommend unzipping the folder provided to your c: drive so the files are at `c:\proghist\`. However, any location will work fine, but you may have to adjust your commands as you are following along with this lesson if you use a different location. 
+Download the required files, save them to your computer, and unzip them. If you do not have default software installed to interact with .zip files, we recommend [7-zip](https://www.7-zip.org/) for this purpose. On Windows, we recommend unzipping the folder provided to your c: drive so the files are at `c:\proghist\`. However, any location will work fine, but you may have to adjust your commands as you are following along with this lesson if you use a different location.
    April 2025 update: The paths indicated for Windows no longer correspond to those used by recent versions of Git (version 2.49.0 at the time of writing). Whenever the path c:\proghist\... is mentioned, you will need to replace it with c/Users/USERNAME/proghist/.... @@ -70,9 +70,9 @@ Type `ls` and then hit enter. This prints, or displays, a list that includes two The files in this directory are the dataset `2014-01_JA.csv` that contains journal article metadata and a file containing documentation about `2014-01_JA.csv` called `2014-01_JA.txt`. -The subdirectory is named `derived_data`. It contains four [.tsv](http://en.wikipedia.org/wiki/Tab-separated_values) files derived from `2014-01_JA.csv`. Each of these includes all data where a keyword such as `africa` or `america` appears in the 'Title' field of `2014-01_JA.csv`. The `derived_data` directory also includes a subdirectory called `results`. +The subdirectory is named `derived_data`. It contains four [.tsv](https://en.wikipedia.org/wiki/Tab-separated_values) files derived from `2014-01_JA.csv`. Each of these includes all data where a keyword such as `africa` or `america` appears in the 'Title' field of `2014-01_JA.csv`. The `derived_data` directory also includes a subdirectory called `results`. -*Note: [CSV](http://en.wikipedia.org/wiki/Comma-separated_values) files are those in which the units of data (or cells) are separated by commas (comma-separated-values) and TSV files are those in which they are separated by tabs. Both can be read in simple text editors or in spreadsheet programs such as Libre Office Calc or Microsoft Excel.* +*Note: [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) files are those in which the units of data (or cells) are separated by commas (comma-separated-values) and TSV files are those in which they are separated by tabs. Both can be read in simple text editors or in spreadsheet programs such as Libre Office Calc or Microsoft Excel.* Before you begin working with these files, you should move into the directory in which they are stored. Navigate to `c:\proghist\data\derived_data` on Windows or `~/users/USERNAME/proghist/data/derived_data` on OS X. @@ -128,8 +128,8 @@ _____ In this lesson you have learnt to undertake some basic file counting, to query across research data for common strings, and to save results and derived data. Though this lesson is restricted to using the Unix shell to count and mine tabulated data, the processes can be easily extended to free text. For this we recommend two guides written by William Turkel: -- William Turkel, '[Basic Text Analysis with Command Line Tools in Linux](http://williamjturkel.net/2013/06/15/basic-text-analysis-with-command-line-tools-in-linux/)' (15 June 2013) -- William Turkel, '[Pattern Matching and Permuted Term Indexing with Command Line Tools in Linux](http://williamjturkel.net/2013/06/20/pattern-matching-and-permuted-term-indexing-with-command-line-tools-in-linux/)' (20 June 2013) +- William Turkel, '[Basic Text Analysis with Command Line Tools in Linux](https://williamjturkel.net/2013/06/15/basic-text-analysis-with-command-line-tools-in-linux/)' (15 June 2013) +- William Turkel, '[Pattern Matching and Permuted Term Indexing with Command Line Tools in Linux](https://williamjturkel.net/2013/06/20/pattern-matching-and-permuted-term-indexing-with-command-line-tools-in-linux/)' (20 June 2013) As these recommendations suggest, the present lesson only scratches the surface of what the Unix shell environment is capable of. 
It is hoped, however, that this lesson has provided a taster sufficient to prompt further investigation and productive play.

diff --git a/en/lessons/retired/OCR-and-Machine-Translation.md b/en/lessons/retired/OCR-and-Machine-Translation.md
index c1e4c5997c..485538beb2 100644
--- a/en/lessons/retired/OCR-and-Machine-Translation.md
+++ b/en/lessons/retired/OCR-and-Machine-Translation.md
@@ -80,7 +80,7 @@ With ImageMagick installed, we can now convert our files from PDF to TIFF and ma

The command does several things that significantly increase the OCR accuracy rate. The `density` and `depth` commands both make sure the file has the appropriate dots per inch [(DPI)](https://en.wikipedia.org/wiki/Dots_per_inch) for OCR. The `strip`, `background`, and `alpha` commands make sure that the file has the right background. Most importantly, this command converts the PDF into a TIFF image file. If you are not using a PDF, you should still use the above command to ensure the image is ready for OCR.

-After these changes, your image may still have problems. For example, there may be a skew or uneven brightness. Fortunately, [ImageMagick](https://imagemagick.org/index.php) is a powerful tool that can help you clean image files. For other ImageMagick options that can improve OCR quality, review this helpful [collection of scripts](http://www.fmwconcepts.com/imagemagick/textcleaner/index.php). Because OCR is a command line tool, you can write a script that will loop over over all of your images (hundreds or thousands) at once. You will learn how to write these kinds of scripts later in the lesson.
+After these changes, your image may still have problems. For example, there may be a skew or uneven brightness. Fortunately, [ImageMagick](https://imagemagick.org/index.php) is a powerful tool that can help you clean image files. For other ImageMagick options that can improve OCR quality, review this helpful [collection of scripts](https://www.fmwconcepts.com/imagemagick/textcleaner/index.php). Because OCR is a command line tool, you can write a script that will loop over all of your images (hundreds or thousands) at once. You will learn how to write these kinds of scripts later in the lesson.

# OCR
This lesson will use the OCR program [Tesseract](https://github.com/tesseract-ocr/tesseract), the most popular OCR program for Digital Humanities projects. Google maintains Tesseract as free software and released it under the Apache License, Version 2.0. Tesseract supports over 100 different languages, but if you have a particularly difficult or unique script (calligraphy or other handwriting) it might be worth training your own OCR model. For typewritten documents, you need a program that will recognize several similar fonts and correctly identify imperfect letters. Tesseract 4.1 does just that. Google has already trained Tesseract to recognize a variety of fonts for dozens of languages. The following commands will install Tesseract as well as the Russian language package, which you will need for the rest of the lesson:
diff --git a/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.md b/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.md
index 0f23f2302e..e18fff3bce 100755
--- a/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.md
+++ b/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.md
@@ -64,7 +64,7 @@ I use Zotero for keeping track of the metadata of my sources, and here is an exa

So, now that we have the text as an image, we will move on to image preprocessing.
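Returning to the ImageMagick options discussed in the OCR and Machine Translation hunk above: a short Python loop can apply the same conversion to a whole directory of PDFs. This is a sketch under stated assumptions: ImageMagick's `convert` binary is on the PATH, and the option values (300 DPI, 8-bit depth, white background) are common choices rather than the lesson's exact command.

```python
import pathlib
import subprocess

# Batch-convert every PDF in the current directory to an OCR-ready TIFF
# using the density/depth/strip/background/alpha options described above.
for pdf in pathlib.Path(".").glob("*.pdf"):
    tiff = pdf.with_suffix(".tiff")
    subprocess.run(
        ["convert", "-density", "300", str(pdf),
         "-depth", "8", "-strip",
         "-background", "white", "-alpha", "off",
         str(tiff)],
        check=True,
    )
```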
-For image preprocessing I use a free and open source program called *Scan Tailor*. You can download Scan Tailor [here] (http://scantailor.org/downloads/).
+For image preprocessing I use a free and open source program called *Scan Tailor*. You can download Scan Tailor [here](https://scantailor.org/downloads/).

When you have downloaded Scan Tailor, open the text scans in the program by clicking "New project", browsing and selecting the file where you saved your text scans, and finally selecting the images you want to preprocess. Scan Tailor is a relatively heavy program, which means that it reserves quite a lot of the computer's capacity. For that reason it is better to divide large files into smaller entities, and preprocess them in parts. When starting a new project you can choose which images you want to select for preprocessing. When you have selected the scans for preprocessing, click "OK".

@@ -113,7 +113,7 @@ Save the preprocessed images to a place where it is easy to access them by using

OK, let's move on! For OCR we will use a free and open source program called Tesseract. You can install Tesseract [here](https://code.google.com/p/tesseract-ocr/wiki/ReadMe).

-If you are going to OCR other languages than English, you will also need to install the [language package](https://code.google.com/p/tesseract-ocr/downloads/list) for that language, and unpack it by using [7-zip](http://www.7-zip.org/).
+If you are going to OCR languages other than English, you will also need to install the [language package](https://code.google.com/p/tesseract-ocr/downloads/list) for that language, and unpack it by using [7-zip](https://www.7-zip.org/).

Now that we have Tesseract, we can proceed to doing the actual OCR!

diff --git a/en/lessons/retired/getting-started-with-github-desktop.md b/en/lessons/retired/getting-started-with-github-desktop.md
index 5d4224dbbc..5324093962 100755
--- a/en/lessons/retired/getting-started-with-github-desktop.md
+++ b/en/lessons/retired/getting-started-with-github-desktop.md
@@ -21,7 +21,7 @@ retired: true
 retirement-reason: |
   This lesson is for an old version of GitHub Desktop that is now no longer
   maintained or supported by GitHub. The new version and its documentation can
   be found at
-  We also recommend the Software Carpentry tutorial on version control at
+  We also recommend the Software Carpentry tutorial on version control at
 doi: 10.46430/phen0051
---

@@ -140,7 +140,7 @@ The Markdown syntax won't be covered in this lesson in order to keep the length

### Text Editors

-To write in plain text we want to use a text editor. There are a huge number of free and paid text editors available. Some of these are very straightforward and simple to use while others have a learning curve and potential uses beyond simple text editing. 
In the long run using a more advanced and extendable text editor like Vim or Emacs may save you time but for now we can start with a simpler editor. [Atom](https://atom.io/) is a good option for getting started. Atom is a text editor built by GitHub and includes syntax highlighting for Markdown alongside integration with GitHub. It is free and open source; a full 'flight manual', including installation instructions, is available [here](https://flight-manual.atom.io/).

If you don't want to install any new software then you can use your system's included text editor: TextEdit for Mac and Notepad for Windows. If you decide to use Markdown beyond this tutorial then you will benefit from a text editor which includes syntax highlighting for Markdown alongside other features useful for writing.

@@ -205,7 +205,7 @@ There are differences between using version control for code and text which will

It is important that you use meaningful commit summaries and messages. Writing good commit messages requires some prior thought. Messages that make sense to you as an explanation of changes when you make a commit may no longer make sense to you in the future. If you are going to use version control in collaboration with other people it is especially important that other people can understand your commit messages. Version control as a system for managing changes to documents works best when active thought goes into using the software. It is therefore particularly important when collaborating with others that there is a shared understanding and approach to using version control.

-One way of addressing this is to try to follow a 'commit style'. One influential [suggestion](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html) for a commit style has been made by Tim Pope. The style suggestions made by Tim Pope are partly ['built in'](https://github.com/blog/926-shiny-new-commit-styles) to the GitHub Desktop commit message interface but understanding the format will help ensure a consistent approach. The following commit message paraphrases Tim Pope's suggested format to focus on commits relating to text rather than code:
+One way of addressing this is to try to follow a 'commit style'. One influential [suggestion](https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html) for a commit style has been made by Tim Pope. The style suggestions made by Tim Pope are partly ['built in'](https://github.com/blog/926-shiny-new-commit-styles) to the GitHub Desktop commit message interface but understanding the format will help ensure a consistent approach. The following commit message paraphrases Tim Pope's suggested format to focus on commits relating to text rather than code:

```
Capitalized, short (50 chars or less) summary

diff --git a/en/lessons/retired/graph-databases-and-SPARQL.md b/en/lessons/retired/graph-databases-and-SPARQL.md
index 72412d4761..9912794ad8 100755
--- a/en/lessons/retired/graph-databases-and-SPARQL.md
+++ b/en/lessons/retired/graph-databases-and-SPARQL.md
@@ -71,15 +71,15 @@ Vocabulary Program][getty], has also released their series of
 authoritative databases on geographic place names, terms for describing
 art and architecture, and variant spellings of artist names, as LOD.
-[getty]: http://vocab.getty.edu +[getty]: https://vocab.getty.edu -[bm]: http://collection.britishmuseum.org +[bm]: https://collection.britishmuseum.org -[Europeana]: http://labs.europeana.eu/api/linked-open-data-introduction +[Europeana]: https://labs.europeana.eu/api/linked-open-data-introduction -[saam]: http://americanart.si.edu +[saam]: https://americanart.si.edu -[yale]: http://britishart.yale.edu/collections/using-collections/technology/linked-open-data +[yale]: https://britishart.yale.edu/collections/using-collections/technology/linked-open-data SPARQL is the language used to query these databases. This language is particularly powerful because it does not presuppose the perspectives that users @@ -137,7 +137,7 @@ maximum flexibility in deciding how they wish to query it. SPARQL lets us translate heavily interlinked, graph data into normalized, tabular data with rows and columns you can open in programs like Excel, or -import into a visualization suite such as [plot.ly](http://plot.ly) or +import into a visualization suite such as [plot.ly](https://plot.ly) or [Palladio]. It is useful to think of a SPARQL query as a [Mad @@ -284,13 +284,13 @@ SPARQL endpoint is a web address that accepts SPARQL queries and returns results. The BM endpoint is like many others: if you navigate to it in a web browser, it presents you with a text box for composing queries. -[bms]: http://collection.britishmuseum.org/sparql +[bms]: https://collection.britishmuseum.org/sparql {% include figure.html filename="sparql03.png" caption="The BM SPARQL endpoint webpage. For all the queries in this tutorial, make sure that you have left the 'Include inferred' and 'Expand results over equivalent URIs' boxes unchecked." %} When starting to explore a new RDF database, it helps to look at the relationships that stem from a single [example -object](http://collection.britishmuseum.org/id/object/PPA82633). +object](https://collection.britishmuseum.org/id/object/PPA82633). (For each of the following queries, click on the "Run query" link below to see the results. You can then run it as @@ -304,7 +304,7 @@ WHERE { } ``` -[Run query](http://collection.britishmuseum.org/sparql?query=SELECT+*%0D%0AWHERE+%7B%0D%0A++%3Chttp%3A%2F%2Fcollection.britishmuseum.org%2Fid%2Fobject%2FPPA82633%3E+%3Fp+%3Fo+.%0D%0A++%7D&_implicit=false&_equivalent=false&_form=%2Fsparql) +[Run query](https://collection.britishmuseum.org/sparql?query=SELECT+*%0D%0AWHERE+%7B%0D%0A++%3Chttp%3A%2F%2Fcollection.britishmuseum.org%2Fid%2Fobject%2FPPA82633%3E+%3Fp+%3Fo+.%0D%0A++%7D&_implicit=false&_equivalent=false&_form=%2Fsparql) By calling `SELECT ?p ?o` we're asking the database to return the values of `?p` and `?o` as described in the `WHERE {}` command. This query returns every @@ -485,14 +485,14 @@ authorities. One endpoint that does, however, is [Europeana's][eursparql]. They have created links between the objects in their database and records about individuals in -[DBPedia](http://wiki.dbpedia.org/) and [VIAF](https://viaf.org/), places in -[GeoNames](http://sws.geonames.org/), and concepts in the Getty Art & +[DBPedia](https://wiki.dbpedia.org/) and [VIAF](https://viaf.org/), places in +[GeoNames](https://sws.geonames.org/), and concepts in the Getty Art & Architecture thesaurus. SPARQL allows you to insert `SERVICE` statements that instruct the database to "phone a friend" and run a portion of the query on an outside dataset, using the results to complete the query on the local dataset. 
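As an aside before the federated-query example: the single-object query shown above can also be run outside the browser. The sketch below is not part of the lesson; it assumes the BM endpoint is still live and honours the standard `Accept` header for the W3C SPARQL JSON results format.

```python
import csv
import requests

ENDPOINT = "https://collection.britishmuseum.org/sparql"
QUERY = """
SELECT *
WHERE {
  <http://collection.britishmuseum.org/id/object/PPA82633> ?p ?o .
}
"""

# Request the W3C SPARQL JSON results format and flatten it into a CSV.
response = requests.get(
    ENDPOINT,
    params={"query": QUERY},
    headers={"Accept": "application/sparql-results+json"},
)
response.raise_for_status()
data = response.json()

variables = data["head"]["vars"]  # e.g. ["p", "o"]
with open("results.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(variables)
    for binding in data["results"]["bindings"]:
        writer.writerow([binding.get(v, {}).get("value", "") for v in variables])
```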
While this lesson will not go into the data models in Europeana and DBpedia in depth, the following query illustrates how a `SERVICE` statement works. You may run it yourself by copying and pasting the query text into the [Europeana endpoint][eursparql].

-[eursparql]: http://sparql.europeana.eu/
+[eursparql]: https://sparql.europeana.eu/

```
PREFIX edm: 

@@ -555,9 +555,9 @@ and languages.

Parsing the XML version of this output may be done with a tool like
Beautiful Soup ([see its _Programming Historian_
lesson](/lessons/intro-to-beautiful-soup.html)) or [Open
-Refine](http://openrefine.org/). To quickly convert JSON results from a SPARQL
+Refine](https://openrefine.org/). To quickly convert JSON results from a SPARQL
endpoint into a tabular format, I recommend the free command line utility
-[jq](http://stedolan.github.io/jq/download/). (For a tutorial on using command
+[jq](https://stedolan.github.io/jq/download/). (For a tutorial on using command
line programs, see ["Introduction to the Bash Command
Line"](/lessons/intro-to-bash.html).) The following query will convert the
special JSON RDF format into a CSV file, which you may load into your preferred

@@ -579,7 +579,7 @@ to load data from the BM endpoint you must use the address

aggregation query we used above to count artworks by type and clicking on "Run
query". Palladio should display a preview table.

-[Palladio]: http://palladio.designhumanities.org/
+[Palladio]: https://palladio.designhumanities.org/

{% include figure.html filename="sparql10.png" caption="Palladio's SPARQL query interface." %}

@@ -614,12 +614,12 @@ searching, or doing other mathematical operations more complex than
counting. For a more complete rundown of the commands available in
SPARQL, see these links:

-- [Wikibooks SPARQL tutorial](http://en.wikibooks.org/wiki/XQuery/SPARQL_Tutorial)
+- [Wikibooks SPARQL tutorial](https://en.wikibooks.org/wiki/XQuery/SPARQL_Tutorial)
- [Full W3C Overview of SPARQL](https://www.w3.org/TR/sparql11-overview/)

Both the Europeana and Getty Vocabularies LOD sites also offer extensive, and
quite complex example queries which can be good sources for understanding how
to search their data:

-- [Europeana SPARQL how-to](http://labs.europeana.eu/api/linked-open-data-SPARQL-endpoint)
-- [Getty Vocabularies Example Queries](http://vocab.getty.edu/queries)
+- [Europeana SPARQL how-to](https://labs.europeana.eu/api/linked-open-data-SPARQL-endpoint)
+- [Getty Vocabularies Example Queries](https://vocab.getty.edu/queries)
diff --git a/en/lessons/retired/intro-to-augmented-reality-with-unity.md b/en/lessons/retired/intro-to-augmented-reality-with-unity.md
index f1b8958e5a..219dd4a081 100755
--- a/en/lessons/retired/intro-to-augmented-reality-with-unity.md
+++ b/en/lessons/retired/intro-to-augmented-reality-with-unity.md
@@ -45,19 +45,19 @@ In this introductory tutorial, you will learn how to:

## How can Humanists use Augmented Reality?

-Novel applications of AR continue to surface within a variety of industries: [museums](https://www.youtube.com/watch?v=gx_UQxx54lo) are integrating AR content into their displays, [companies](http://www.gizmag.com/ikea-augmented-reality-catalog-app/28703/) are promoting AR apps in lieu of print or even web-based catalogs, and [engineering firms](https://www.youtube.com/watch?v=bXqe2zSepQ4) are creating AR applications showcasing their efforts to promote sustainability. 
[Predicted to grow](http://www.digi-capital.com/news/2015/04/augmentedvirtual-reality-to-hit-150-billion-disrupting-mobile-by-2020/#.VbetCU1VhHw) into a $120 billion industry within the next five years, augmented reality is an exciting new medium that humanists cannot afford to ignore. Indeed, many scholars within the growing field of digital humanities are beginning to explore how AR can be utilized as a viable medium of scholarly engagement within public spaces, objects, images, and texts. +Novel applications of AR continue to surface within a variety of industries: [museums](https://www.youtube.com/watch?v=gx_UQxx54lo) are integrating AR content into their displays, [companies](https://www.gizmag.com/ikea-augmented-reality-catalog-app/28703/) are promoting AR apps in lieu of print or even web-based catalogs, and [engineering firms](https://www.youtube.com/watch?v=bXqe2zSepQ4) are creating AR applications showcasing their efforts to promote sustainability. [Predicted to grow](https://www.digi-capital.com/news/2015/04/augmentedvirtual-reality-to-hit-150-billion-disrupting-mobile-by-2020/#.VbetCU1VhHw) into a $120 billion industry within the next five years, augmented reality is an exciting new medium that humanists cannot afford to ignore. Indeed, many scholars within the growing field of digital humanities are beginning to explore how AR can be utilized as a viable medium of scholarly engagement within public spaces, objects, images, and texts. {% include figure.html filename="new-ar-dev-1.png" caption="Augmented reality can be used to overlay digital information onto existing texts such as historical markers. This modified image is based on a photograph by Nicholas Henderson." %} -Since at least 2010, [digital artists](https://manifestarblog.wordpress.com/about/) have been creating AR applications for social advocacy and cultural intervention. For example, Tamiko Thiel's AR project [Clouding Green](http://www.tamikothiel.com/AR/clouding-green.html) reveals the carbon footprint of specific technology companies. Projects such as Thiel's capitalize on AR's unique rhetorical affordance to provide compelling, site-specific interactions between physical and digital spaces. +Since at least 2010, [digital artists](https://manifestarblog.wordpress.com/about/) have been creating AR applications for social advocacy and cultural intervention. For example, Tamiko Thiel's AR project [Clouding Green](https://www.tamikothiel.com/AR/clouding-green.html) reveals the carbon footprint of specific technology companies. Projects such as Thiel's capitalize on AR's unique rhetorical affordance to provide compelling, site-specific interactions between physical and digital spaces. -At the [Trace Initiative](http://web.archive.org/web/20180421163517/http://english.ufl.edu/trace_arcs/), a digital humanities organization in the University of Florida English Department, we seek to build upon the work of these artists by promoting the creation and circulation of humanities-focused mobile AR applications. We released our first AR application [to the Google Play store](https://play.google.com/store/apps/details?id=com.Trace.Dollars&hl=en) in spring 2016. +At the [Trace Initiative](https://web.archive.org/web/20180421163517/https://english.ufl.edu/trace_arcs/), a digital humanities organization in the University of Florida English Department, we seek to build upon the work of these artists by promoting the creation and circulation of humanities-focused mobile AR applications. 
We released our first AR application [to the Google Play store](https://play.google.com/store/apps/details?id=com.Trace.Dollars&hl=en) in spring 2016.

The augmented reality software used in this tutorial relies on image-recognition technology, meaning that it requires some kind of visual trigger (a logo, painting, etc.) to know when to display digital content. In the example application depicted in the image above, the application is programmed to only display the digital image of John C. Calhoun if the camera "recognizes" the specific historical marker with which it is associated. For this lesson, we will augment the cover of a physical book with a digital overlay that displays a picture of the author. You could use the technical skills gained throughout this tutorial to create digital overlays for a variety of texts such as historical documents or signs. For example, you might create an application that allows readers to scan the pages of a book or document and access historical context or critique related to that specific page. Humanities scholars could also use this tutorial to create site-specific AR applications to educate visitors about cultural aspects of a location that have been excluded from its historical presentation.

## A Note About AR Creation Platforms

-Unity is a very powerful and complex application used to create desktop, console, and mobile games. It is not designed exclusively for augmented reality development.As a result, this lesson has many detailed, albeit necessary, steps for navigating and operating the Unity interface. Although some of the steps might not be directly related to augmented reality development, they are certainly transferrable to other tutorials on Programming Historian or elsewhere that utilize Unity. If you would prefer to gain some familiarity with the Unity Editor prior to completing this lesson, I would suggest consulting [Unity's beginner tutorial videos](https://learn.unity.com/tutorial/live-sessions-on-unity-interface-and-essentials) and the online [Unity manual](http://docs.unity3d.com/Manual/LearningtheInterface.html).
+Unity is a very powerful and complex application used to create desktop, console, and mobile games. It is not designed exclusively for augmented reality development. As a result, this lesson has many detailed, albeit necessary, steps for navigating and operating the Unity interface. Although some of the steps might not be directly related to augmented reality development, they are certainly transferable to other tutorials on Programming Historian or elsewhere that utilize Unity. If you would prefer to gain some familiarity with the Unity Editor prior to completing this lesson, I would suggest consulting [Unity's beginner tutorial videos](https://learn.unity.com/tutorial/live-sessions-on-unity-interface-and-essentials) and the online [Unity manual](https://docs.unity3d.com/Manual/LearningtheInterface.html).

Within Unity, you can access additional functionality by importing "extensions." It is common to import extensions into Unity to gain access to additional functionality and/or pre-designed game components such as characters or game props. The extension used within this lesson is called "Vuforia," and it will provide the code necessary to create AR applications within the Unity game engine. I discuss how to download Unity and import the Vuforia extension in the section labelled "Software Requirements."
@@ -113,7 +113,7 @@ Next, you will need to import the augmented reality package you just downloaded ### Java Development Kit -Download and install the [Java Development Kit](http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html) for your operating system. +Download and install the [Java Development Kit](https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html) for your operating system. {% include figure.html filename="ar-dev-1-9.png" caption="Download the .exe file for your operating system." %} @@ -225,7 +225,7 @@ This cover of *Of Mice and Men* has sufficient visual complexity; however, it is {% include figure.html filename="ar-dev-11.png" caption="Photo courtesy of Mark Skwarek." %} -If you are taking a picture of your book cover, make sure that there are no extraneous features present in the image. In the case of the *Of Mice and Men* image above, this would be anything beyond the edge of the cover. If your image contains such extraneous features, either take another picture or open it in a photo editor such as [Gimp](http://www.gimp.org/) and +If you are taking a picture of your book cover, make sure that there are no extraneous features present in the image. In the case of the *Of Mice and Men* image above, this would be anything beyond the edge of the cover. If your image contains such extraneous features, either take another picture or open it in a photo editor such as [Gimp](https://www.gimp.org/) and crop out these features. [Consult this video tutorial](https://www.youtube.com/watch?v=2rGGpOTSpbc) for help on cropping and resizing images in Gimp. Make sure that your image file is under 2.5 mb and that it is a .jpg or .png file. {% include figure.html filename="ar-dev-12.png" caption="Crop out the area around the book." %} @@ -304,7 +304,7 @@ To adjust your perspective in 3D space, hold the Alt button (Option on Mac) on y {% include figure.html filename="ar-dev-5.gif" caption="Position your author image on top of the book cover." %} -Because Unity is optimized for 3D environments, it is sometimes difficult to work with 2D game objects such as images. If you are new to Unity, do not be alarmed if you cannot find your images or if you feel disoriented while manipulating them in your scene view. If you want to learn more about using Unity's transform tools, I would suggest checking out [this short video tutorial by Info Gamer](https://www.youtube.com/watch?v=2Ariq8vc5Vc) and reading up on [Transforms in the Unity Manual](http://docs.unity3d.com/Manual/Transforms.html). +Because Unity is optimized for 3D environments, it is sometimes difficult to work with 2D game objects such as images. If you are new to Unity, do not be alarmed if you cannot find your images or if you feel disoriented while manipulating them in your scene view. If you want to learn more about using Unity's transform tools, I would suggest checking out [this short video tutorial by Info Gamer](https://www.youtube.com/watch?v=2Ariq8vc5Vc) and reading up on [Transforms in the Unity Manual](https://docs.unity3d.com/Manual/Transforms.html). If you cannot find your author image in the scene view, try the following steps: @@ -329,7 +329,7 @@ If your overlay does not appear, double check the "Database Load Behaviour" comp ### Android -Before you can install your own applications on your Android device, you will need to [enable USB debugging](http://developer.android.com/tools/device.html). 
To do this, go to "Setting" > About Device" and tap the "Build number" seven times. Return to the previous screen and you should now see a "Developer Options" tab. Click it and make sure the option for "USB debugging" is checked.
+Before you can install your own applications on your Android device, you will need to [enable USB debugging](https://developer.android.com/tools/device.html). To do this, go to "Settings" > "About Device" and tap the "Build number" seven times. Return to the previous screen and you should now see a "Developer Options" tab. Click it and make sure the option for "USB debugging" is checked.

{% include figure.html filename="ar-dev-25.png" caption="Tap the 'Build Number' seven times." %}

diff --git a/en/lessons/retired/intro-to-beautiful-soup.md b/en/lessons/retired/intro-to-beautiful-soup.md
index 6c9c4096ae..9eb80b72d8 100644
--- a/en/lessons/retired/intro-to-beautiful-soup.md
+++ b/en/lessons/retired/intro-to-beautiful-soup.md
@@ -187,7 +187,7 @@ The Congressional database that we’re using is not an easy one to scrape
 because the URL for the search results remains the same regardless of
 what you’re searching for. While this can be bypassed programmatically,
 it is easier for our purposes to go
-to , search for
+to , search for
 Congress number 43, and to save a copy of the results page.
@@ -654,10 +654,10 @@ for tr in trs:
 You’ve done it! You have created a CSV file from all of the data in the
 table, creating useful data from the confusion of the html page.

   [Working with Text Files]: /lessons/working-with-text-files
-  [Command Line Bootcamp]: http://praxis.scholarslab.org/resources/bash/
-  [Opening lines of Beautiful Soup]: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
+  [Command Line Bootcamp]: https://praxis.scholarslab.org/resources/bash/
+  [Opening lines of Beautiful Soup]: https://www.crummy.com/software/BeautifulSoup/bs4/doc/
   [installing python modules]: /lessons/installing-python-modules-pip
-  [urllib3]: http://urllib3.readthedocs.org/en/latest/
+  [urllib3]: https://urllib3.readthedocs.org/en/latest/
   [Automated Downloading with Wget]: /lessons/automated-downloading-with-wget
   [Downloading Multiple Records Using Query Strings]: /lessons/downloading-multiple-records-using-query-strings
   [Document Object Model]: https://en.wikipedia.org/wiki/Document_Object_Model

diff --git a/en/lessons/retired/intro-to-the-zotero-api.md b/en/lessons/retired/intro-to-the-zotero-api.md
index fe5c392722..d41923875b 100755
--- a/en/lessons/retired/intro-to-the-zotero-api.md
+++ b/en/lessons/retired/intro-to-the-zotero-api.md
@@ -201,7 +201,7 @@ Now that we have worked through retrieving information using the Zotero
 API, we can continue to use it to interact with the items stored in our
 library.
- [Zotero]: http://zotero.org + [Zotero]: https://zotero.org [Quick Start Guide]: https://www.zotero.org/support/quick_start_guide [libZotero GitHub library]: https://github.com/fcheslack/libZotero [Installing Python Modules with pip]: /lessons/installing-python-modules-pip diff --git a/en/lessons/sentiment-analysis-syuzhet.md b/en/lessons/sentiment-analysis-syuzhet.md index 9cd7160f40..65f5b3c423 100644 --- a/en/lessons/sentiment-analysis-syuzhet.md +++ b/en/lessons/sentiment-analysis-syuzhet.md @@ -1,661 +1,661 @@ ---- -title: "Sentiment Analysis with 'syuzhet' using R" -slug: sentiment-analysis-syuzhet -original: analisis-de-sentimientos-r -layout: lesson -collection: lessons -date: 2021-03-23 -translation_date: 2023-04-01 -authors: -- Jennifer Isasi -translator: -- Adam Crymble -editors: -- Maria José Afanador-Llach -reviewers: -- Riva Quiroga -translation-editor: -- Rolando Rodriguez -translation-reviewer: -- Shuang Du -- Andrew Janco -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/478 -difficulty: 2 -activity: analyzing -topics: [distant-reading, r, data-visualization] -abstract: This lesson teaches you how to obtain and analyse narrative texts for patterns of sentiment and emotion. -avatar_alt: Engraving of three faces expressing different emotions -doi: 10.46430/phen0110 ---- - -{% include toc.html %} - - -# Lesson Objectives - -This lesson introduces you to the [`syuzhet`](https://perma.cc/9DNJ-ZWPW) [sentiment analysis](https://perma.cc/A92Q-PM4D) algorithm, written by [Matthew Jockers](https://perma.cc/9PF8-3GZ4) using the [R programming language](https://perma.cc/W78Z-FUAX), and applies it to a single narrative text to demonstrate its research potential. The term 'syuzhet' is Russian (сюже́т) and translates roughly as 'plot', or the order in which events in the narrative are presented to the reader, which may be different than the actual time sequence of events (the '[fabula](https://perma.cc/M7C9-XT99)'). The `syuzhet` package similarly considers sentiment analysis in a time-series-friendly manner, allowing you to explore the developing sentiment in a text across the pages. - -To make the lesson useful for scholars working with non-English texts, this tutorial uses a Spanish-language novel, *[Miau](https://perma.cc/G6V3-JCWS)* by [Benito Pérez Galdós](https://perma.cc/9P3P-2FQP) (1888) as its case study. This allows you to learn the steps necessary to work with everything from accented characters to thinking through the intellectual problems of applying English language algorithms to non-English texts. You do not need to know Spanish to follow the lesson (though you will if you want to read the original novel). Some steps in the following instructions may not be necessary if you are working with English-language texts, but those steps should be self-evident. - -Although the lesson is not intended for advanced R users, it is expected that you will have some knowledge of R, including an expectation that you already have [R installed](https://www.r-project.org/) and that you know how to load R packages. The author recommends downloading [RStudio](https://www.rstudio.com/) as a user-friendly environment for working in R. 
If you have not used R before, you may first want to try working through some of the following introductory R lessons:

* Taylor Arnold and Lauren Tilton, '[Basic Text Processing in R](/en/lessons/basic-text-processing-in-r)', *Programming Historian* 6 (2017), https://doi.org/10.46430/phen0061
* Taryn Dewar, '[R Basics with Tabular Data](/en/lessons/r-basics-with-tabular-data)', *Programming Historian* 5 (2016), https://doi.org/10.46430/phen0056
* Nabeel Siddiqui, '[Data Wrangling and Management in R](/en/lessons/data-wrangling-and-management-in-r)', *Programming Historian* 6 (2017), https://doi.org/10.46430/phen0063

You may also be interested in other sentiment analysis lessons:

* Zoë Wilkinson Saldaña, '[Sentiment Analysis for Exploratory Data Analysis](/en/lessons/sentiment-analysis),' *Programming Historian* 7 (2018), https://doi.org/10.46430/phen0079
* Matthew Jockers, '[Introduction to the Syuzhet Package](https://perma.cc/9BN2-F3N3)' (2020).

At the end of the lesson you will be able to:

* Develop appropriate research questions that apply sentiment analysis to literary or narrative texts
* Use the R programming language, RStudio, and the `syuzhet` package with the [NRC Word-Emotion Association Lexicon](https://perma.cc/A8M5-2SDG) to generate sentiment scores for words in texts of various languages
* Critically interpret the results of your sentiment analysis
* Visualise the results through a range of graphs (bar, word cloud) to aid interpretation

This lesson was written and tested with R version 4.2.x on a Mac and version 4.0.x on a Windows machine.

> Generally, R works the same on Windows, Mac, and Linux operating systems. However, when working on a Windows machine with non-English texts, or texts containing accents or special characters, you will need to include some extra instructions to apply [UTF-8](https://perma.cc/5HY2-HHN2) character encoding to ensure special characters are properly interpreted. Where this is a necessary step, it is shown below.
> Translator's Note for Educators: A number of steps in this tutorial require loading or running times of 15 to 30 minutes or more, during which participants have to wait. This may affect your ability to use the tutorial in a time-limited live event such as a workshop. Note also that to use this tutorial in a workshop setting, participants will need the ability to install software on their machines.
# Background Information

This section introduces the concepts and the software that you will use to perform a sentiment analysis of a text. It also introduces the case study document, the novel *Miau* by Benito Pérez Galdós, and the ways you can apply sentiment analysis meaningfully to a text such as *Miau*.

## Sentiment Analysis

Sentiment analysis, also known as opinion mining, is an umbrella term for a number of processes for automatically calculating the degree of negativity or positivity in a text. It has been used for some time in the fields of marketing and politics to better understand the public mood;[^1] however, its adoption in literary studies is more recent and as of yet no one method dominates use.[^2] Some approaches to sentiment analysis also enable you to measure the presence of a number of different emotions in a text, as will be the case for the example in this tutorial.

What is the difference between 'emotion' and 'sentiment'? The two words are often used interchangeably in English but refer to different concepts.

According to Antonio R. Damasio, 'emotions' are the biologically rooted, instinctive reactions of our bodies to environmental stimuli.[^3] There is no universally agreed list of basic emotions, however a common model includes six: anger (or rage), joy, disgust (or revulsion), fear, sadness, and surprise -- though for Damasio the last of those falls into a category he would describe as a '[secondary emotion](https://perma.cc/Y675-4C52)'. In the case of the automated system that you will use, the secondary emotions 'anticipation' and 'trust' are also options for analysis.

'Sentiment', on the other hand, is both the action and the effect of feeling an emotion. In other words, as Óscar Pereira Zazo notes, 'when an object, a person, a situation, or a thought brings us joy, it begins a process that can lead to the feeling of being joyful or happy'.[^4] Sentiment analysis suggests that you can measure the intensity of this effect (either positive, negative, or neutral) on the manifestation of an emotion.

This lesson distinguishes between the two terms as described above. The effect (sentiment) will be measured as it evolves across the pages of the text, while the emotions will be measured by looking at word use more generally.

## NRC Word-Emotion Association Lexicon

Many sentiment analysis algorithms depend upon pre-compiled lexicons or dictionaries that assign numerical sentiment scores to words or phrases based on findings from previous linguistic research. The R package `syuzhet` has been designed to allow you to choose from four of these sentiment lexicons: [Bing](https://perma.cc/G9RV-RA82), [Afinn](https://perma.cc/GZB2-J2RH), [Stanford](https://perma.cc/TK8L-44ZW), and the [NRC Word-Emotion Association Lexicon](https://perma.cc/A8M5-2SDG).[^5] This lesson uses the NRC lexicon, as it is the only one of the four that can currently be used with non-English texts.

This lexicon, which includes positive and negative sentiment values as well as eight emotional categories, was developed by Saif M. Mohammad, a scientist at the National Research Council Canada (NRC).
The dataset that forms the lexicon has been manually annotated using the [Maximum Difference Scaling](https://perma.cc/KWW4-AFJ4) technique, or MaxDiff, to determine the most negative or positive sets of words relative to other words -- a sort of ranking of the sentiment intensity of words.[^6] This particular lexicon has 14,182 unigrams (words) classified as either positive or negative. It also classifies a word's connection to various emotions: anger, anticipation, disgust, fear, joy, sadness, surprise, and trust. Using automatic translation, which may lack linguistic nuance in unpredictable ways, it is available in more than one hundred languages.

The license on the dataset allows free use of the NRC lexicon for research purposes. All data is available for download.

The [NRC Word-Emotion Association Lexicon](https://perma.cc/A8M5-2SDG) website outlines the different categories and classifications in the dataset. It also provides a number of resources that can help you to better understand how the lexicon was built, including links to published research, more information on obtaining values for individual words, the organisation of the dataset, and how to extend it.

## The `syuzhet` R Package

The [R package](https://cran.r-project.org/web/packages/syuzhet/vignettes/syuzhet-vignette.html) `syuzhet` was released in 2015 by Matthew Jockers; at the time of writing it is still being actively maintained (we use version 1.0.6, the November 2020 release, in this lesson).

If you intend to use the software on non-English texts, you should be aware that the package has been developed and tested in English, and its reception has not been without controversy: [Annie Swafford](https://perma.cc/TYT3-5DTU), among others, challenged some of the algorithm's assumptions about text and the use of `syuzhet` in a research setting. Her concerns included the incorrect splitting of sentences involving quotation marks, and the problems of applying a sentiment lexicon designed for modern English to a historic text that uses the same words in slightly different ways. Assigning concrete values of measurement to literary texts, which are by their nature quite subjective, is always challenging and potentially problematic. A series of archived blog entries by Jockers outlines [his thoughts on the method and addresses some of the criticisms](https://web.archive.org/web/20190708100723/http://www.matthewjockers.net/page/2/) about the degree to which sentiment can accurately be measured when sometimes even humans disagree on a passage of text's effects on the reader.

> Some Research Warnings: The lexicon assigns values to individual words, which are then used as the basis for conducting the quantitative analysis. Those values were assigned by humans working in North America and may carry English-language and North American cultural biases. Researchers must therefore take several things into account before applying this methodology in their work:
>
> - The Spanish lexicon (and other non-English versions) is a direct translation carried out via machine translation. In the author's opinion, these systems are already fairly reliable when translating between English and Spanish, but less so for other languages that NRC claims to be operable with, including Basque, for example.
> - The sentiment and emotion scores of each word need to be understood in cultural and temporal context. A term that the people building the NRC lexicon labelled positive may be negative in other contexts. This type of approach is therefore inherently coarse in its ability to reflect a *true* reading of the texts as conducted by a subject specialist through close reading.
> - The author does not recommend the use of this methodology in texts that are significantly metaphorical or symbolic.
> - This particular method does not properly handle negation. For example, it will wrongly classify 'I am not happy' as positive because it looks at individual words only. Research by Richard Socher (2014) has attempted to improve issues of negation in sentiment analysis and may be worth exploring for those with a genuine research need.[^7]
>
> Following the spirit of adaptability of *Programming Historian* lessons in other languages, the author has decided to use `syuzhet` in its original form; however, at the end of the lesson you will be introduced to some advanced functions that will help you use your own sentiment dictionary with the package.

As this tutorial works with the emotions of a Spanish text, Table 1 provides a simple translation matrix of the key emotion names for ease of reference.

Table 1: Emotion categories in English and Spanish

| English | Spanish |
| -------- | ------- |
| anger | enfado |
| anticipation | anticipación |
| disgust | disgusto |
| fear | miedo |
| joy | alegría |
| sadness | tristeza |
| surprise | sorpresa |
| trust | confianza |
| negative | negativo |
| positive | positivo |

## A Brief Example

Before diving into the full analysis of our text *Miau*, we offer a short example of sentiment analysis in action, using `syuzhet` together with the NRC lexicon and focusing on the outputs instead of the code. The analysis uses R to [tokenise](https://perma.cc/243B-E9M7) the text into a list of single-word strings (unigrams), which are then analysed one at a time. Sentence-level analysis is also possible in sentiment analysis, but it is not the focus of this tutorial.

Consider the analysis of the final passage from *Miau*:

> **Spanish Original**: Retumbó el disparo en la soledad de aquel abandonado y tenebroso lugar; Villaamil, dando terrible salto, hincó la cabeza en la movediza tierra, y rodó seco hacia el abismo, sin que el conocimiento le durase más que el tiempo necesario para poder decir: «Pues... sí...».
>
> **Rough English Translation**: The shot boomed out in the solitude of that abandoned and gloomy space; Villaamil, taking a terrible leap, bowed his head to the moving earth and rolled towards the abyss, his awareness lasting no longer than the time necessary to say: 'Well...yes...'.
>
> *Miau* by Benito Pérez Galdós.

This passage will be transformed into a list of words:

```R
example

> [1] "retumbó"    "el"         "disparo"    "en"         "la"         "soledad"
> [7] "de"         "aquel"      "abandonado" "y"          "tenebroso"  "lugar"
> [13] "villaamil" "dando"      "terrible"   "salto"      "hincó"      "la"
...
```

Using the sentiment analysis function, you then calculate the eight emotions as classified by NRC, as well as the positive and negative scores of each word.
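The scores below were generated with the package's `get_nrc_sentiment()` function, which is introduced in detail later in this lesson. A minimal sketch of the call, assuming the tokenised passage shown above is stored in `example`:

```R
# Sketch only: score each token of the short passage against the Spanish
# NRC lexicon; the result is a data frame with one row per word and one
# column per emotion or sentiment.
example_2 <- get_nrc_sentiment(example, lang = "spanish")
```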
The following is the result for the first few words in this short passage:

```R
print(example_2, row.names = example)

>            anger anticipation disgust fear joy sadness surprise trust negative positive
> retumbó        0            0       0    0   0       0        0     0        0        0
> el             0            0       0    0   0       0        0     0        0        0
> disparo        3            0       0    2   0       2        1     0        3        0
> en             0            0       0    0   0       0        0     0        0        0
> la             0            0       0    0   0       0        0     0        0        0
> soledad        0            0       0    2   0       2        0     0        2        0
> de             0            0       0    0   0       0        0     0        0        0
> aquel          0            0       0    0   0       0        0     0        0        0
> abandonado     2            0       0    1   0       2        0     0        3        0
> y              0            0       0    0   0       0        0     0        0        0
> tenebroso      0            0       0    0   0       0        0     0        0        0
> lugar          0            0       0    0   0       0        0     0        0        0
> villaamil      0            0       0    0   0       0        0     0        0        0
> dando          0            0       0    0   0       0        0     0        0        1
> terrible       2            1       2    2   0       2        0     0        2        0
> salto          0            0       0    0   0       0        0     0        0        0
> hincó          0            0       0    0   0       0        0     0        0        0
> la             0            0       0    0   0       0        0     0        0        0
...
```
> Translator's Note: R will not translate these into English for you, but to make the tutorial easier to follow for English speakers, the same output would look like the following if the passage was in English (notice that when translating word-by-word, the results are slightly different from when translating whole passages, as above):
```R
print(example_2, row.names = example)

>            anger anticipation disgust fear joy sadness surprise trust negative positive
> boomed         0            0       0    0   0       0        0     0        0        0
> the            0            0       0    0   0       0        0     0        0        0
> shot           3            0       0    2   0       2        1     0        3        0
> in             0            0       0    0   0       0        0     0        0        0
> the            0            0       0    0   0       0        0     0        0        0
> solitude       0            0       0    2   0       2        0     0        2        0
> of             0            0       0    0   0       0        0     0        0        0
> that           0            0       0    0   0       0        0     0        0        0
> abandoned      2            0       0    1   0       2        0     0        3        0
> and            0            0       0    0   0       0        0     0        0        0
> gloomy         0            0       0    0   0       0        0     0        0        0
> place          0            0       0    0   0       0        0     0        0        0
> villaamil      0            0       0    0   0       0        0     0        0        0
> taking         0            0       0    0   0       0        0     0        0        1
> terrible       2            1       2    2   0       2        0     0        2        0
> leap           0            0       0    0   0       0        0     0        0        0
> bowed          0            0       0    0   0       0        0     0        0        0
> his            0            0       0    0   0       0        0     0        0        0
...
```

The results are returned in a [data frame](https://perma.cc/ER4M-WRRC). Using this scoring system, every word has a default value of 0, indicating no connection to the corresponding emotion. Any words not in the NRC lexicon will be treated by the code as if they have values of 0 for all categories. Any word with a score greater than 0 is both present in the NRC lexicon and has been assigned a value by the researchers responsible for that lexicon, indicating the strength of its connection to one of the emotional categories.

In this example we can see that the words 'disparo' (shot), 'soledad' (solitude), 'abandonado' (abandoned), and 'terrible' (terrible) have a negative score associated with them (second-to-last column), while 'dando' (taking) is judged as a positive word (last column).

We are also able to see which emotions each word is connected to: 'disparo' (shot) is associated with *anger* (3), *fear* (2), *sadness* (2), and *surprise* (1). Higher numbers mean a greater strength of connection to that emotion.

The possibilities for exploring, analysing, and visualising these results depend on your programming skills, but also on your research needs. To help you reach your potential with sentiment analysis, this lesson shows you how to analyse the data and build an understanding of the results through various visualisations.

## Appropriate Research Questions

As already stated, in this lesson you will analyse the Spanish novel *Miau* by [Benito Pérez Galdós](https://perma.cc/9P3P-2FQP), published in 1888. Known for his Spanish realist novels, this particular Pérez Galdós story takes place in Madrid at the end of the nineteenth century and satirises the government administration of the day. In a kind of tragic comedy, we witness the final days of Ramón Villaamil after he becomes unemployed, as his family tries to stretch its meagre budget while keeping up the pretence of wealthy living. Villaamil's spiral of misfortune and his inability to find a new job end in tragedy.

From a research standpoint, the question is: Can we observe the emotional downward spiral of this plot through an automatic extraction of sentiment in the text? Does a human reader's interpretation of the negative experiences of Villaamil match the results of the algorithm? And if so, what words within the novel are used most to signal the emotional trajectory of the story?

# Obtaining Sentiment and Emotion Scores

The process of conducting the sentiment analysis is a four-stage affair. First, the code must be installed and loaded into the R environment of your choice. Next, you load and pre-process the text you want to analyse. You then conduct your analysis. Finally, you turn your attention to interpreting the results.
## Install and Load Relevant R Packages

Before processing the text, you must first install and load the correct R packages. In this case, that includes [`syuzhet`](https://cran.r-project.org/web/packages/syuzhet/vignettes/syuzhet-vignette.html). You will also be visualising the results, which will require a number of other R packages: [`RColorBrewer`](https://cran.r-project.org/web/packages/RColorBrewer/index.html), [`wordcloud`](https://perma.cc/GM67-HBH3), [`tm`](https://perma.cc/T2JG-LEBJ) and [`NLP`](https://perma.cc/NS79-H5DH).

To install and load these packages, copy and execute the sample code below in your chosen R coding environment. The first few lines will install the packages (only needed if you haven't already installed them). The second set of lines will load them so that you can use them in your programme. The installation of these packages may take a few minutes.

```R
# Install the Packages
install.packages("syuzhet")
install.packages("RColorBrewer")
install.packages("wordcloud")
install.packages("tm")

# Load the Packages
library(syuzhet)
library(RColorBrewer)
library(wordcloud)
library(tm)
```

## Load and Prepare the Text

Next, download a machine-readable copy of the novel: [*Miau*](/assets/analisis-de-sentimientos-r/galdos_miau.txt) and make sure to save it as a .txt file. When you open the file you will see that the novel is in [plain text](https://perma.cc/Z5WH-V9SW) format, which is essential for this particular analysis using R.

With the text at hand, you first need to load it into R as one long string so that you can work with it programmatically. Make sure to replace `FILEPATH` with the location of the novel on your own computer (don't just type 'FILEPATH'). This loading process is slightly different on Mac/Linux and Windows machines:

### On Mac and Linux

You can [find the FILEPATH](https://perma.cc/ZXZ8-FZHG) using your preferred method. The final format on my computer is `/Users/Isasi/Desktop/miau.txt`

On a Mac/Linux machine, use the function `get_text_as_string`, which is part of the `syuzhet` package:

```R
text_string <- get_text_as_string("FILEPATH")
```

### On Windows

You can [find the FILEPATH](https://perma.cc/N9R4-HEJY) using your preferred method. The final format on my computer is `C:\\Users\\Isasi\\Desktop\\miau.txt`

The Windows operating system cannot directly read characters with tildes, accents, or from extended alphabet sets, all of which are commonly used in languages such as Spanish, French, and Portuguese. Therefore we must first alert the software that our novel uses the [UTF-8](https://perma.cc/5HY2-HHN2) set of characters (which includes accents and many other non-English characters). We do this using the `scan` function.

> Note that when typing your filepath, you may need to escape the backslashes (`\`) in the filepath. To do this, just add a second backslash each time it appears in the path (e.g. `C:\\...`).

```R
text_string <- scan(file = "FILEPATH", fileEncoding = "UTF-8", what = character(), sep = "\n", allowEscapes = T)
```

---

Now that the data has loaded, you have to format it in the way the sentiment analysis algorithm expects to receive it. In this particular case, that is as a [list](https://perma.cc/LPV9-XGX8) containing either single words or sentences (here you will focus on individual words only).

This means you need an intermediate step between loading the text and extracting the sentiment values.
To meet this need, we will divide the character string into a list of words, sometimes also referred to as [unigrams](https://perma.cc/FX4C-ZLYB) or [tokens](https://perma.cc/V6UY-KKVK).

To do this you can use the package's built-in `get_tokens()` function to generate a new data object containing each individual word as a list. This function also removes spaces and punctuation from the original text. This approach to tokenisation uses [regular expressions](https://perma.cc/W7YD-K3R7) and is not appropriate in every use case: it will, for example, split hyphenated words into two. Depending on your text, you should consider the implications of your chosen method of tokenisation; you can use any method you like, as long as the output is in the same format as in the example below.

```R
text_words <- get_tokens(text_string)
head(text_words)

> [1] "miau"   "por"    "b"      "pérez"  "galdós" "14"
```

Now you can use the `length()` function to count how many words are in the original text:

```R
length(text_words)

> [1] 97254
```

If you want to analyse the text by sentence, use the `get_sentences()` function and follow the same process (except for creating the word cloud below):

```R
sentence_vector <- get_sentences(text_string)
length(sentence_vector)

> [1] 6022
```

## Extracting Data with the NRC Sentiment Lexicon

Now you can use the `get_nrc_sentiment` function to obtain the sentiment scores for each word in the novel. The default vocabulary for the software is English. Since this text is in Spanish, you will use the `lang` argument to set the vocabulary to Spanish (this would not be necessary if working on an English text). You will then create a new data object to store the extracted data so that you can work with it further. The `get_nrc_sentiment` function searches for the presence of the eight emotions and two sentiments against each word in your list, and assigns each a number greater than 0 if the word is found within the NRC's lexicon. Depending on the speed of your computer and the nature of your text, this process may take between 15 and 30 minutes.

```R
sentiment_scores <- get_nrc_sentiment(text_words, lang="spanish")
```

You can also use this package with [a range of other languages](https://perma.cc/9BN2-F3N3), though the 2020 release only works on languages with Latin-based alphabets. Other languages that can be substituted for `spanish` in the above line of code are: `basque`, `catalan`, `danish`, `dutch`, `english`, `esperanto`, `finnish`, `french`, `german`, `irish`, `italian`, `latin`, `portuguese`, `romanian`, `swedish`, and `welsh`. We can hope that the functionality will improve in future to include more languages.

Some users have reported getting a warning message when the code finishes running. At the time of writing, this is a warning that the `syuzhet` codebase may need to be updated in future, but it should not affect your ability to use the package at present. The warning reads: "spread_() was deprecated in tidyr 1.2.0. Please use spread() instead. The deprecated feature was likely used in the syuzhet package. Please report the issue to the authors." In this case, only Matthew Jockers can fix the error, as it is an issue with the code he created, not with your instructions to run it.

When the process finishes, you may want to verify the contents of the new data object. To avoid printing thousands of lines of text, you can use the `head()` function to show only the first six unigrams.
If you are following the example, you should see the following (which is lacking in context at this point):

```R
head(sentiment_scores)

>   anger anticipation disgust fear joy sadness surprise trust negative positive
> 1     0            0       0    0   0       0        0     0        0        0
> 2     0            0       0    0   0       0        0     0        0        0
> 3     0            0       0    0   0       0        0     0        0        0
> 4     0            0       0    0   0       0        0     0        0        0
> 5     0            0       0    0   0       0        0     0        0        0
> 6     0            0       0    0   0       0        0     0        0        0
```

### Summary of the Text

More interesting is a summary of the values associated with each of the eight emotions and two sentiments, which can be displayed using the `summary()` function. This can be very useful when comparing various texts, and can allow you to see different measures, such as the average relative value of each of the emotions and the two sentiments. For example, we can see that, on average ([mean](https://perma.cc/5NKH-2TYV)), the novel *Miau* uses more positive (0.05153) than negative (0.04658) language, according to the algorithm. However, it seems that terms associated with sadness (0.02564) are also more prevalent than those associated with joy (0.01929).

This summary output also shows a number of other calculations, many of which have a value of 0, including the [median](https://perma.cc/KB36-B855). Words that are not found in the sentiment lexicon (NRC) will automatically be treated as if they have a value of 0. Because there are a lot of categories and the story is quite complex, it is not surprising that no one emotion or sentiment has distinctively high statistical values. This makes the minimum, maximum, and mean the most useful measures from this summary output.

```R
summary(sentiment_scores)

>      anger         anticipation       disgust            fear
>  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000
>  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000
>  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000
>  Mean   :0.01596   Mean   :0.02114   Mean   :0.01263   Mean   :0.02243
>  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000
>  Max.   :5.00000   Max.   :3.00000   Max.   :6.00000   Max.   :5.00000
>       joy            sadness           surprise           trust
>  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000
>  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000
>  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000
>  Mean   :0.01929   Mean   :0.02564   Mean   :0.01035   Mean   :0.03004
>  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000
>  Max.   :5.00000   Max.   :7.00000   Max.   :2.00000   Max.   :3.00000
>     negative          positive
>  Min.   :0.00000   Min.   :0.00000
>  1st Qu.:0.00000   1st Qu.:0.00000
>  Median :0.00000   Median :0.00000
>  Mean   :0.04658   Mean   :0.05153
>  3rd Qu.:0.00000   3rd Qu.:0.00000
>  Max.   :7.00000   Max.   :5.00000
```
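If you only want those averages, without the rest of the summary statistics, base R's `colMeans()` function offers a one-line alternative. A sketch, using only the `sentiment_scores` object created above:

```R
# Mean score per emotion and sentiment column; these values match the
# 'Mean' rows in the summary() output above.
colMeans(sentiment_scores)
```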
# Interpreting the Results

You now have the quantitative results of your sentiment analysis of a text. So what can you do with these numbers? This section introduces three different visualisations of the data -- bar charts, word counts, and word clouds -- which offer quick but different ways of making sense of the outputs and of telling a story or forming an argument about what you've discovered.

## Bar Chart by Emotion

To quickly get a sense of which emotions have a major presence in the text, a bar chart is both a simple and effective format for displaying your data (Figure 1). The built-in [`barplot()`](https://perma.cc/5DXU-CYS9) function can be paired with the summary data for each of the emotions: *anger*, *anticipation*, *disgust*, *fear*, *joy*, *sadness*, *surprise*, and *trust*. These are stored in columns 1 to 8 of our data table. This approach to displaying the data uses the `prop.table()` function to convert the counts for each of the emotion words into proportions before plotting.[^8]

```R
barplot(
  colSums(prop.table(sentiment_scores[, 1:8])),
  space = 0.2,
  horiz = FALSE,
  las = 1,
  cex.names = 0.7,
  col = brewer.pal(n = 8, name = "Set3"),
  main = "'Miau' by Benito Pérez Galdós, 1907 edition",
  sub = "Analysis by Dr Jennifer Isasi",
  xlab = "emotions", ylab = NULL)
```

The rest of the parameters that you can see in the code are optional, and have been added to help you learn how to customise the graph outputs. They include indicating the space between the bars (`space = 0.2`), that the chart should use vertical rather than horizontal bars (`horiz = FALSE`), and that the axis labels should be drawn horizontally (`las = 1`). We also reduce the font size of the labels (`cex.names = 0.7`) to make sure they fit nicely on the screen. Thanks to the [`RColorBrewer`](https://perma.cc/BHK9-AY7S) package that we installed and loaded at the beginning of the lesson, we can automatically colour the columns. In this case we've used the `brewer.pal` colour palette from `Set3`, and specified that we need 8 colours (`n = 8`) -- one colour per column. You can learn more about `RColorBrewer` and its options on [the documentation page for that package](https://perma.cc/4EHL-P8E9). Finally, we add a title and subtitle to the graph using the `main` and `sub` parameters, along with the word `emotions` on the X axis. We have not added a label to the Y axis, but you could do so if you wished by following the model above.

{% include figure.html filename="tr-en-analisis-de-sentimientos-r-1.png" alt="Bar chart showing the calculated scores of six emotions and two sub-emotions measured in the novel 'Miau' (1907) by Pérez Galdós. The emotions are anger, anticipation, disgust, fear, joy, sadness, surprise, and trust. The 'trust' bar is the tallest, followed by 'sadness' and 'fear', while 'disgust' and 'surprise' are the shortest. This is included because it shows the relative outputs of the sentiment analysis algorithm across these eight emotions." caption="Figure 1: Bar chart showing the calculated scores of six emotions and two sub-emotions measured in the novel 'Miau' by Pérez Galdós." %}

If you are not interested in modifying these parameters, you could create a bar chart with default styling using the following code:

```R
barplot(colSums(prop.table(sentiment_scores[, 1:8])))
```

> Make sure you have enough space in the display window for the graph to draw properly, including space for the labels.

This information already indicates to us that the *sadness* and *fear* emotions are more prevalent than those of *disgust* or *surprise*. But what words does Galdós use to express *fear*? And how often does each emotionally charged word appear in the novel?

## Counting Words by Emotion

One of the measures you can calculate using sentiment analysis is the frequency of the words appearing in the text and how those words relate to each emotional category. To start with, you need to create a data object containing all of the words that have a value greater than 0 -- in this case, you will start with those corresponding to the *sadness* column. In order to select only that column, use the dollar symbol `$` after the name of your `sentiment_scores` variable to specify the name of the column you want to work with: *sadness*.
```R
sad_words <- text_words[sentiment_scores$sadness > 0]
```

The contents of `sad_words` do not tell you much on their own, since they only offer you the list of relevant words without any further context. To also obtain the number of appearances of each 'sadness' word, you can generate a table. To get a quick look at some of the top entries, use the `unlist` and `table` functions along with the `decreasing` argument to display the matches in descending order (if you want ascending order, change TRUE to FALSE). You can then create a new table object to print the first twelve words in the list, along with their frequencies, using the following code (see Table 2 for translations of the Spanish words):

```R
sad_word_order <- sort(table(unlist(sad_words)), decreasing = TRUE)
head(sad_word_order, n = 12)

>            muy           nada          pobre          tarde
>            271            156             64             58
>            mal           caso           malo          salir
>             57             50             39             35
>          madre insignificante             ay          culpa
>             33             29             24             22
```

Table 2: English translations of the Spanish words in the preceding code output block

| Spanish | English |
| ------- | ------- |
| muy | very |
| nada | nothing |
| pobre | poor |
| tarde | late |
| mal | bad |
| caso | case |
| malo | bad |
| salir | to leave |
| madre | mother |
| insignificante | insignificant |
| ay | ow! |
| culpa | fault |

If you want to know how many unique words are connected to sadness, you can use the `length` function on the newly created `sad_word_order` variable:

```R
length(sad_word_order)

> [1] 349
```

You can repeat the same operation with the rest of the emotion categories, or with whichever of them interest you, as well as with the positive and negative sentiment scores. To make sure you understand how to adapt the code, try to obtain the results for the emotion 'joy' and compare them with 'sadness'.[^9]
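If you want to check your adaptation of the code, a minimal sketch following the same pattern as the sadness example (the variable names `joy_words` and `joy_word_order` are illustrative):

```R
# Same pattern as above, applied to the 'joy' column: collect every word
# with a joy score greater than 0, then tabulate and sort by frequency.
joy_words <- text_words[sentiment_scores$joy > 0]
joy_word_order <- sort(table(unlist(joy_words)), decreasing = TRUE)
head(joy_word_order, n = 12)

# Footnote 9 reports 263 unique words connected to joy, against 349 for sadness.
length(joy_word_order)
```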
Depending on the type of analysis that you want to conduct, this may be an efficient approach. For the purposes of this introductory lesson, you are next going to generate a word cloud to help visualise the terms associated with each emotional category (for demonstration purposes, you will use four).

## An Emotional Word Cloud

In order to create a word cloud of the terms that correspond with each emotion in *Miau*, you are first going to collect all words with an emotion score greater than 0. Similarly to the previous example, you use the `$` symbol to specify which column of data (which emotion) you are interested in, indicating that you want entries with a value greater than 0.

If working on a machine running Windows, you will have to indicate to the programme that your text contains accented characters, using the following approach:

### On Mac and Linux

```R
cloud_emotions_data <- c(
  paste(text_words[sentiment_scores$sadness > 0], collapse = " "),
  paste(text_words[sentiment_scores$joy > 0], collapse = " "),
  paste(text_words[sentiment_scores$anger > 0], collapse = " "),
  paste(text_words[sentiment_scores$fear > 0], collapse = " "))
```

### On Windows

Windows needs an additional step to indicate that the text is in UTF-8 format, which is done using the `iconv` function.

```R
cloud_emotions_data <- c(
  paste(text_words[sentiment_scores$sadness > 0], collapse = " "),
  paste(text_words[sentiment_scores$joy > 0], collapse = " "),
  paste(text_words[sentiment_scores$anger > 0], collapse = " "),
  paste(text_words[sentiment_scores$fear > 0], collapse = " "))

cloud_emotions_data <- iconv(cloud_emotions_data, "latin1", "UTF-8")
```

Once you have collected the data for the four target emotions, you can organise it into four separate 'documents' to use as the basis for creating each of your four word clouds:

```R
cloud_corpus <- Corpus(VectorSource(cloud_emotions_data))
```

Next, you transform the corpus into a term-document matrix using the `TermDocumentMatrix()` function. Then you specify that you want the data organised as a matrix using the `as.matrix()` function.

To see the first few entries of this output, use the `head` function:

```R
cloud_tdm <- TermDocumentMatrix(cloud_corpus)
cloud_tdm <- as.matrix(cloud_tdm)
head(cloud_tdm)

>               Docs
> Terms          1 2 3 4
> abandonado     4 0 4 0
> abandonar      1 0 0 0
> abandonará     2 0 0 0
> abandonaré     1 0 0 0
> abandonarías   1 0 0 0
> abandono       3 0 3 0
```

Now, rename the numbered columns with the names of the four emotions you collected, in the order in which you collected them, so that the output is more human-readable. Again, you can see the state of your dataset with the `head` function:

```R
colnames(cloud_tdm) <- c('sadness', 'joy', 'anger', 'fear')
head(cloud_tdm)

>               Docs
> Terms          sadness joy anger fear
> abandonado           4   0     4    4
> abandonar            1   0     0    1
> abandonará           2   0     0    2
> abandonaré           1   0     0    1
> abandonarías         1   0     0    1
> abandono             3   0     3    3
```

Finally, you can visualise these results as a word cloud. The font size of a word in a word cloud is linked to the frequency of its appearance in the document. We can also control a number of other aspects of the word cloud's presentation.

To start, use the `set.seed()` function to ensure that, while following along, your outputs will look the same as in the example (if you don't do this, your output will have a randomised pattern and may not match the screenshots herein -- which may not matter for your own research results, but is helpful when following along).

To generate the cloud itself, use the [comparison.cloud](https://perma.cc/6QRY-5KBG) function from the R `wordcloud` package. In this example, you will indicate that the object `cloud_tdm` should use a non-random word order. You will also specify the colour scheme of each group of words, the title size, and the general scale of the visualisation. To make the cloud readable, you will also specify a maximum number of terms. These parameters are all adjustable.

```R
set.seed(757) # this can be set to any integer
comparison.cloud(cloud_tdm, random.order = FALSE,
                 colors = c("green", "red", "orange", "blue"),
                 title.size = 1, max.words = 50, scale = c(2.5, 1), rot.per = 0.4)
```

You should get an image similar to Figure 2, although with the location of the words altered, since the cloud is generated according to the size of the canvas.

{% include figure.html filename="tr-en-analisis-de-sentimientos-r-2.png" alt="Word Cloud of most frequent words corresponding to sadness, happiness, anger, and joy in the novel 'Miau' by Pérez Galdós. The words are colour-coded to show that they correspond with one of the four emotions, and use a cartesian coordinate system so that all words most closely associated with happiness are in the top left quadrant, sadness in the top right, and so on. Words that are most prevalent in the text appear closest to the centre of the graph. The word 'muy' (Spanish for 'very') is the largest word, and is associated with sadness. This is included because it shows which words are prevalent, and which emotions they are most closely associated with according to the sentiment analysis algorithm." caption="Figure 2: Word Cloud of most frequent words corresponding to sadness, happiness, anger, and joy in the novel 'Miau' by Pérez Galdós." %}
What does the word cloud suggest to you? Surely the connection of 'very' (muy) to the sadness emotion and of 'money' (dinero) to the anger emotion needs further consideration. These less obvious results are exactly what many scholars warn about when thinking about sentiment analysis, and they demonstrate why a researcher must always ask whether the outcomes of the analysis make sense before trying to draw any research conclusions from them. As noted, the sentiment lexicon used in this tutorial was automatically translated from English, and is thus not perfect when applied to Spanish-language text.

## Visualising Emotion and Sentiment Across the Progression of a Text

To complement the isolated readings of emotions above, you can also study the fluctuation of positive and negative sentiment across the text (Figure 3). R provides a way to both normalise and visualise this time-series sentiment analysis data. Since the sentiment analysis algorithm assigns both positive and negative sentiment scores, you need to generate data on a scale from -1 (the most negative moments) to 1 (the most positive moments), where 0 is considered neutral. To calculate these scores, you multiply the values in the negative column of the original `sentiment_scores` data table by -1 and then add the result to the positive values.

```R
sentiment_valence <- (sentiment_scores$negative * -1) + sentiment_scores$positive
```

Finally, you can generate a graph with the `simple_plot()` function, which is built into the `syuzhet` package, and which offers you a choice of two different graphs: the first presents the various measurements calculated by the algorithm, and the second is a normalisation of those measures. The horizontal axis (X axis) presents the text in 100 normalised fragments and the vertical axis (Y axis) shows the strength of the sentiment in the text. Depending on the computing power of your machine, the graph may take 20 to 30 minutes to finish rendering.

```R
simple_plot(sentiment_valence)
```

> Make sure your graph display window is sized large enough to actually draw the graph. If it isn't, you will see the error message: `Error in plot.new() : figure margins too large.`

{% include figure.html filename="tr-en-analisis-de-sentimientos-r-3.png" alt="A pair of line charts that show the rough emotional intensity of positive and negative sentiment across the whole novel. The graphs use a line graph with a solid curving line moving left-to-right to represent the beginning, middle, and end. In this particular novel, a simplified chart shows that the sentiment rises through the first quarter of the story, before diving in the middle and staying low until the end, representing quite a depressing story. A less simplified version shows that the sentiment picks up a few times later in the novel, but dips well into negative sentiment a number of times. This is included because it shows the emotional intensity of the novel over time." caption="Figure 3: Evolution of the use of positive and negative sentiment through the novel 'Miau' by Pérez Galdós" %}
Based on Figure 3, you might conclude that the novel *Miau* begins with fairly neutral language, transitions into moments of happiness early on, moves into some quite negative description in the remaining pages, and ends on a negative note, as indicated by the sample sentence we drew upon earlier in the lesson, in which Villaamil dies. Anyone who has read the novel will know the protagonist's despair well, so in this case the analysis matches a traditional reading of the text, which answers our research question about whether or not the automated sentiment analysis reflects a close reading of it.

# Save Your Data

If you want to save the data so that you can come back to it later, you can archive it in comma-separated values ([CSV](https://perma.cc/64FY-NTSU)) format using the `write.csv()` function. The code below saves your main data table, `sentiment_scores`, which contains the results for the eight emotions and two sentiments we generated, into a CSV file, and adds the word associated with each row in the left-most column to act as a helpful label.

```R
write.csv(sentiment_scores, file = "analysis_sent_miau.csv", row.names = text_words)
```

Now you have all of the tools and knowledge you need to start analysing your own texts and comparing them with each other.

# Loading your own Sentiment Lexicon

While the above introduction provides you with many tools for exploring sentiment analysis, this tutorial has not presented an exhaustive list of possibilities.

You may be working on a project in which you have already created a sentiment dictionary that you would like to use. Or perhaps you need to customise a vocabulary and its corresponding sentiment scores to apply to a particular cultural or temporal context related to your research. Maybe you're looking to improve upon the automatically translated results of the NRC lexicon used here. In each of those cases, as of mid 2022, you can also load your own lexicon dataset into the software, using the `custom` method of the `get_sentiment()` function, to repeat some of the calculations and visualisations used in this lesson.

To load your own sentiment lexicon, you first have to create or modify a dataframe containing, at minimum, a column of words and a column with the corresponding scores for those words, which the author recommends saving in a CSV file format.

Try this example:

| word | value |
| --- | --- |
| amor | 1 |
| cólera | -1 |
| alfombra | 0 |
| catástrofe | -2 |

Next, to load your saved data from a CSV file, use the `read.csv` function, which will create a new dataset that you can access in R just as you have in the above examples (change 'FILEPATH' to the full location of your CSV file):

```R
personalised_vocabulary <- read.csv("FILEPATH")
method <- "custom"
sentiments_sentences <- get_sentiment(sentence_vector, method = method, lexicon = personalised_vocabulary)
```
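If you would rather build and save such a lexicon directly in R than in a spreadsheet, a minimal sketch follows; the `data.frame` construction and the filename `my_lexicon.csv` are illustrative assumptions rather than part of the original workflow:

```R
# Sketch only: build the four-word example lexicon from above as a
# dataframe and write it to disk, so it can be re-loaded with read.csv().
personalised_vocabulary <- data.frame(
  word  = c("amor", "cólera", "alfombra", "catástrofe"),
  value = c(1, -1, 0, -2)
)
write.csv(personalised_vocabulary, file = "my_lexicon.csv", row.names = FALSE)
```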
> Warning: If you get the error message 'incomplete final line found by readTableHeader', this indicates that your CSV file has not been formatted properly and lacks an 'end of line' character at the end of the file. The easiest way to correct this is to open your CSV file in a text editor (not MS Word), scroll to the end of the file, press return, and re-save the file. A fuller explanation of this error is available on Stack Overflow.
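Once the file loads without complaint, it is worth a quick check that the lexicon arrived as a two-column dataframe before using it. A small sketch, using `head()` from earlier in the lesson together with base R's `str()`:

```R
# Confirm the custom lexicon loaded as expected: a 'word' column and a
# numeric 'value' column, with one entry per row.
head(personalised_vocabulary)
str(personalised_vocabulary)
```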
If you want to visualise sentiment across the progression of a text, you can use the `plot` function, which takes the same graphing parameters that you've already learned:

```R
plot(sentiments_sentences,
  type = "l",
  main = "'Miau' by Benito Pérez Galdós, 1907 edition",
  sub = "Analysis by Dr Jennifer Isasi",
  xlab = "emotions", ylab = " "
  )
```

Keep in mind that this form of customised analysis is limited, and you may not be able to perform all of the same operations that we introduced above. For example, with the model dictionary above you would have no information about individual emotions, so you would not be able to make a word cloud in the same way.

# Works Cited

* Arnold, Taylor, and Lauren Tilton. 'Basic Text Processing in R', *Programming Historian* 6 (2017), https://doi.org/10.46430/phen0061.
* Damasio, Antonio R. *El error de Descartes: La razón de las emociones* (Andres Bello, 1999).
* Dewar, Taryn. 'R Basics with Tabular Data', *Programming Historian* 5 (2016), https://doi.org/10.46430/phen0056.
* Gottschalk, Louis, and Goldine Gleser. *The Measurement of Psychological States through the Content Analysis of Verbal Behaviour* (University of California, 1969).
* Heuser, Ryan, Franco Moretti, Erik Steiner. 'The Emotions of London' *Stanford Literary Lab*, Pamphlet 13 (2016): 1-10.
* Hu, Minqing, and Bing Liu. 'Mining and Summarizing Customer Reviews', *Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery & Data Mining* (KDD-2004), 2004.
* Jockers, Matthew. 'Introduction to the Syuzhet Package' *CRAN* (2020), [https://cran.r-project.org/web/packages/syuzhet/vignettes/syuzhet-vignette.html](https://cran.r-project.org/web/packages/syuzhet/vignettes/syuzhet-vignette.html).
* Jockers, Matthew. 'Some thoughts on Annie's thoughts...about Syuzhet' *Matthew L. Jockers* (2015), [http://www.matthewjockers.net/page/2/](https://web.archive.org/web/20190708100723/http://www.matthewjockers.net/page/2/).
* Leemans, Inger, Janneke M. van der Zwaan, Isa Maks, Erika Kujpers, Kristine Steenberge. 'Mining Embodied Emotions: A Comparative Analysis of Sentiment and Emotion in Dutch Texts, 1600-1800' *Digital Humanities Quarterly* 11 (2017).
* Liu, Bing. *Sentiment Analysis and Opinion Mining* (Morgan & Claypool, 2012).
* Meder, Theo, Dong Nguyen, Rilana Gravel. 'The Apocalypse on Twitter' *Digital Scholarship in the Humanities* 31 (2016): 398-410.
* Mohammad, Saif. 'NRC Word-Emotion Association Lexicon', *National Research Council Canada* (2010), [https://saifmohammad.com/WebPages/NRC-Emotion-Lexicon.htm](https://perma.cc/A8M5-2SDG).
* Mohammad, Saif, and Peter D. Turney. 'Crowdsourcing a Word–Emotion Association Lexicon' *Computational Intelligence* 29 (2013): 436-465, doi: 10.1111/j.1467-8640.2012.00460.x.
* Nguyen, Thein Hai, Kiyoaki Shirai, Julien Velcin. 'Sentiment Analysis on Social Media for Stock Movement Prediction' *Expert Systems with Applications* 42 (2015): 9603-9611.
* Nielsen, Finn Årup. 'AFINN Sentiment Lexicon' (2009-2011).
* Pereira Zazo, Óscar. *El analisis de la comunicación en español* (Kendal Hunt, 2015).
* Pérez Galdós, Benito. *Miau* (La Guirnalda, 1888).
* Pérez Galdós, Benito. *Miau* (Sucesores de Hernando, 1907).
* Rodríguez Aldape, Fernando Manuel. *Cuantificación del Interés de un usuario en un tema mediante minería de texto y análisis de sentimiento* (MA Thesis, Universidad Autónoma de Nuevo León, 2013).
* Schmidt, Thomas, Manuel Burghardt, Christian Wolff.
'Towards Multimodal Sentiment Analysis of Historic Plays: A Case Study with Text and Audio for Lessing's Emilia Galotti' *4th Conference of the Association of Digital Humanities in the Nordic Countries* (2019). -* Siddiqui, Nabeel. 'Data Wrangling and Management in R', *Programming Historian* 6 (2017), https://doi.org/10.46430/phen0063. -* Sprugnoli, Rachele, Sara Tonelli, Alessandro Marchetti, Giovanni Moretti. 'Towards Sentiment Analysis for Historical Texts' *Digital Scholarship in the Humanities* 31 (2016): 762-772. -* Stone, Philip, Dexter Dunphy, Marshall Smith. ‘The General Inquirer: A Computer Approach to Content Analysis’ (M.I.T. Press, 1966). -* Swafford, Annie. 'Problems with the Syuzhet Package' *Anglophile in Academia* (2015), [https://annieswafford.wordpress.com/2015/03/02/syuzhet/](https://perma.cc/TYT3-5DTU). -* Wilkinson Saldaña, Zoë. 'Sentiment Analysis for Exploratory Data Analysis,' *Programming Historian* 7 (2018), https://doi.org/10.46430/phen0079 - - - -# Notes - -[^1]: For example, see: Louis Gottschalk, Goldine Gleser (1969) *The Measurement of Psychological States through the Content Analysis of Verbal Behaviour* (University of California); Philip Stone, Dexter Dunphy, Marshall Smith (1966) ‘The General Inquirer: A Computer Approach to Content Analysis’ (M.I.T. Press); Bing Liu, (2012) *Sentiment Analysis and Opinion Mining* (Morgan & Claypool); Thein Hai Nguyen, Kiyoaki Shirai, Julien Velcin (2015). ‘Sentiment Analysis on Social Media for Stock Movement Prediction’ *Expert Systems with Applications* 42: 9603-9611; Theo Meder, Dong Nguyen, Rilana Gravel (2016). ‘The Apocalypse on Twitter’ *Digital Scholarship in the Humanities* 31 (2): 398-410. -[^2]: For some examples in English, see: Inger Leemans, Janneke M. van der Zwaan, Isa Maks, Erika Kujpers, Kristine Steenberge (2017). 'Mining Embodied Emotions: A Comparative Analysis of Sentiment and Emotion in Dutch Texts, 1600-1800' *Digital Humanities Quarterly* 11 (4); Rachele Sprugnoli, Sara Tonelli, Alessandro Marchetti, Giovanni Moretti (2016). 'Towards Sentiment Analysis for Historical Texts' *Digital Scholarship in the Humanities* 31 (4): 762-772; Thomas Schmidt, Manuel Burghardt, Christian Wolff (2019). 'Towards Multimodal Sentiment Analysis of Historic Plays: A Case Study with Text and Audio for Lessing's Emilia Galotti' *4th Conference of the Association of Digital Humanities in the Nordic Countries*; Ryan Heuser, Franco Moretti, Erik Steiner (2016). 'The Emotions of London' *Stanford Literary Lab*, Pamphlet 13: 1-10. -[^3]: Antonio R. Damasio, *El Error de Descartes: La razón de las emociones*. (Barcelona: Andres Bello, 1999). -[^4]: Óscar Pereira Zazo, *El analisis de la comunicación en español* (Iowa: Kendal Hunt, 2015), 32. -[^5]: 'Bing': Minqing Hu and Bing Liu, 'Mining and summarizing customer reviews.', *Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery & Data Mining* (KDD-2004), 2004; 'Afinn': Finn Årup Nielsen, 'AFINN Sentiment Lexicon' (2009-2011); 'NRC': Saif Mohammad, '[NRC Word-Emotion Association Lexicon](https://perma.cc/A8M5-2SDG)', *National Research Council Canada* (2010). -[^6]: Saif Mohammad and Peter D. Turney, 'Crowdsourcing a Word–Emotion Association Lexicon', *Computational intelligence* 29 (2013): 436-465, doi: 10.1111/j.1467-8640.2012.00460.x -[^7]: Richard Socher, 'Recursive Deep Learning for Natural Language Processing and Computer Vision' PhD diss., (Stanford University, 2014). 
-[^8]: Thanks to Mounika Puligurthi, intern at the University of Texas (UT) Digital Scholarship Office (during the spring of 2019), for her help interpreting this calculation.
-[^9]: There are more words assigned to the emotion *sadness* than to *joy*, both in total number of words (2,061 vs 1,552) and in unique words (349 vs 263). The word 'Mother' appears under both sadness and joy with a value of 33 points. What do you think the significance of that classification decision is?
+---
+title: "Sentiment Analysis with 'syuzhet' using R"
+slug: sentiment-analysis-syuzhet
+original: analisis-de-sentimientos-r
+layout: lesson
+collection: lessons
+date: 2021-03-23
+translation_date: 2023-04-01
+authors:
+- Jennifer Isasi
+translator:
+- Adam Crymble
+editors:
+- Maria José Afanador-Llach
+reviewers:
+- Riva Quiroga
+translation-editor:
+- Rolando Rodriguez
+translation-reviewer:
+- Shuang Du
+- Andrew Janco
+review-ticket: https://github.com/programminghistorian/ph-submissions/issues/478
+difficulty: 2
+activity: analyzing
+topics: [distant-reading, r, data-visualization]
+abstract: This lesson teaches you how to obtain and analyse narrative texts for patterns of sentiment and emotion.
+avatar_alt: Engraving of three faces expressing different emotions
+doi: 10.46430/phen0110
+---
+
+{% include toc.html %}
+
+
+# Lesson Objectives
+
+This lesson introduces you to the [`syuzhet`](https://perma.cc/9DNJ-ZWPW) [sentiment analysis](https://perma.cc/A92Q-PM4D) algorithm, written by [Matthew Jockers](https://perma.cc/9PF8-3GZ4) using the [R programming language](https://perma.cc/W78Z-FUAX), and applies it to a single narrative text to demonstrate its research potential. The term 'syuzhet' is Russian (сюже́т) and translates roughly as 'plot', or the order in which events in the narrative are presented to the reader, which may differ from the actual time sequence of events (the '[fabula](https://perma.cc/M7C9-XT99)'). The `syuzhet` package similarly considers sentiment analysis in a time-series-friendly manner, allowing you to explore the developing sentiment in a text across the pages.
+
+To make the lesson useful for scholars working with non-English texts, this tutorial uses a Spanish-language novel, *[Miau](https://perma.cc/G6V3-JCWS)* by [Benito Pérez Galdós](https://perma.cc/9P3P-2FQP) (1888), as its case study. This allows you to learn the steps necessary for everything from handling accented characters to thinking through the intellectual problems of applying English-language algorithms to non-English texts. You do not need to know Spanish to follow the lesson (though you will if you want to read the original novel). Some steps in the following instructions may not be necessary if you are working with English-language texts, but those steps should be self-evident.
+
+Although the lesson is not intended for advanced R users, it is expected that you will have some knowledge of R, including an expectation that you already have [R installed](https://www.r-project.org/) and that you know how to load R packages. The author recommends downloading [RStudio](https://www.rstudio.com/) as a user-friendly environment for working in R. If you have not used R before, you may first want to try working through some of the following introductory R lessons:
+
+* Taylor Arnold and Lauren Tilton, '[Basic Text Processing in R](/en/lessons/basic-text-processing-in-r)', *Programming Historian* 6 (2017), https://doi.org/10.46430/phen0061
+* Taryn Dewar, '[R Basics with Tabular Data](/en/lessons/r-basics-with-tabular-data)', *Programming Historian* 5 (2016), https://doi.org/10.46430/phen0056
+* Nabeel Siddiqui, '[Data Wrangling and Management in R](/en/lessons/data-wrangling-and-management-in-r)', *Programming Historian* 6 (2017), https://doi.org/10.46430/phen0063
+
+You may also be interested in other sentiment analysis lessons:
+
+* Zoë Wilkinson Saldaña, '[Sentiment Analysis for Exploratory Data Analysis](/en/lessons/sentiment-analysis),' *Programming Historian* 7 (2018), https://doi.org/10.46430/phen0079
+* Matthew Jockers, '[Introduction to the Syuzhet Package](https://perma.cc/9BN2-F3N3)' (2020).
+
+At the end of the lesson you will be able to:
+
+* Develop appropriate research questions that apply sentiment analysis to literary or narrative texts
+* Use the R programming language, RStudio, and the `syuzhet` package with the [NRC Word-Emotion Association Lexicon](https://perma.cc/A8M5-2SDG) to generate sentiment scores for words in texts of various languages
+* Critically interpret the results of your sentiment analysis
+* Visualise the results through a range of graphs (bar, word cloud) to aid interpretation
+
+This lesson was written and tested with R version 4.2.x on a Mac and version 4.0.x on a Windows machine.
+
+> Generally, R works the same on Windows, Mac, and Linux operating systems. However, when working on a Windows machine with non-English texts, or texts containing accents or special characters, you will need to include some extra instructions to apply [UTF-8](https://perma.cc/5HY2-HHN2) character encoding to ensure special characters are properly interpreted. Where this is a necessary step, it is shown below.
+
+Translator's Note for Educators:
+
+A number of steps in this tutorial involve processing that can take 15 to 30 minutes or more, during which participants will have to wait. This may make the tutorial difficult to use in a time-limited live event such as a workshop. Note also that to use this tutorial in a workshop setting, participants will need the ability to install software on their machines.
+
+# Background Information
+
+This section introduces the concepts and the software that you will use to perform a sentiment analysis of a text. It also introduces the case study document, the novel *Miau* by Benito Pérez Galdós, and the ways you can apply sentiment analysis meaningfully to a text such as *Miau*.
+
+## Sentiment Analysis
+
+Sentiment analysis, also known as opinion mining, is an umbrella term for a number of processes for automatically calculating the degree of negativity or positivity in a text. It has been used for some time in the fields of marketing and politics to better understand the public mood;[^1] however, its adoption in literary studies is more recent, and as yet no single method dominates.[^2] Some approaches to sentiment analysis also enable you to measure the presence of a number of different emotions in a text, as will be the case for the example in this tutorial.
+
+What is the difference between 'emotion' and 'sentiment'? The two words are often used interchangeably in English but refer to different concepts.
+
+According to Antonio R. Damasio, 'emotions' are the biologically rooted, instinctive reactions of our bodies to environmental stimuli.[^3] There is no universally agreed list of basic emotions, but a common model includes six: anger (or rage), joy, disgust (or revulsion), fear, sadness, and surprise -- though for Damasio the last of those falls into a category he would describe as a '[secondary emotion](https://perma.cc/Y675-4C52)'. In the case of the automated system that you will use, the secondary emotions 'anticipation' and 'trust' are also options for analysis.
+
+'Sentiment', on the other hand, is both the action and the effect of feeling an emotion. In other words, as Óscar Pereira Zazo notes, 'when an object, a person, a situation, or a thought brings us joy, it begins a process that can lead to the feeling of being joyful or happy'.[^4] Sentiment analysis suggests that you can measure the intensity of this effect (either positive, negative, or neutral) on the manifestation of an emotion.
+
+This lesson distinguishes between the two terms as described above. The effect (sentiment) will be measured as it evolves across the pages of the text, while the emotions will be measured by looking at word use more generally.
+
+## NRC Word-Emotion Association Lexicon
+
+Many sentiment analysis algorithms depend upon pre-compiled lexicons or dictionaries that assign numerical sentiment scores to words or phrases based on findings from previous linguistic research. The R package `syuzhet` has been designed to allow you to choose from four of these sentiment lexicons: [Bing](https://perma.cc/G9RV-RA82), [Afinn](https://perma.cc/GZB2-J2RH), [Stanford](https://perma.cc/TK8L-44ZW), and the [NRC Word-Emotion Association Lexicon](https://perma.cc/A8M5-2SDG).[^5] This lesson uses the NRC lexicon, as it is the only one of the four that can currently be used with non-English texts.
+
+This lexicon, which includes positive and negative sentiment values as well as eight emotional categories, was developed by Saif M. Mohammad, a scientist at the National Research Council Canada (NRC). The dataset that forms the lexicon has been manually annotated using the [Maximum Difference Scaling](https://perma.cc/KWW4-AFJ4) technique, or MaxDiff, to determine the most negative or positive sets of words relative to other words -- a sort of ranking of the sentiment intensity of words.[^6] This particular lexicon has 14,182 unigrams (words) classified as either positive or negative. It also classifies a word's connection to various emotions: anger, anticipation, disgust, fear, joy, sadness, surprise, and trust. Thanks to automatic translation, the lexicon is available in more than one hundred languages, though the translations may lack linguistic nuance in unpredictable ways.
+
+The license on the dataset allows free use of the NRC lexicon for research purposes. All data is available for download.
+
+The [NRC Word-Emotion Association Lexicon](https://perma.cc/A8M5-2SDG) website outlines the different categories and classifications in the dataset. It also provides a number of resources that can help you to better understand how the lexicon was built, including links to published research, more information on obtaining values for individual words, the organisation of the dataset, and how to extend it.
+
+## The `syuzhet` R Package
+
+The [R package](https://cran.r-project.org/web/packages/syuzhet/vignettes/syuzhet-vignette.html) `syuzhet` was released in 2015 by Matthew Jockers; at the time of writing it is still being actively maintained (we use version 1.0.6, the November 2020 release, in this lesson).
+
+If you intend to use the software on non-English texts, you should be aware that the package has been developed and tested in English, and its reception has not been without controversy. [Annie Swafford](https://perma.cc/TYT3-5DTU), among others, challenged some of the algorithm's assumptions about text and the use of `syuzhet` in a research setting, including concerns about incorrectly splitting sentences involving quotation marks, and problems with using a sentiment lexicon designed for modern English on a historic text that uses the same words in slightly different ways. Assigning concrete values of measurement to literary texts, which are by their nature quite subjective, is always challenging and potentially problematic. A series of archived blog entries by Jockers outlines [his thoughts on the method and addresses some of the criticisms](https://web.archive.org/web/20190708100723/https://www.matthewjockers.net/page/2/) about the degree to which sentiment can accurately be measured when sometimes even humans disagree on a passage of text's effects on the reader.
+
+
+> Some Research Warnings: The lexicon assigns values to individual words, which are then used as the basis for the quantitative analysis. Those values were assigned by humans working in North America and may carry English-language and North American cultural biases. Researchers must therefore take several things into account before applying this methodology in their work:
+>
+> - The Spanish lexicon (and other non-English versions) is a direct translation carried out via machine translation. In the author's opinion, these systems are already fairly reliable when translating between English and Spanish, but less so for other languages that NRC claims to support, such as Basque.
+> - The sentiment and emotion scores of each word need to be understood in cultural and temporal context. A term that the people building the NRC lexicon labelled positive may be negative in other contexts. This type of approach is therefore inherently coarse in its ability to reflect a *true* reading of the texts as conducted by a subject specialist through close reading.
+> - The author does not recommend the use of this methodology in texts that are significantly metaphorical or symbolic.
+> - This particular method does not properly handle negation. For example, it will wrongly classify 'I am not happy' as positive because it looks at individual words only. Research by Richard Socher (2014) has attempted to improve the handling of negation in sentiment analysis, and may be worth exploring for those with a genuine research need.[^7]
+>
+> In keeping with the adaptable spirit of *Programming Historian* lessons in other languages, the author has decided to use `syuzhet` in its original form. However, at the end of the lesson you will be introduced to some advanced functions that will help you use your own sentiment dictionary with the package.
+
+As this tutorial analyses the emotions of a Spanish-language text, Table 1 provides a simple translation matrix of the key emotion names for ease of reference.
+
+
+Table 1: Emotion categories in English and Spanish
+
+| English | Spanish |
+| -------- | ------- |
+| anger | enfado |
+| anticipation | anticipación |
+| disgust | disgusto |
+| fear | miedo |
+| joy | alegría |
+| sadness | tristeza |
+| surprise | sorpresa |
+| trust | confianza |
+| negative | negativo |
+| positive | positivo |
+
+
+## A Brief Example
+
+Before diving into the full analysis of our text *Miau*, we offer a short example of sentiment analysis in action, using `syuzhet` together with the NRC lexicon and focusing on the outputs rather than the code. The analysis uses R to [tokenise](https://perma.cc/243B-E9M7) the text into a list of single-word strings (unigrams), which are then analysed one at a time. Sentence-level sentiment analysis is also possible, but it is not the focus of this tutorial.
+
+Consider the analysis of the final passage from *Miau*:
+
+> **Spanish Original**: Retumbó el disparo en la soledad de aquel abandonado y tenebroso lugar; Villaamil, dando terrible salto, hincó la cabeza en la movediza tierra, y rodó seco hacia el abismo, sin que el conocimiento le durase más que el tiempo necesario para poder decir: «Pues... sí...».
+>
+> **Rough English Translation**: The shot boomed out in the solitude of that abandoned and gloomy space; Villaamil, taking a terrible leap, bowed his head to the moving earth and rolled towards the abyss, his awareness lasting no longer than the time necessary to say: 'Well...yes...'.
+>
+> *Miau* by Benito Pérez Galdós.
+
+This passage will be transformed into a list of words, stored here in an object called `example`:
+
+```R
+example
+
+> [1] "retumbó"    "el"         "disparo"    "en"         "la"         "soledad"
+> [7] "de"         "aquel"      "abandonado" "y"          "tenebroso"  "lugar"
+> [13] "villaamil" "dando"      "terrible"   "salto"      "hincó"      "la" ...
+```
+
+Using the sentiment analysis function, you then calculate the eight emotions as classified by NRC, as well as the positive and negative scores of each word. The following is the result for the first few words in this short passage:
+
+```R
+print(example_2, row.names = example)
+
+>            anger anticipation disgust fear joy sadness surprise trust negative positive
+> retumbó        0            0       0    0   0       0        0     0        0        0
+> el             0            0       0    0   0       0        0     0        0        0
+> disparo        3            0       0    2   0       2        1     0        3        0
+> en             0            0       0    0   0       0        0     0        0        0
+> la             0            0       0    0   0       0        0     0        0        0
+> soledad        0            0       0    2   0       2        0     0        2        0
+> de             0            0       0    0   0       0        0     0        0        0
+> aquel          0            0       0    0   0       0        0     0        0        0
+> abandonado     2            0       0    1   0       2        0     0        3        0
+> y              0            0       0    0   0       0        0     0        0        0
+> tenebroso      0            0       0    0   0       0        0     0        0        0
+> lugar          0            0       0    0   0       0        0     0        0        0
+> villaamil      0            0       0    0   0       0        0     0        0        0
+> dando          0            0       0    0   0       0        0     0        0        1
+> terrible       2            1       2    2   0       2        0     0        2        0
+> salto          0            0       0    0   0       0        0     0        0        0
+> hincó          0            0       0    0   0       0        0     0        0        0
+> la             0            0       0    0   0       0        0     0        0        0
+...
+```
+
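+For reference, the code behind this small example amounts to two function calls, both covered step by step later in this lesson. The following is a minimal sketch, assuming the passage has been saved as a character string called `passage` (a name used here for illustration only):
+
+```R
+library(syuzhet)
+
+# Tokenise the passage into a list of single-word strings (unigrams)
+example <- get_tokens(passage)
+
+# Score each unigram against the NRC lexicon, using its Spanish vocabulary
+example_2 <- get_nrc_sentiment(example, lang = "spanish")
+```
+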
+Translator's Note:
+R will not translate these into English for you, but to make the tutorial easier to follow for English speakers, the same output would look like the following if the passage were in English (notice that when translating word by word, the results are slightly different from when translating whole passages, as above):
+
+```R
+print(example_2, row.names = example)
+
+>            anger anticipation disgust fear joy sadness surprise trust negative positive
+> boomed         0            0       0    0   0       0        0     0        0        0
+> the            0            0       0    0   0       0        0     0        0        0
+> shot           3            0       0    2   0       2        1     0        3        0
+> in             0            0       0    0   0       0        0     0        0        0
+> the            0            0       0    0   0       0        0     0        0        0
+> solitude       0            0       0    2   0       2        0     0        2        0
+> of             0            0       0    0   0       0        0     0        0        0
+> that           0            0       0    0   0       0        0     0        0        0
+> abandoned      2            0       0    1   0       2        0     0        3        0
+> and            0            0       0    0   0       0        0     0        0        0
+> gloomy         0            0       0    0   0       0        0     0        0        0
+> place          0            0       0    0   0       0        0     0        0        0
+> villaamil      0            0       0    0   0       0        0     0        0        0
+> taking         0            0       0    0   0       0        0     0        0        1
+> terrible       2            1       2    2   0       2        0     0        2        0
+> leap           0            0       0    0   0       0        0     0        0        0
+> bowed          0            0       0    0   0       0        0     0        0        0
+> his            0            0       0    0   0       0        0     0        0        0
+...
+```
+
+The results are returned in a [data frame](https://perma.cc/ER4M-WRRC). Under this scoring system, every word has a default value of 0, indicating no connection to the corresponding emotion. Any words not in the NRC lexicon will be treated by the code as if they have values of 0 for all categories. Any word with a score greater than 0 is both present in the NRC lexicon and has been assigned a value by the researchers responsible for that lexicon, indicating the strength of its connection to one of the emotional categories.
+
+In this example we can see that the words 'disparo' (shot), 'soledad' (solitude), 'abandonado' (abandoned), and 'terrible' (terrible) have a negative score associated with them (second-to-last column), while 'dando' (taking) is judged as a positive word (last column).
+
+We are also able to see which emotions each word is connected to: 'disparo' (shot) is associated with *anger* (3), *fear* (2), *sadness* (2), and *surprise* (1). Higher numbers mean a stronger connection to that emotion.
+
+The possibilities for exploring, analysing, and visualising these results depend not only on your programming skills but also on your research needs. To help you get the most out of sentiment analysis, this lesson shows you how to analyse the data and build an understanding of the results through various visualisations.
+
+## Appropriate Research Questions
+
+As already stated, in this lesson you will analyse the Spanish novel *Miau* by [Benito Pérez Galdós](https://perma.cc/9P3P-2FQP), published in 1888. Pérez Galdós is known for his Spanish realist novels, and this particular story takes place in Madrid at the end of the nineteenth century, satirising the government administration of the day. In a kind of tragic comedy, we witness the final days of Ramón Villaamil after he loses his job, while his family tries to stretch their meagre budget and keep up the pretence of wealthy living. Villaamil's spiral of misfortune and his inability to find a new job end in tragedy.
+
+From a research standpoint, the question is: Can we observe the emotional downward spiral of this plot through an automatic extraction of sentiment in the text? Does a human reader's interpretation of the negative experiences of Villaamil match the results of the algorithm? And if so, what words within the novel are used most to signal the emotional trajectory of the story?
+
+
+# Obtaining Sentiment and Emotion Scores
+
+The process of conducting the sentiment analysis is a four-stage affair. First, the code must be installed and loaded into the R environment of your choice. Next, you must load and pre-process the text you want to analyse. You then conduct your analysis. Finally, you turn your attention to interpreting the results.
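+
+Condensed to its essentials, that four-stage pipeline amounts to only a handful of lines of code. The sketch below is a roadmap rather than something to run right now: every function in it is explained step by step in the sections that follow, and `FILEPATH` is a placeholder for the location of the text on your own computer.
+
+```R
+# 1. Load the packages (installation is covered in the next section)
+library(syuzhet)
+
+# 2. Load and pre-process the text
+text_string <- get_text_as_string("FILEPATH")  # read the novel as one long string
+text_words <- get_tokens(text_string)          # split the string into single-word tokens
+
+# 3. Conduct the analysis using the NRC lexicon
+sentiment_scores <- get_nrc_sentiment(text_words, lang = "spanish")
+
+# 4. Begin interpreting the results
+summary(sentiment_scores)
+```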
+
+## Install and Load Relevant R Packages
+
+Before processing the text, you must first install and load the correct R code packages. In this case, that includes [`syuzhet`](https://cran.r-project.org/web/packages/syuzhet/vignettes/syuzhet-vignette.html). You will also be visualising the results, which will require a number of other R packages: [`RColorBrewer`](https://cran.r-project.org/web/packages/RColorBrewer/index.html), [`wordcloud`](https://perma.cc/GM67-HBH3), [`tm`](https://perma.cc/T2JG-LEBJ) and [`NLP`](https://perma.cc/NS79-H5DH).
+
+To install and load these packages, copy and execute the sample code below in your chosen R coding environment. The first few lines will install the packages (only needed if you haven't already got the packages installed). The second set of lines will load them so that you can use them in your programme. The installation of these packages may take a few minutes.
+
+```R
+# Install the Packages
+install.packages("syuzhet")
+install.packages("RColorBrewer")
+install.packages("wordcloud")
+install.packages("tm")
+
+# Load the Packages
+library(syuzhet)
+library(RColorBrewer)
+library(wordcloud)
+library(tm)
+```
+
+## Load and Prepare the Text
+
+Next, download a machine-readable copy of the novel, [*Miau*](/assets/analisis-de-sentimientos-r/galdos_miau.txt), and make sure to save it as a .txt file. When you open the file you will see that the novel is in [plain text](https://perma.cc/Z5WH-V9SW) format, which is essential for this particular analysis using R.
+
+With the text at hand, you first need to load it into R as one long string so that you can work with it programmatically. Make sure to replace `FILEPATH` with the location of the novel on your own computer (don't just type 'FILEPATH'). This loading process is slightly different on Mac/Linux and Windows machines:
+
+### On Mac and Linux
+
+You can [find the FILEPATH](https://perma.cc/ZXZ8-FZHG) using your preferred method. The final format on my computer is `/Users/Isasi/Desktop/miau.txt`
+
+On a Mac/Linux machine, use the function `get_text_as_string`, which is part of the `syuzhet` package:
+
+```R
+text_string <- get_text_as_string("FILEPATH")
+```
+
+### On Windows
+
+You can [find the FILEPATH](https://perma.cc/N9R4-HEJY) using your preferred method. The final format on my computer is `C:\\Users\\Isasi\\Desktop\\miau.txt`
+
+The Windows operating system cannot directly read characters with tildes or accents, or characters from extended alphabets, all of which are commonly used in languages such as Spanish, French, and Portuguese. Therefore we must first alert the software that our novel uses the [UTF-8](https://perma.cc/5HY2-HHN2) set of characters (which includes accents and many other non-English characters). We do this using the `scan` function.
+
+> Note that when typing your filepath, you may need to escape the backslashes (`\`). To do this, just add a second backslash each time one appears in the path (e.g. "`C:\\...`").
+
+```R
+text_string <- scan(file = "FILEPATH", fileEncoding = "UTF-8", what = character(), sep = "\n", allowEscapes = T)
+```
+---
+
+Now that the data has loaded, you have to format it in the way the sentiment analysis algorithm expects to receive it. In this particular case, that is as a [list](https://perma.cc/LPV9-XGX8) containing either single words or sentences (here you will focus on individual words only).
+
+This means you need an intermediate step between loading the text and extracting the sentiment values. To meet this need, we will divide the character string into a list of words, sometimes also referred to as [unigrams](https://perma.cc/FX4C-ZLYB) or [tokens](https://perma.cc/V6UY-KKVK).
+
+To do this you can use the package's built-in `get_tokens()` function to generate a new data object containing each individual word as a list. This function also removes spaces and punctuation from the original text. This approach to tokenisation uses [regular expressions](https://perma.cc/W7YD-K3R7) and is not appropriate for all use cases. It will, for example, split hyphenated words into two. Consider the implications of your chosen method of tokenisation for your text; you can use any method you like, as long as the output is in the same format as in the example below.
+
+```R
+text_words <- get_tokens(text_string)
+head(text_words)
+
+> [1] "miau"   "por"    "b"      "pérez"  "galdós" "14"
+```
+
+Now you can use the `length()` function to count how many words are in the original text:
+
+```R
+length(text_words)
+
+> [1] 97254
+```
+
+If you want to analyse the text by sentence, use the `get_sentences()` function and follow the same process (except for creating the word cloud, below):
+
+```R
+sentence_vector <- get_sentences(text_string)
+length(sentence_vector)
+
+> [1] 6022
+```
+
+
+## Extracting Data with the NRC Sentiment Lexicon
+
+Now you can use the `get_nrc_sentiment` function to obtain the sentiment scores for each word in the novel. The default vocabulary for the software is English. Since this text is in Spanish, you will use the `lang` argument to set the vocabulary to Spanish. This would not be necessary if working on an English text. Then you will create a new data object to store the extracted data so that you can work with it further. The `get_nrc_sentiment` function searches for the presence of the eight emotions and two sentiments against each word in your list, and assigns each a number greater than 0 if the word is found within the NRC's lexicon. Depending on the speed of your computer and the nature of your text, this process may take between 15 and 30 minutes.
+
+```R
+sentiment_scores <- get_nrc_sentiment(text_words, lang="spanish")
+```
+
+You can also use this package with [a range of other languages](https://perma.cc/9BN2-F3N3), though the 2020 release only works on languages with Latin-based alphabets. Other languages that can be substituted for `spanish` in the above line of code are: `basque`, `catalan`, `danish`, `dutch`, `english`, `esperanto`, `finnish`, `french`, `german`, `irish`, `italian`, `latin`, `portuguese`, `romanian`, `swedish`, and `welsh`. We can hope that the functionality will improve in future to include more languages.
+
+Some users have reported getting a warning message when the code finishes running. At the time of writing, this is a warning that the `syuzhet` codebase may need to be updated in future, but it should not affect your ability to use the package at present. The warning reads: "spread_() was deprecated in tidyr 1.2.0. Please use spread() instead. The deprecated feature was likely used in the syuzhet package. Please report the issue to the authors." In this case, only Matthew Jockers can fix this, as it is an issue with the code he created, not with your instructions to run it.
+
+When the process finishes, you may want to verify the contents of the new data object. To avoid printing thousands of lines of text, you can use the `head()` function to show only the first six unigrams. If you are following the example, you should see the following (which is lacking in context at this point):
+
+```R
+head(sentiment_scores)
+
+>   anger anticipation disgust fear joy sadness surprise trust negative positive
+> 1     0            0       0    0   0       0        0     0        0        0
+> 2     0            0       0    0   0       0        0     0        0        0
+> 3     0            0       0    0   0       0        0     0        0        0
+> 4     0            0       0    0   0       0        0     0        0        0
+> 5     0            0       0    0   0       0        0     0        0        0
+> 6     0            0       0    0   0       0        0     0        0        0
+```
+
+### Summary of the Text
+
+More interesting is a summary of the values associated with each of the eight emotions and two sentiments, which can be displayed using the `summary()` function. This can be very useful when comparing various texts, and can allow you to see different measures, such as the average relative value of each of the emotions and the two sentiments. For example, we can see that the novel *Miau*, on average ([mean](https://perma.cc/5NKH-2TYV)), uses more positive (0.05153) than negative (0.04658) language, according to the algorithm. However, it seems that terms associated with sadness (0.02564) are also more prevalent than those associated with joy (0.01929).
+
+This summary output also shows a number of other calculations, many of which have a value of 0, including the [median](https://perma.cc/KB36-B855). Words that are not found in the sentiment lexicon (NRC) will automatically be treated as if they have a value of 0. Because there are a lot of categories and the story is quite complex, it is not surprising that no one emotion or sentiment has distinctively high statistical values. This makes the minimum, maximum, and mean the most useful measures from this summary output.
+
+```R
+summary(sentiment_scores)
+
+>      anger          anticipation       disgust            fear
+>  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000
+>  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000
+>  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000
+>  Mean   :0.01596   Mean   :0.02114   Mean   :0.01263   Mean   :0.02243
+>  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000
+>  Max.   :5.00000   Max.   :3.00000   Max.   :6.00000   Max.   :5.00000
+>       joy            sadness           surprise           trust
+>  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000
+>  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000
+>  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000
+>  Mean   :0.01929   Mean   :0.02564   Mean   :0.01035   Mean   :0.03004
+>  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000
+>  Max.   :5.00000   Max.   :7.00000   Max.   :2.00000   Max.   :3.00000
+>     negative          positive
+>  Min.   :0.00000   Min.   :0.00000
+>  1st Qu.:0.00000   1st Qu.:0.00000
+>  Median :0.00000   Median :0.00000
+>  Mean   :0.04658   Mean   :0.05153
+>  3rd Qu.:0.00000   3rd Qu.:0.00000
+>  Max.   :7.00000   Max.   :5.00000
+```
+
+# Interpreting the Results
+
+You now have the quantitative results of your sentiment analysis of a text. Now, what can you do with these numbers? This section introduces three different visualisations of the data: bar charts, word counts, and word clouds, which offer quick but different ways of making sense of the outputs and telling a story or forming an argument about what you've discovered.
+
+## Bar Chart by Emotion
+
+To quickly get a sense of which emotions have a major presence in the text, a bar chart is both a simple and effective format for displaying your data (Figure 1). The built-in [`barplot()`](https://perma.cc/5DXU-CYS9) function can be paired with the summary data of each of the emotions: *anger*, *anticipation*, *disgust*, *fear*, *joy*, *sadness*, *surprise*, and *trust*. These are stored in columns 1 to 8 of our data table. This approach uses the `prop.table()` function to convert the scores for each emotion into proportions of the whole before plotting.[^8]
+
+```R
+barplot(
+  colSums(prop.table(sentiment_scores[, 1:8])),
+  space = 0.2,
+  horiz = FALSE,
+  las = 1,
+  cex.names = 0.7,
+  col = brewer.pal(n = 8, name = "Set3"),
+  main = "'Miau' by Benito Pérez Galdós, 1907 edition",
+  sub = "Analysis by Dr Jennifer Isasi",
+  xlab = "emotions", ylab = NULL)
+```
+
+The rest of the parameters that you can see in the code are optional and have been added to help you learn how to customise the graph outputs. They include indicating the space between the bars (`space = 0.2`), that the chart should include vertical rather than horizontal bars (`horiz = FALSE`), and that the axis labels should be drawn horizontally (`las = 1`). We also reduce the font size of the labels (`cex.names = 0.7`) to make sure they fit nicely on the screen. Thanks to the [`RColorBrewer`](https://perma.cc/BHK9-AY7S) package that we installed and loaded at the beginning of the lesson, we can automatically colour the columns. In this case we've used the `brewer.pal` colour palette from `Set3`, and specified we need 8 colours (`n = 8`) – one colour per column. You can learn more about `RColorBrewer` and its options on [the documentation page for that package](https://perma.cc/4EHL-P8E9). Finally, we add a title and subtitle to the graph using the `main` and `sub` parameters, along with the word `emotions` on the X axis. We have not added a label to the Y axis, but you could do so if you wished by following the model above.
+
+{% include figure.html filename="tr-en-analisis-de-sentimientos-r-1.png" alt="Bar chart showing the calculated scores of six emotions and two sub-emotions measured in the novel ‘Miau’ (1907) by Pérez Galdós. The emotions are anger, anticipation, disgust, fear, joy, sadness, surprise, and trust. The ‘trust’ bar is the tallest, followed by ‘sadness’ and ‘fear’, while ‘disgust’ and ‘surprise’ are the shortest. This is included because it shows the relative outputs of the sentiment analysis algorithm across these eight emotions." caption="Figure 1: Bar chart showing the calculated scores of six emotions and two sub-emotions measured in the novel 'Miau' by Pérez Galdós." %}
+
+If you are not interested in modifying these parameters, you could create a bar chart with default styling using the following code:
+
+```R
+barplot(colSums(prop.table(sentiment_scores[, 1:8])))
+```
+
+> Make sure you have enough space in the display window for the graph to draw properly, including space for the labels.
+
+This information already indicates to us that the *sadness* and *fear* emotions are more prevalent than those of *disgust* or *surprise*. But what words does Galdós use to express *fear*? And how often does each emotionally charged word appear in the novel?
+
+## Counting Words by Emotion
+
+One of the measures you can calculate using sentiment analysis is the frequency of words appearing in the text and how those words relate to each emotional category. To start with, you need to create a data object with all of the words that have a value greater than 0 -- in this case you will start with those corresponding to the *sadness* column. In order to select only that column, use the dollar symbol `$` after the name of your `sentiment_scores` variable to specify the name of the column you want to work with: *sadness*.
+
+
+```R
+sad_words <- text_words[sentiment_scores$sadness > 0]
+```
+
+The contents of `sad_words` do not tell you much on their own, since they only offer you the list of relevant words without any further context. To also obtain the number of appearances of each 'sadness' word, you can generate a table. To get a quick look at some of the top entries, use the `unlist` and `table` functions together with `sort` and its `decreasing` argument to display the matches in descending order (if you want ascending order, change `TRUE` to `FALSE`). You can create a new table object to print the first twelve words in the list, along with their frequencies, using the following code (see Table 2 for translations of the Spanish words):
+
+```R
+sad_word_order <- sort(table(unlist(sad_words)), decreasing = TRUE)
+head(sad_word_order, n = 12)
+
+>            muy           nada          pobre          tarde
+>            271            156             64             58
+>            mal           caso           malo          salir
+>             57             50             39             35
+>          madre insignificante             ay          culpa
+>             33             29             24             22
+```
+
+Table 2: English translations of the Spanish words in the preceding code output block
+
+| Spanish | English |
+| ------- | ------- |
+| muy | very |
+| nada | nothing |
+| pobre | poor |
+| tarde | late |
+| mal | bad |
+| caso | case |
+| malo | bad |
+| salir | to leave |
+| madre | mother |
+| insignificante | insignificant |
+| ay | ow! |
+| culpa | fault |
+
+
+If you want to know how many unique words are connected to sadness, you can use the `length` function on the newly created `sad_word_order` variable:
+
+```R
+length(sad_word_order)
+
+> [1] 349
+```
+
+You can repeat the same operation with the rest of the emotion categories, or with those that interest you, as well as with the positive and negative sentiment scores. To make sure you understand how to adapt the code, try to obtain the results for the emotion 'joy' and compare them with 'sadness'.[^9]
+
+Depending on the type of analysis that you want to conduct, this may be an efficient approach. For the purposes of this introductory lesson, you are next going to generate a word cloud to help visualise the terms associated with each emotional category (for demonstration purposes, you will use four).
+
+
+## An Emotional Word Cloud
+
+In order to create a word cloud of terms that correspond with each emotion in *Miau*, you are first going to collect all words with an emotion score greater than 0. As in the previous example, you use the `$` symbol to specify which column of data (which emotion) you are interested in, indicating that you want entries with a value greater than 0.
+
+If working on a machine running Windows, you will have to indicate to the programme that your text contains accented characters, using the following approach:
+
+### On Mac and Linux
+
+```R
+cloud_emotions_data <- c(
+  paste(text_words[sentiment_scores$sadness > 0], collapse = " "),
+  paste(text_words[sentiment_scores$joy > 0], collapse = " "),
+  paste(text_words[sentiment_scores$anger > 0], collapse = " "),
+  paste(text_words[sentiment_scores$fear > 0], collapse = " "))
+```
+
+### On Windows
+
+Windows needs an additional step to indicate the text is in UTF-8 format, which is done using the `iconv` function.
+
+```R
+cloud_emotions_data <- c(
+  paste(text_words[sentiment_scores$sadness > 0], collapse = " "),
+  paste(text_words[sentiment_scores$joy > 0], collapse = " "),
+  paste(text_words[sentiment_scores$anger > 0], collapse = " "),
+  paste(text_words[sentiment_scores$fear > 0], collapse = " "))
+
+cloud_emotions_data <- iconv(cloud_emotions_data, "latin1", "UTF-8")
+```
+
+Once you have collected the data for the four target emotions, you can organise it into four separate 'documents' to use as the basis for creating each of your four word clouds:
+
+```R
+cloud_corpus <- Corpus(VectorSource(cloud_emotions_data))
+```
+
+Next, you transform the corpus into a term-document matrix using the `TermDocumentMatrix()` function. Then you specify that you want the data organised as a matrix using the `as.matrix()` function.
+
+To see the first few entries of this output, use the `head` function:
+
+```R
+cloud_tdm <- TermDocumentMatrix(cloud_corpus)
+cloud_tdm <- as.matrix(cloud_tdm)
+head(cloud_tdm)
+
+>                Docs
+> Terms           1 2 3 4
+>   abandonado    4 0 4 4
+>   abandonar     1 0 0 1
+>   abandonará    2 0 0 2
+>   abandonaré    1 0 0 1
+>   abandonarías  1 0 0 1
+>   abandono      3 0 3 3
+```
+
+Now, rename the numbered columns with the relevant emotion words so that the output is more human-readable. Again, you can see the state of your dataset with the `head` function:
+
+```R
+colnames(cloud_tdm) <- c('sadness', 'joy', 'anger', 'fear')
+head(cloud_tdm)
+
+>                Docs
+> Terms           sadness joy anger fear
+>   abandonado          4   0     4    4
+>   abandonar           1   0     0    1
+>   abandonará          2   0     0    2
+>   abandonaré          1   0     0    1
+>   abandonarías        1   0     0    1
+>   abandono            3   0     3    3
+```
+
+Finally, you can visualise these results as a word cloud. The font size of a word in a word cloud is linked to the frequency of its appearance in the document. We can also control a number of other aspects of the word cloud's presentation.
+
+To start, use the `set.seed()` function to ensure that, while following along, your outputs will look the same as in the example (if you don't do this, your output will have a randomised pattern and may not match the screenshots herein - which may not be important for your own research results but is helpful when following along).
+
+To generate the cloud itself, use the [comparison.cloud](https://perma.cc/6QRY-5KBG) function from the R `wordcloud` package. In this example, you will indicate that the words in `cloud_tdm` should be placed in a non-random order. You will also specify the colour scheme of each group of words, the title size, and the general scale of the visualisation. To make the cloud readable, you will also specify a maximum number of terms. These parameters are all adjustable.
+
+```R
+set.seed(757) # this can be set to any integer
+comparison.cloud(cloud_tdm, random.order = FALSE,
+                 colors = c("green", "red", "orange", "blue"),
+                 title.size = 1, max.words = 50, scale = c(2.5, 1), rot.per = 0.4)
+```
+
+You should get an image similar to Figure 2, although with the location of the words altered, since the cloud is generated according to the size of the canvas.
+
+{% include figure.html filename="tr-en-analisis-de-sentimientos-r-2.png" alt="Word Cloud of most frequent words corresponding to sadness, joy, anger, and fear in the novel ‘Miau’ by Pérez Galdós. The words are colour-coded to show that they correspond with one of the four emotions, and use a cartesian coordinate system so that all words most closely associated with joy are in the top left quadrant, sadness in the top right, and so on. Words that are most prevalent in the text appear closest to the centre of the graph. The word ‘muy’ (Spanish for ‘very’) is the largest word, and is associated with sadness. This is included because it shows which words are prevalent, and which emotions they are most closely associated with according to the sentiment analysis algorithm." caption="Figure 2: Word Cloud of most frequent words corresponding to sadness, joy, anger, and fear in the novel 'Miau' by Pérez Galdós." %}
+
+What does the word cloud suggest to you? Surely the connections of 'very' (muy) to the sadness emotion and of 'money' (dinero) to the anger emotion need further consideration. These less obvious results are exactly what many scholars warn about when thinking about sentiment analysis, and demonstrate why a researcher must always ask whether the outcomes of the analysis make sense before trying to draw any research conclusions from them. As noted, the sentiment lexicon used in this tutorial was automatically translated from English, and is thus not perfect when used on Spanish-language text.
+
+## Visualising Emotion and Sentiment Across the Progression of a Text
+
+To complement the isolated readings of emotions as above, you can also study the fluctuation of positive and negative sentiment across the text (Figure 3). R provides a way to both normalise and visualise this time-series sentiment analysis data. Since the sentiment analysis algorithm assigns both positive and negative sentiment scores, you need to generate data in a range from -1 (the most negative moments) to 1 (the most positive moments); 0 is considered neutral. To calculate these scores, you multiply the values in the negative column of the original `sentiment_scores` data table by -1 and then add the result to the values in the positive column.
+
+
+```R
+sentiment_valence <- (sentiment_scores$negative * -1) + sentiment_scores$positive
+```
+
+Finally, you can generate a graph with the `simple_plot()` function, which is built into the `syuzhet` package, and which offers you a choice of two different graphs; the first presents the various measurements calculated by the algorithm, and the second is a normalisation of those measures. The horizontal axis (X axis) presents the text in 100 normalised fragments and the vertical axis (Y axis) shows the strength of the sentiment in the text. Depending on the computing power of your machine, the graph may take 20 to 30 minutes to finish rendering.
+
+```R
+simple_plot(sentiment_valence)
+```
+
+> Make sure your graph display window is sized large enough to actually draw the graph. If it isn't, you will see the error message: `Error in plot.new() : figure margins too large.`
+
+{% include figure.html filename="tr-en-analisis-de-sentimientos-r-3.png" alt="A pair of line charts that show the rough emotional intensity of positive and negative sentiment across the whole novel. The graphs use a line graph with a solid curving line moving left-to-right to represent the beginning, middle, and end. In this particular novel, a simplified chart shows that the sentiment rises through the first quarter of the story, before diving in the middle and staying low until the end, representing quite a depressing story. A less simplified version shows that the sentiment picks up a few times later in the novel, but dips well into negative sentiment a number of times. This is included because it shows the emotional intensity of the novel over time." caption="Figure 3: Evolution of the use of positive and negative sentiment through the novel 'Miau' by Pérez Galdós" %}
+
+Based on Figure 3, you might conclude that the novel *Miau* begins with fairly neutral language, transitions into moments of happiness early on, and moves into some quite negative description in the remaining pages, ending on a negative note, as indicated by the sample sentence we drew upon earlier in the lesson, in which Villaamil dies. Anyone who has read the novel will know well the protagonist's despair, so in this case the analysis matches a traditional reading of the text, which answers our research question about whether or not the automated sentiment analysis reflects a close reading of the text.
+
+
+# Save Your Data
+
+If you want to save the data so that you can come back to it later, you can archive it in comma-separated values ([CSV](https://perma.cc/64FY-NTSU)) format, using the function `write.csv()`. This will save your main data table, `sentiment_scores`, which contains the results for the eight emotions and two sentiments we generated, into a CSV file. The `row.names = text_words` argument adds the word associated with each row in the left-most column to act as a helpful label.
+
+
+```R
+write.csv(sentiment_scores, file = "analysis_sent_miau.csv", row.names = text_words)
+```
+
+Now you have all of the tools and knowledge you need to start to analyse your own texts and compare them with each other.
+
+# Loading your own Sentiment Lexicon
+
+While the above introduction provides you with many tools for exploring sentiment analysis, this tutorial has not presented an exhaustive list of possibilities.
+
+You may be working on a project in which you have already created a sentiment dictionary that you would like to use. Or perhaps you need to be able to customise a vocabulary and its corresponding sentiment scores to apply to a particular cultural or temporal context related to your research. Maybe you're looking to improve upon the automatically translated results of the NRC lexicon used here. In each of those cases, as of mid-2022, you can also load your own lexicon dataset into the software, using the `custom` method of the `get_sentiment` function to repeat some of the calculations and visualisations used in this lesson.
+
+To load your own sentiment lexicon, you first have to create or modify a dataframe containing at minimum a column of words and a column containing the corresponding scores for those words, which the author recommends saving in a CSV file format.
+
+Try this example:
+
+| word | value |
+| ---- | ----- |
+| amor | 1 |
+| cólera | -1 |
+| alfombra | 0 |
+| catástrofe | -2 |
+
+Next, to load your saved data from a CSV file, use the `read.csv` function, which will create a new dataset that you can access in R just as you have in the above examples (change 'FILEPATH' to the full location of your CSV file):
+
+```R
+personalised_vocabulary <- read.csv("FILEPATH")
+method <- "custom"
+sentiments_sentences <- get_sentiment(sentence_vector, method = method, lexicon = personalised_vocabulary)
+```
+
+Warning: If you get the error message 'incomplete final line found by readTableHeader', this indicates that your CSV file is not formatted properly and lacks an 'end of line' character at the end of the file. The easiest way to correct this is to open your CSV file in a text editor (not MS Word), scroll to the end of the file, press return, and re-save the file. A fuller explanation of this error is available on Stack Overflow.
+
+If you want to visualise sentiment across the progression of a text, you can use the `plot` function, which uses the same graphing parameters that you've already learned:
+
+```R
+plot(sentiments_sentences,
+     type = "l",
+     main = "'Miau' by Benito Pérez Galdós, 1907 edition",
+     sub = "Analysis by Dr Jennifer Isasi",
+     xlab = "emotions", ylab = " "
+     )
+```
+
+Keep in mind that this form of customised analysis is limited, and that you may not be able to perform all of the same operations that we introduced above. For example, if you follow the model above with your own dictionary, you will not have information about the emotions, so you will not be able to make a word cloud in the same way.
+
+
+# Works Cited
+
+* Arnold, Taylor, and Lauren Tilton. 'Basic Text Processing in R', *Programming Historian* 6 (2017), https://doi.org/10.46430/phen0061.
+* Damasio, Antonio R. *El error de Descartes: La razón de las emociones* (Andres Bello, 1999).
+* Dewar, Taryn. 'R Basics with Tabular Data', *Programming Historian* 5 (2016), https://doi.org/10.46430/phen0056.
+* Gottschalk, Louis, and Goldine Gleser. *The Measurement of Psychological States through the Content Analysis of Verbal Behaviour* (University of California, 1969).
+* Heuser, Ryan, Franco Moretti, Erik Steiner. 'The Emotions of London' *Stanford Literary Lab*, Pamphlet 13 (2016): 1-10.
+* Hu, Minqing, and Bing Liu. 'Mining and Summarizing Customer Reviews', *Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery & Data Mining* (KDD-2004), 2004.
+* Jockers, Matthew. 'Introduction to the Syuzhet Package' *CRAN* (2020), [https://cran.r-project.org/web/packages/syuzhet/vignettes/syuzhet-vignette.html](https://cran.r-project.org/web/packages/syuzhet/vignettes/syuzhet-vignette.html).
+* Jockers, Matthew. 'Some thoughts on Annie's thoughts...about Syuzhet' *Matthew L. Jockers* (2015), [https://www.matthewjockers.net/page/2/](https://web.archive.org/web/20190708100723/https://www.matthewjockers.net/page/2/).
+* Leemans, Inger, Janneke M. van der Zwaan, Isa Maks, Erika Kujpers, Kristine Steenberge. 'Mining Embodied Emotions: A Comparative Analysis of Sentiment and Emotion in Dutch Texts, 1600-1800' *Digital Humanities Quarterly* 11 (2017).
+* Liu, Bing. *Sentiment Analysis and Opinion Mining* (Morgan & Claypool, 2012).
+* Meder, Theo, Dong Nguyen, Rilana Gravel. 'The Apocalypse on Twitter' *Digital Scholarship in the Humanities* 31 (2016): 398-410.
+* Mohammad, Saif. 'NRC Word-Emotion Association Lexicon', *National Research Council Canada* (2010), [https://saifmohammad.com/WebPages/NRC-Emotion-Lexicon.htm](https://perma.cc/A8M5-2SDG).
+* Mohammad, Saif, and Peter D. Turney. 'Crowdsourcing a Word–Emotion Association Lexicon' *Computational Intelligence* 29 (2013): 436-465, doi: 10.1111/j.1467-8640.2012.00460.x.
+* Nguyen, Thein Hai, Kiyoaki Shirai, Julien Velcin. 'Sentiment Analysis on Social Media for Stock Movement Prediction' *Expert Systems with Applications* 42 (2015): 9603-9611.
+* Nielsen, Finn Årup. 'AFINN Sentiment Lexicon' (2009-2011).
+* Pereira Zazo, Óscar. *El analisis de la comunicación en español* (Kendal Hunt, 2015).
+* Pérez Galdós, Benito. *Miau* (La Guirnalda, 1888).
+* Pérez Galdós, Benito. *Miau* (Sucesores de Hernando, 1907).
+* Rodríguez Aldape, Fernando Manuel. *Cuantificación del Interés de un usuario en un tema mediante minería de texto y análisis de sentimiento* (MA Thesis, Universidad Autónoma de Nuevo León, 2013).
+* Schmidt, Thomas, Manuel Burghardt, Christian Wolff. 'Towards Multimodal Sentiment Analysis of Historic Plays: A Case Study with Text and Audio for Lessing's Emilia Galotti' *4th Conference of the Association of Digital Humanities in the Nordic Countries* (2019).
+* Siddiqui, Nabeel. 'Data Wrangling and Management in R', *Programming Historian* 6 (2017), https://doi.org/10.46430/phen0063.
+* Sprugnoli, Rachele, Sara Tonelli, Alessandro Marchetti, Giovanni Moretti. 'Towards Sentiment Analysis for Historical Texts' *Digital Scholarship in the Humanities* 31 (2016): 762-772.
+* Stone, Philip, Dexter Dunphy, Marshall Smith. *The General Inquirer: A Computer Approach to Content Analysis* (M.I.T. Press, 1966).
+* Swafford, Annie. 'Problems with the Syuzhet Package' *Anglophile in Academia* (2015), [https://annieswafford.wordpress.com/2015/03/02/syuzhet/](https://perma.cc/TYT3-5DTU).
+* Wilkinson Saldaña, Zoë. 'Sentiment Analysis for Exploratory Data Analysis', *Programming Historian* 7 (2018), https://doi.org/10.46430/phen0079.
+
+
+
+# Notes
+
+[^1]: For example, see: Louis Gottschalk, Goldine Gleser (1969) *The Measurement of Psychological States through the Content Analysis of Verbal Behaviour* (University of California); Philip Stone, Dexter Dunphy, Marshall Smith (1966) 'The General Inquirer: A Computer Approach to Content Analysis' (M.I.T. Press); Bing Liu (2012) *Sentiment Analysis and Opinion Mining* (Morgan & Claypool); Thein Hai Nguyen, Kiyoaki Shirai, Julien Velcin (2015). 'Sentiment Analysis on Social Media for Stock Movement Prediction' *Expert Systems with Applications* 42: 9603-9611; Theo Meder, Dong Nguyen, Rilana Gravel (2016). 'The Apocalypse on Twitter' *Digital Scholarship in the Humanities* 31 (2): 398-410.
+[^2]: For some examples in English, see: Inger Leemans, Janneke M. van der Zwaan, Isa Maks, Erika Kujpers, Kristine Steenberge (2017). 'Mining Embodied Emotions: A Comparative Analysis of Sentiment and Emotion in Dutch Texts, 1600-1800' *Digital Humanities Quarterly* 11 (4); Rachele Sprugnoli, Sara Tonelli, Alessandro Marchetti, Giovanni Moretti (2016). 'Towards Sentiment Analysis for Historical Texts' *Digital Scholarship in the Humanities* 31 (4): 762-772; Thomas Schmidt, Manuel Burghardt, Christian Wolff (2019). 'Towards Multimodal Sentiment Analysis of Historic Plays: A Case Study with Text and Audio for Lessing's Emilia Galotti' *4th Conference of the Association of Digital Humanities in the Nordic Countries*; Ryan Heuser, Franco Moretti, Erik Steiner (2016). 'The Emotions of London' *Stanford Literary Lab*, Pamphlet 13: 1-10.
+[^3]: Antonio R. Damasio, *El Error de Descartes: La razón de las emociones* (Barcelona: Andres Bello, 1999).
+[^4]: Óscar Pereira Zazo, *El analisis de la comunicación en español* (Iowa: Kendal Hunt, 2015), 32.
+[^5]: 'Bing': Minqing Hu and Bing Liu, 'Mining and Summarizing Customer Reviews', *Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery & Data Mining* (KDD-2004), 2004; 'Afinn': Finn Årup Nielsen, 'AFINN Sentiment Lexicon' (2009-2011); 'NRC': Saif Mohammad, '[NRC Word-Emotion Association Lexicon](https://perma.cc/A8M5-2SDG)', *National Research Council Canada* (2010).
+[^6]: Saif Mohammad and Peter D. Turney, 'Crowdsourcing a Word–Emotion Association Lexicon', *Computational Intelligence* 29 (2013): 436-465, doi: 10.1111/j.1467-8640.2012.00460.x.
+[^7]: Richard Socher, 'Recursive Deep Learning for Natural Language Processing and Computer Vision', PhD diss. (Stanford University, 2014).
+[^8]: Thanks to Mounika Puligurthi, intern at the University of Texas (UT) Digital Scholarship Office (during the spring of 2019), for her help interpreting this calculation. +[^9]: There are more words assigned to the emotion *sadness* than to *joy*, both in total number of words (2,061 vs 1,552) and in unique words (349 vs 263). The word 'Mother' appears under both sadness and joy with a value of 33 points. What do you think the significance of that classification decision is? diff --git a/en/lessons/sentiment-analysis.md b/en/lessons/sentiment-analysis.md index eded4c83fd..78b3ef1ee9 100755 --- a/en/lessons/sentiment-analysis.md +++ b/en/lessons/sentiment-analysis.md @@ -29,7 +29,7 @@ doi: 10.46430/phen0079 This lesson uses [sentiment analysis](https://en.wikipedia.org/wiki/Sentiment_analysis) as the basis for an [exploratory data analysis](https://en.wikipedia.org/wiki/Exploratory_data_analysis) of a large textual corpus. It is appropriate for readers with some basic prior experience programming with [Python](https://www.python.org/). If you have no experience with Python or computer programming, the author recommends working through the first few lessons in the [Introduction to Python series](/lessons/introduction-and-installation). By the end of this lesson, you will be able to: * Devise appropriate research questions that use [Natural Language Processing](https://en.wikipedia.org/wiki/Natural_language_processing) (NLP) on a textual corpus. -* Use Python and the [Natural Language Processing Toolkit](http://www.nltk.org/) (NLTK) to generate sentiment scores for a text. +* Use Python and the [Natural Language Processing Toolkit](https://www.nltk.org/) (NLTK) to generate sentiment scores for a text. * Critically evaluate the sentiment analysis scores and adjust [parameters](https://en.wikipedia.org/wiki/Parameter) and methodology as appropriate. * Identify next steps to continue learning about exploratory data analysis and programmatic approaches to qualitative data. @@ -67,7 +67,7 @@ For researchers, the Enron Scandal resulted in the creation of one of the larges When the organized and redacted [Enron E-mail Dataset](https://www.cs.cmu.edu/~./enron/) was released in 2004, researchers discovered an unprecedented opportunity: direct access to the spontaneous, largely uncensored way employees in a doomed corporation communicated with one another. Suddenly, researchers had access to how people communicated at work at an unprecedented scale. This mattered for researchers interested in the special case of the Enron scandal and collapse, but also for researchers interested in a wide spectrum of questions about everyday communication at work. -In the following decade, hundreds of new studies sprouted up from the e-mails pursuing questions as diverse as [social network theory](https://en.wikipedia.org/wiki/Social_network), community and [anomaly detection](https://en.wikipedia.org/wiki/Anomaly_detection), gender and communication within organizations, behavioral change during an organizational crisis, and insularity and community formation. The use of social network theory in the humanities proposes some [fascinating possibilities](http://journals.sagepub.com/doi/abs/10.1177/1749975514542486), but is not without [significant debate](http://www.emeraldinsight.com/doi/abs/10.1108/S0733-558X%282014%290000040001). 
+In the following decade, hundreds of new studies sprouted up from the e-mails pursuing questions as diverse as [social network theory](https://en.wikipedia.org/wiki/Social_network), community and [anomaly detection](https://en.wikipedia.org/wiki/Anomaly_detection), gender and communication within organizations, behavioral change during an organizational crisis, and insularity and community formation. The use of social network theory in the humanities proposes some [fascinating possibilities](https://journals.sagepub.com/doi/abs/10.1177/1749975514542486), but is not without [significant debate](https://www.emeraldinsight.com/doi/abs/10.1108/S0733-558X%282014%290000040001). In addition to the sheer quantity of messages included (the corpus contains over 600,000 messages), the Enron E-mail Corpus also includes the metadata necessary for researchers to pursue a number of research questions. Just as the presence of envelopes with legible sender and recipient addresses would be a wonderful asset for researchers of historic letter correspondences, the presence of sender and recipient e-mail addresses allows researchers to associate e-mails with particular known individuals within the corporation. As some individuals had multiple e-mail addresses, or more than one individual may have shared the same address, the metadata is not fool proof, but it is incredibly insightful. The rest of the tutorial will go through how to apply and interpret sentiment analysis of e-mails in this corpus. @@ -84,7 +84,7 @@ In this tutorial, you will be using [Python](https://www.python.org/) along with To complete the example below, you will need to install the following: * Python 3 (ideally 3.5 or higher) - [Download & install instructions from the Python wiki](https://wiki.python.org/moin/BeginnersGuide/Download) -* NLTK (3.2.5 or higher) - [Download & install instructions from NLTK.org](http://www.nltk.org/install.html) +* NLTK (3.2.5 or higher) - [Download & install instructions from NLTK.org](https://www.nltk.org/install.html) ## Getting Started with NLTK @@ -94,7 +94,7 @@ If you need any help downloading and installing the module for [Python 3](https: In our case, we will be using two NLTK tools in particular: -* The '[VADER Sentiment Analysis](http://www.nltk.org/_modules/nltk/sentiment/vader.html)' tool (generates positive, negative, and neutral sentiment scores for a given input) +* The '[VADER Sentiment Analysis](https://www.nltk.org/_modules/nltk/sentiment/vader.html)' tool (generates positive, negative, and neutral sentiment scores for a given input) * The 'word_tokenize' tokenizer tool (splits a large text into a sequence of smaller units, like sentences or words) To use VADER and word_tokenize, we first need to download and install a little extra data for NLTK. NLTK is a very large toolkit, and several of its tools actually require a second download step to gather the necessary collection of data (often coded lexicons) to function correctly. @@ -115,7 +115,7 @@ You can save this file as "`installation.py`". If you are unsure how to save and If you do know how to run Python scripts, run the file using Python 3. -[*VADER*](http://www.nltk.org/_modules/nltk/sentiment/vader.html "Vader page in the NLTK Documentation") (Valence Aware Dictionary and sEntiment Reasoner) is a sentiment intensity tool added to NLTK in 2014. Unlike other techniques that require training on related text before use, *VADER* is ready to go for analysis without any special setup. 
*VADER* is unique in that it makes fine-tuned distinctions between varying degrees of positivity and negativity. For example, *VADER* scores "comfort" moderately positively and "euphoria" extremely positively. It also attempts to capture and score textual features common in informal online text such as capitalizations, exclamation points, and emoticons, as shown in the table below: +[*VADER*](https://www.nltk.org/_modules/nltk/sentiment/vader.html "Vader page in the NLTK Documentation") (Valence Aware Dictionary and sEntiment Reasoner) is a sentiment intensity tool added to NLTK in 2014. Unlike other techniques that require training on related text before use, *VADER* is ready to go for analysis without any special setup. *VADER* is unique in that it makes fine-tuned distinctions between varying degrees of positivity and negativity. For example, *VADER* scores "comfort" moderately positively and "euphoria" extremely positively. It also attempts to capture and score textual features common in informal online text such as capitalizations, exclamation points, and emoticons, as shown in the table below: {% include figure.html filename="sentiment-analysis1.png" caption="Vader captures slight gradations in enthusiasm. (Hutto and Gilbert, 2014)" %} @@ -420,4 +420,4 @@ Klimt, B., & Yang, Y. (2004). The Enron corpus: A new dataset for email classifi Tukey, J.W. (1977). *Exploratory Data Analysis*. Addison-Wesley Publishing Company -Quinn, J. (2006, November 14). Ex-Enron man goes back into energy. Retrieved January 10, 2018, from http://www.telegraph.co.uk/finance/2950645/Ex-Enron-man-goes-back-into-energy.html +Quinn, J. (2006, November 14). Ex-Enron man goes back into energy. Retrieved January 10, 2018, from https://www.telegraph.co.uk/finance/2950645/Ex-Enron-man-goes-back-into-energy.html diff --git a/en/lessons/simulating-historical-communication-networks-python.md b/en/lessons/simulating-historical-communication-networks-python.md index 4ceecfb53b..232a590953 100644 --- a/en/lessons/simulating-historical-communication-networks-python.md +++ b/en/lessons/simulating-historical-communication-networks-python.md @@ -925,13 +925,13 @@ Do not hesitate to get in touch with us if you want to be part of this discussio [^1]: Hotson, Howard, and Thomas Wallnig, [Eds.] (2019), Reassembling the Republic of Letters in the Digital Age: Standards, Systems, Scholarship. Göttingen, Germany: Göttingen University Press. [https://doi.org/10.17875/gup2019-1146](https://doi.org/10.17875/gup2019-1146). -[^2]: Ureña-Carrion, Javier, Petri Leskinen, Jouni Tuominen, Charles van den Heuvel, Eero Hyvönen, and Mikko Kivelä (2021), Communication Now and Then: Analyzing the Republic of Letters as a Communication Network. [http://arxiv.org/abs/2112.04336](http://arxiv.org/abs/2112.04336). +[^2]: Ureña-Carrion, Javier, Petri Leskinen, Jouni Tuominen, Charles van den Heuvel, Eero Hyvönen, and Mikko Kivelä (2021), Communication Now and Then: Analyzing the Republic of Letters as a Communication Network. [https://arxiv.org/abs/2112.04336](https://arxiv.org/abs/2112.04336). [^3]: Miert, Dirk van (2014), “What was the Republic of Letters? A brief introduction to a long history.” Groniek, no. 204/5 (2014). [https://ugp.rug.nl/groniek/article/view/27601](https://perma.cc/36K9-7LUU). [^4]: Schmitz, Jascha Merijn: Simulation. In: AG Digital Humanities Theorie des Verbandes Digital Humanities im deutschsprachigen Raum e. V. (Hg.): Begriffe der Digital Humanities. 
Ein diskursives Glossar (= Zeitschrift für digitale Geisteswissenschaften / Working Papers, 2). Wolfenbüttel 2023. 25.05.2023. Version 2.0 vom 16.05.2024. HTML / XML / PDF. [https://doi.org/10.17175/wp_2023_011_v2](https://doi.org/10.17175/wp_2023_011_v2). -[^5]: Gavin, Michael. Agent-Based Modeling and Historical Simulation. Digital Humanities Quarterly, 008(4):195, December 2014. [http://www.digitalhumanities.org/dhq/vol/8/4/000195/000195.html](https://perma.cc/S3WG-SMXR). +[^5]: Gavin, Michael. Agent-Based Modeling and Historical Simulation. Digital Humanities Quarterly, 008(4):195, December 2014. [https://www.digitalhumanities.org/dhq/vol/8/4/000195/000195.html](https://perma.cc/S3WG-SMXR). [^6]: Romein, C. A., Max Kemman, Julie M. Birkholz, J. Baker, M. D. Gruijter, Albert Meroño-Peñuela, T. Ries, Ruben Ros, S. Scagliola (2020). State of the Field: Digital History. In: Journal of the Historical Association 105 (365), pp. 291-312. diff --git a/en/lessons/sonification.md b/en/lessons/sonification.md index 1ae3b1a554..53c213a191 100755 --- a/en/lessons/sonification.md +++ b/en/lessons/sonification.md @@ -31,11 +31,11 @@ doi: 10.46430/phen0057 I am too tired of seeing the past. There are any number of guides that will help you _visualize_ that past which cannot be seen, but often we forget what a creative act visualization is. We are perhaps too tied to our screens, too much invested in ‘seeing’. Let me hear something of the past instead. -While there is a deep history and literature on archaeoacoustics and soundscapes that try to capture the sound of a place _as it was_ ([see for instance the Virtual St. Paul's](https://www.digitalstudies.org/articles/10.16995/dscn.58) or the work of [Jeff Veitch on ancient Ostia](https://jeffdveitch.wordpress.com/)), I am interested instead to ’sonify' what I have _right now_, the data themselves. I want to figure out a grammar for representing data in sound that is appropriate for history. [Drucker](#Drucker) [famously reminds us](http://web.archive.org/web/20190203083307/http://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html) that ‘data’ are not really things given, but rather things captured, things transformed: that is to say, ‘capta’. In sonifying data, I literally perform the past in the present, and so the assumptions, the transformations, I make are foregrounded. The resulting aural experience is a literal ‘deformance’ (portmanteau of ‘deform’ and ‘perform’) that makes us hear modern layers of the past in a new way. +While there is a deep history and literature on archaeoacoustics and soundscapes that try to capture the sound of a place _as it was_ ([see for instance the Virtual St. Paul's](https://www.digitalstudies.org/articles/10.16995/dscn.58) or the work of [Jeff Veitch on ancient Ostia](https://jeffdveitch.wordpress.com/)), I am interested instead to ’sonify' what I have _right now_, the data themselves. I want to figure out a grammar for representing data in sound that is appropriate for history. [Drucker](#Drucker) [famously reminds us](https://web.archive.org/web/20190203083307/https://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html) that ‘data’ are not really things given, but rather things captured, things transformed: that is to say, ‘capta’. In sonifying data, I literally perform the past in the present, and so the assumptions, the transformations, I make are foregrounded. 
The resulting aural experience is a literal ‘deformance’ (portmanteau of ‘deform’ and ‘perform’) that makes us hear modern layers of the past in a new way. I want to hear the meaning of the past, but I know that I can’t. Nevertheless, when I hear an instrument, I can imagine the physicality of the player playing it; in its echoes and resonances I can discern the physical space. I can feel the bass; I can move to the rhythm. The music engages my whole body, my whole imagination. Its associations with sounds, music, and tones I’ve heard before create a deep temporal experience, a system of embodied relationships between myself and the past. Visual? We have had visual representations of the past for so long, we have almost forgotten the artistic and performative aspect of those grammars of expression. -In this tutorial, you will learn to make some noise from your data about the past. The _meaning_ of that noise, well... that's up to you. Part of the point of this tutorial is to make your data unfamiliar again. By translating it, transcoding it, [_remediating_](http://blog.taracopplestone.co.uk/making-things-photobashing-as-archaeological-remediation/) it, we begin to see elements of the data that our familiarity with visual modes of expression have blinded us to. This deformation, this deformance, is in keeping with arguments made by for instance Mark Sample on [breaking things](http://www.samplereality.com/2012/05/02/notes-towards-a-deformed-humanities/), or Bethany Nowviskie on the '[resistance in the materials](http://nowviskie.org/2013/resistance-in-the-materials/)'. Sonification moves us along the continuum from data to capta, social science to art, [glitch to aesthetic](http://nooart.org/post/73353953758/temkin-glitchhumancomputerinteraction). So let's see what this all sounds like. +In this tutorial, you will learn to make some noise from your data about the past. The _meaning_ of that noise, well... that's up to you. Part of the point of this tutorial is to make your data unfamiliar again. By translating it, transcoding it, [_remediating_](https://blog.taracopplestone.co.uk/making-things-photobashing-as-archaeological-remediation/) it, we begin to see elements of the data that our familiarity with visual modes of expression have blinded us to. This deformation, this deformance, is in keeping with arguments made by for instance Mark Sample on [breaking things](https://www.samplereality.com/2012/05/02/notes-towards-a-deformed-humanities/), or Bethany Nowviskie on the '[resistance in the materials](https://nowviskie.org/2013/resistance-in-the-materials/)'. Sonification moves us along the continuum from data to capta, social science to art, [glitch to aesthetic](https://nooart.org/post/73353953758/temkin-glitchhumancomputerinteraction). So let's see what this all sounds like. ## Objectives @@ -46,9 +46,9 @@ In the first, we will use a freely available and free-to-use system developed by You will see that 'sonification' moves us along the spectrum from mere 'visualization/auralization' to actual performance. 
### Tools -+ Musicalgorithms [http://musicalgorithms.org/](http://musicalgorithms.org/) ++ Musicalgorithms [https://musicalgorithms.org/](https://musicalgorithms.org/) + MIDITime [https://github.com/cirlabs/miditime](https://github.com/cirlabs/miditime) (I have forked a copy [here](https://github.com/shawngraham/miditime)) -+ Sonic Pi [http://sonic-pi.net/](http://sonic-pi.net/) ++ Sonic Pi [https://sonic-pi.net/](https://sonic-pi.net/) ### Example Data @@ -58,7 +58,7 @@ You will see that 'sonification' moves us along the spectrum from mere 'visualiz # Some Background on Sonification -Sonification is the practice of mapping aspects of the data to produce sound signals. In general, a technique can be called ‘sonification’ if it meets certain conditions. These include reproducibility (the same data can be transformed the same ways by other researchers and produce the same results) and what might be called intelligibility - that the ‘objective’ elements of the original data are reflected systematically in the resulting sound (see [Hermann](#Hermann) [2008](http://www.icad.org/Proceedings/2008/Hermann2008.pdf) for a taxonomy of sonification). [Last and Usyskin](#Last) [(2015)](https://www.researchgate.net/publication/282504359_Listen_to_the_Sound_of_Data) designed a series of experiments to determine what kinds of data-analytic tasks could be performed when the data were sonified. Their experimental results (Last and Usyskin 2015) have shown that even untrained listeners (listeners with no formal training in music) can make useful distinctions in the data. They found listeners could discriminate in the sonified data common data exploration tasks such as classification and clustering. (Their sonified outputs mapped the underlying data to the Western musical scale.) +Sonification is the practice of mapping aspects of the data to produce sound signals. In general, a technique can be called ‘sonification’ if it meets certain conditions. These include reproducibility (the same data can be transformed the same ways by other researchers and produce the same results) and what might be called intelligibility - that the ‘objective’ elements of the original data are reflected systematically in the resulting sound (see [Hermann](#Hermann) [2008](https://www.icad.org/Proceedings/2008/Hermann2008.pdf) for a taxonomy of sonification). [Last and Usyskin](#Last) [(2015)](https://www.researchgate.net/publication/282504359_Listen_to_the_Sound_of_Data) designed a series of experiments to determine what kinds of data-analytic tasks could be performed when the data were sonified. Their experimental results (Last and Usyskin 2015) have shown that even untrained listeners (listeners with no formal training in music) can make useful distinctions in the data. They found listeners could discriminate in the sonified data common data exploration tasks such as classification and clustering. (Their sonified outputs mapped the underlying data to the Western musical scale.) Last and Usyskin focused on time-series data. They argue that time-series data are particularly well suited to sonification because there are natural parallels with musical sound. Music is sequential, it has duration, and it evolves over time; so too with time-series data [(Last and Usyskin 2015: 424)](https://www.researchgate.net/publication/282504359_Listen_to_the_Sound_of_Data). It becomes a problem of matching the data to the appropriate sonic outputs. 
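To make 'matching the data to the appropriate sonic outputs' concrete, here is a minimal sketch of such a mapping in plain Python (an editorial illustration rather than part of this lesson's toolkit; the function `to_scale` and the `counts` values are invented for demonstration). It linearly rescales a numeric series onto a range of MIDI note numbers and then snaps each note to a C-major scale, which is essentially the operation the tools discussed below perform interactively:

```python
# Sketch: rescale a data series to MIDI note numbers on a C-major scale.
# Illustrative only: the sample values are invented, not a lesson dataset.

C_MAJOR = [0, 2, 4, 5, 7, 9, 11]  # semitone offsets of C major within an octave

def to_scale(data, low=48, high=72):
    """Linearly rescale values into [low, high] MIDI notes, snapped to C major."""
    lo, hi = min(data), max(data)
    span = (hi - lo) or 1  # guard against constant data
    notes = []
    for value in data:
        midi = low + round((value - lo) / span * (high - low))
        octave, semitone = divmod(midi, 12)
        nearest = min(C_MAJOR, key=lambda s: abs(s - semitone))
        notes.append(octave * 12 + nearest)
    return notes

counts = [25, 30, 31, 45, 60, 52, 70, 44, 20]  # imagine: coin finds per decade
print(to_scale(counts))  # [50, 53, 53, 60, 67, 62, 72, 60, 48]
```

Every choice in the sketch (the octave range, the choice of scale, linear rather than logarithmic rescaling) is an interpretive decision of exactly the kind described next.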
In many applications of sonification, a technique called ‘parameter mapping’ is used to marry aspects of the data along various auditory dimensions such as [pitch](#pitch), variation, brilliance, and onset. The problem with this approach is that where there is no temporal relationship (or rather, no non-linear relationship) between the original data points, the resulting sound can be ‘confusing’ (2015: 422). @@ -69,7 +69,7 @@ It is noisy; yet we perceive meaning. Consider the video below: -What's going on here? If that song was already known to you, you probably heard the actual 'words'. Yet, no words are present in the song! If the song was not already familiar to you, it sounded like garbled nonsense (see more examples on [Andy Baio's](#Baio) [website](http://waxy.org/2015/12/if_drake_was_born_a_piano/)). This effect is sometimes called an 'auditory hallucination'(cf. [Koebler, 2015](#Koebler)). This example shows how in any representation of data we can hear/see what is not, strictly speaking, there. We fill the holes with our own expectations. +What's going on here? If that song was already known to you, you probably heard the actual 'words'. Yet, no words are present in the song! If the song was not already familiar to you, it sounded like garbled nonsense (see more examples on [Andy Baio's](#Baio) [website](https://waxy.org/2015/12/if_drake_was_born_a_piano/)). This effect is sometimes called an 'auditory hallucination'(cf. [Koebler, 2015](#Koebler)). This example shows how in any representation of data we can hear/see what is not, strictly speaking, there. We fill the holes with our own expectations. Consider the implications for history. If we sonify our data, and begin to hear patterns in the sound, or odd outliers, our cultural expectations about how music works (our memories of similar snippets of music, heard in particular contexts) are going to colour our interpretation. This I would argue is true about all representations of the past, but sonifying is just odd enough to our regular methods that this self-awareness will help us identify or communicate the critical patterns in the (data of the) past. @@ -81,7 +81,7 @@ In each section, I will give a conceptual introduction, followed by a walkthroug There are a wide variety of tools out there to sonify data. Some for instance are packages for the widely-used [R statistical environment](https://cran.r-project.org/), such as ‘[playitbyR](https://cran.r-project.org/web/packages/playitbyr/index.html)’ and ‘[AudiolyzR](https://cran.r-project.org/web/packages/audiolyzR/index.html)’. The first of these however has not been maintained or updated to work with the current version of R (its last update was a number of years ago), and the second requires considerable configuration of extra software to make it work properly. -By contrast, the [Musicalgorithms](http://musicalgorithms.org/) site is quite easy to use. The Musicalgorithms site has been online for over a decade. Though it is not open source, it represents a long-term research project in computational music by its creator, Jonathan Middleton. It is currently in its third major iteration (earlier iterations remain usable online). We will begin with Musicalalgorithms because it allows us to quickly enter and tweak our data to produce a MIDI file representation. Make sure to select '[Version 3](http://musicalgorithms.org/3.0/index.html).' +By contrast, the [Musicalgorithms](https://musicalgorithms.org/) site is quite easy to use. 
The Musicalgorithms site has been online for over a decade. Though it is not open source, it represents a long-term research project in computational music by its creator, Jonathan Middleton. It is currently in its third major iteration (earlier iterations remain usable online). We will begin with Musicalgorithms because it allows us to quickly enter and tweak our data to produce a MIDI file representation. Make sure to select '[Version 3](https://musicalgorithms.org/3.0/index.html).'

{% include figure.html filename="sonification-musicalgorithms-main-site-1.png" caption="The Musicalgorithms Website as it appeared on February 2nd, 2016" %}

@@ -109,7 +109,7 @@ The key field for us is ‘areaPitch1,’ which contains the space-delimited inp

{% include figure.html filename="sonification-musicalgorithms-pitch-mapping-2.png" caption="After you load your data, you can select the different operations across the top menu bar of the site. In the screenshot, the information mouseover is explaining what happens to the scaling of your data if you select the division operation to scale your data to the range of notes selected." %}

-Now, as you page across the various tabs in the interface (‘[duration](#duration) input’, ‘[pitch mapping](#pitch mapping)’, ‘duration mapping’, ‘scale options’) you can effect various transformations. In ‘pitch mapping’, there are a number of mathematical options for mapping the data against the full 88 keys/pitches of a piano keyboard (in a linear mapping, the _mean_ of one’s data would be mapped to middle C, or 40). One can also choose the kind of scale, whether it is a minor or major and so on. At this point, once you've selected your various transformations, you should save the text file. On the file tab, ‘play’, one can download a midi file. Your default audio program can play midi files (often defaulting to a piano tone). More complicated instrumentation can be assigned by opening the midi file in music mixing programs such as GarageBand (Mac) or [LMMS](https://lmms.io/) (Windows, Mac, Linux). (Using Garageband or LMMS are outside the scope of this tutorial. A video tutorial on LMMS is available [here](https://youtu.be/4dYxV3tqTUc), while Garageband tutorials proliferate online. Lynda.com has [an excellent one](http://www.lynda.com/GarageBand-tutorials/Importing-audio-tracks/156620/164050-4.html))
+Now, as you page across the various tabs in the interface (‘[duration](#duration) input’, ‘[pitch mapping](#pitch mapping)’, ‘duration mapping’, ‘scale options’) you can effect various transformations. In ‘pitch mapping’, there are a number of mathematical options for mapping the data against the full 88 keys/pitches of a piano keyboard (in a linear mapping, the _mean_ of one’s data would be mapped to middle C, or 40). One can also choose the kind of scale, whether it is a minor or major and so on. At this point, once you've selected your various transformations, you should save the text file. On the file tab, ‘play’, one can download a midi file. Your default audio program can play midi files (often defaulting to a piano tone). More complicated instrumentation can be assigned by opening the midi file in music mixing programs such as GarageBand (Mac) or [LMMS](https://lmms.io/) (Windows, Mac, Linux). (Using Garageband or LMMS is outside the scope of this tutorial. A video tutorial on LMMS is available [here](https://youtu.be/4dYxV3tqTUc), while Garageband tutorials proliferate online.
Lynda.com has [an excellent one](https://www.lynda.com/GarageBand-tutorials/Importing-audio-tracks/156620/164050-4.html)) If you had several columns of data for the same points - say, in our example from Roman Britain, we also wanted to sonify counts of a pottery type for those same towns - you can reload your next data series, effect the transformations and mappings, and generate another MIDI file. Since Garageband and LMMS allow for overlaying of voices, you can begin to build up complicated sequences of music. @@ -133,12 +133,12 @@ The [sample dataset](/assets/sonification/sonification-roman-data.csv) provided ``` ...so that your data follows immediately after that last comma (as like [this](/assets/sonification/sonification-romancoin-data-music.csv)). Save the file with a useful name like `coinsounds1.csv`. -3. Go to the [Musicalgorithms](http://musicalgorithms.org/3.0/index.html) site (version 3), and hit the load button. In the pop-up, click the blue 'load' button and select the file saved in step 2. The site will load your materials and display a green check mark if it loaded successfully. If it did not, make sure that your values are separated by spaces, and that they follow immediately the last comma in the code block in step 2. You may also try loading up the [demo file for this tutorial](/assets/sonification/sonification-romancoin-data-music.csv) instead.{% include figure.html filename="sonification-musicalgorithms-upload-4.png" caption="Click 'load' on the main screen to get this dialogue box. Then 'load csv'. Select your file; it will appear in the box. Then click the bottom load button." %} +3. Go to the [Musicalgorithms](https://musicalgorithms.org/3.0/index.html) site (version 3), and hit the load button. In the pop-up, click the blue 'load' button and select the file saved in step 2. The site will load your materials and display a green check mark if it loaded successfully. If it did not, make sure that your values are separated by spaces, and that they follow immediately the last comma in the code block in step 2. You may also try loading up the [demo file for this tutorial](/assets/sonification/sonification-romancoin-data-music.csv) instead.{% include figure.html filename="sonification-musicalgorithms-upload-4.png" caption="Click 'load' on the main screen to get this dialogue box. Then 'load csv'. Select your file; it will appear in the box. Then click the bottom load button." %} 4. Click on 'Pitch Input'. You'll see the values of your data. For now, **do not select** any further options on this page (thus using the site's default values). 5. Click on 'Duration Input'. **Do not select any options here for now**. The options here will map various transformations against your data that will alter the duration for each note. Do not worry about these options for now; move on. 6. Click on 'Pitch Mapping'. This is the most crucial choice, as it will transform (that is, scale) your raw data to a mapping against the keys of the keyboard. Leave the `mapping` set to 'division'. (The other options are modulo or logarithmic). The option `Range` 1 to 88 uses the full 88 keys of the keyboard; thus your lowest value would accord to the deepest note on the piano and your highest value with the highest note. You might wish instead to constrain your music around middle C, so enter 25 to 60 as your range. 
The output should change to: `31,34,34,34,25,28,30,60,28,25,26,26,25,25,60,25,25,38,33,26,25,25,25` These are no longer your counts; they are notes on the keyboard.{% include figure.html filename="sonification-musicalgorithms-settings-for-pitch-mapping-5.png" caption="Click into the 'range' box and set it to 25. The values underneath will change automatically. Click into the 'to' box and set it to 60. Click back into the other box; the values will update." %}
7. Click on 'Duration Mapping'. Like Pitch Mapping, this takes a range of times that you specify and uses the various mathematical options to map that range of possibilities against your notes. If you mouse over the `i` you will see how the numbers correspond with whole notes, quarter notes, eighth notes, and so on. Leave the default values for now.
-8. Click on 'Scale Options'. Here we can begin to select something of what might be called the 'emotional' aspect to sound. We commonly think of major scales being 'happy' while minor scales are 'sad'; for an accessible discussion see [this blog post](http://www.ethanhein.com/wp/2010/scales-and-emotions/). For now, select 'scale by: major'. Leave the 'scale' as `C`.
+8. Click on 'Scale Options'. Here we can begin to select something of what might be called the 'emotional' aspect to sound. We commonly think of major scales being 'happy' while minor scales are 'sad'; for an accessible discussion see [this blog post](https://www.ethanhein.com/wp/2010/scales-and-emotions/). For now, select 'scale by: major'. Leave the 'scale' as `C`.

You have now sonified one column of data! Click on the 'save' button, then 'save csv'. {% include figure.html filename="sonification-musicalgorithms-save-6.png" caption="The save data dialogue box." %}You'll have a file that looks something like this:

@@ -173,11 +173,11 @@ The next section of this tutorial requires Python. If you haven't experimented w

Mac users will already have Python installed on their machine. You can test this by holding down the COMMAND button and the spacebar; in the search window, type `terminal` and click on the terminal application. At the prompt, eg, the cursor blinking at `$` type `python --version` and the computer will respond with what version of python you have. _This next section of the tutorial assumes Python 2.7; it has not been tested on Python 3_.

-For Windows users, Python is not installed by default on your machine so [this page](http://docs.python-guide.org/en/latest/starting/install/win/) will help you get started, though things are a bit more complicated than that page makes out. First, download the `.msi` file that that page recommends (Python 2.7). Double click the file, and it should install itself in a new directory, eg `C:\Python27\`. Then, we have to tell Windows the location of where to look for Python whenever you run a python program; that is, you put the location of that directory into your 'path', or the environment variable that windows always checks when confronted with a new command. There are a couple ways of doing this, but perhaps the easiest is to search your computer for the program `Powershell` (type 'powershell' into your windows computer search). Open Powershell, and at the `>` prompt, paste this entire line:
+For Windows users, Python is not installed by default on your machine so [this page](https://docs.python-guide.org/en/latest/starting/install/win/) will help you get started, though things are a bit more complicated than that page makes out.
First, download the `.msi` file that that page recommends (Python 2.7). Double click the file, and it should install itself in a new directory, eg `C:\Python27\`. Then, we have to tell Windows the location of where to look for Python whenever you run a python program; that is, you put the location of that directory into your 'path', or the environment variable that windows always checks when confronted with a new command. There are a couple ways of doing this, but perhaps the easiest is to search your computer for the program `Powershell` (type 'powershell' into your windows computer search). Open Powershell, and at the `>` prompt, paste this entire line: `[Environment]::SetEnvironmentVariable("Path", "$env:Path;C:\Python27\;C:\Python27\Scripts\", "User")` -You can close powershell when you're done. You'll know it worked if nothing very much happens once you've pressed 'enter'. To test that everything is okay, open a command prompt (here are [10 ways to do this](http://www.howtogeek.com/235101/10-ways-to-open-the-command-prompt-in-windows-10/)) and type at the `>` prompt `python --version`. It should tell you `Python 2.7.10` or similar. +You can close powershell when you're done. You'll know it worked if nothing very much happens once you've pressed 'enter'. To test that everything is okay, open a command prompt (here are [10 ways to do this](https://www.howtogeek.com/235101/10-ways-to-open-the-command-prompt-in-windows-10/)) and type at the `>` prompt `python --version`. It should tell you `Python 2.7.10` or similar. The last piece of the puzzle that all users will need is a program called `Pip`. Mac users can install it by typing at the terminal :`sudo easy_install pip`. Windows users have a bit of a harder time. First, right-click and save-as this link: [https://bootstrap.pypa.io/get-pip.py](https://bootstrap.pypa.io/get-pip.py) (If you just click on the link, it will show you the code in your browser). Save it somewhere handy. Open a command prompt in the directory where you saved `get-pip.py`. Then, type at the command prompt `python get-pip.py`. Conventionally, in tutorials, you will see `>` or `$` at points where you are required to enter something at the command prompt or the terminal. You don't ever have to type those two characters. @@ -238,13 +238,13 @@ D, D, A, A, B, B, B, B, A Baa, Baa, black, sheep, have, you, any, wool? ``` -Can you make your computer play this song? (This [chart](https://web.archive.org/web/20171211192102/http://www.electronics.dit.ie/staff/tscarff/Music_technology/midi/midi_note_numbers_for_octaves.htm) will help). +Can you make your computer play this song? (This [chart](https://web.archive.org/web/20171211192102/https://www.electronics.dit.ie/staff/tscarff/Music_technology/midi/midi_note_numbers_for_octaves.htm) will help). -**By the way** There is a text file specification for describing music called '[ABC Notation](http://abcnotation.com/wiki/abc:standard:v2.1)'. It is beyond us for now, but one could write a sonification script in say a spreadsheet, mapping values to note names in the ABC specification (if you've ever used an IF - THEN in Excel to convert percentage grades to letter grades, you'll have a sense of how this might be done) and then using a site like [this one](http://trillian.mit.edu/~jc/music/abc/ABCcontrib.html) to convert the ABC notation into a .mid file. +**By the way** There is a text file specification for describing music called '[ABC Notation](https://abcnotation.com/wiki/abc:standard:v2.1)'. 
It is beyond us for now, but one could write a sonification script in, say, a spreadsheet, mapping values to note names in the ABC specification (if you've ever used an IF - THEN in Excel to convert percentage grades to letter grades, you'll have a sense of how this might be done) and then using a site like [this one](https://trillian.mit.edu/~jc/music/abc/ABCcontrib.html) to convert the ABC notation into a .mid file.

### Getting your own data in

-[This file](/assets/sonification/sonification-diary.csv) is a selection from the topic model fitted to John Adams' Diaries for[The Macroscope](http://themacroscope.org). Only the strongest signals have been preserved by rounding the values in the columns to two decimal places (remembering that .25 for instance would indicate that that topic is contributing to a quarter of that diary entry's composition). To get this data into your python script, it has to be formatted in a particular away. The tricky bit is getting the date field right.
+[This file](/assets/sonification/sonification-diary.csv) is a selection from the topic model fitted to John Adams' Diaries for [The Macroscope](https://themacroscope.org). Only the strongest signals have been preserved by rounding the values in the columns to two decimal places (remembering that .25 for instance would indicate that that topic is contributing to a quarter of that diary entry's composition). To get this data into your python script, it has to be formatted in a particular way. The tricky bit is getting the date field right.

_For the purposes of this tutorial, we are going to leave the names of variables and so on unchanged from the sample script. The sample script was developed with earthquake data in mind; so where it says 'magnitude' we can think of it as equating to '% topic composition.'_

@@ -369,13 +369,13 @@

# Sonic Pi

-Having unique midifiles that you arrange (in Garageband or some other music composition program) moves you from 'sonifying' towards composition and sound art. In this final section, I do not offer you a full tutorial on using [Sonic Pi](http://sonic-pi.net), but rather point you towards this environment that allows for the actual live-coding and performance of your data (see [this video](https://www.youtube.com/watch?v=oW-3HVOeUQA) for an actual live-coding performance). Sonic Pi's built-in tutorials will show you something of the potential of using your computer as an actual musical instrument (where you type Ruby code into its built-in editor while the interpreter plays what you encode).
+Having unique midifiles that you arrange (in Garageband or some other music composition program) moves you from 'sonifying' towards composition and sound art. In this final section, I do not offer you a full tutorial on using [Sonic Pi](https://sonic-pi.net), but rather point you towards this environment that allows for the actual live-coding and performance of your data (see [this video](https://www.youtube.com/watch?v=oW-3HVOeUQA) for an actual live-coding performance). Sonic Pi's built-in tutorials will show you something of the potential of using your computer as an actual musical instrument (where you type Ruby code into its built-in editor while the interpreter plays what you encode).

Why would you want to do this? As has progressively become clear in this tutorial, when you sonify your data you begin to make choices about how the data maps into sound, and these choices reflect implicit or explicit decisions about which data matter. There is a continuum of 'objectivity', if you will. At one end, a sonification that supports an argument about the past; at the other, a performance about the past as riveting and personal as any well-done public lecture. Sonification moves our data off the page and into the ears of our listeners: it is a kind of public history. Performing our data... imagine that! Here, I offer simply a code snippet that will allow you to import your data, where your data is simply a list of values saved as csv. I am indebted to George Washington University librarian Laura Wrubel who posted to [gist.github.com](https://gist.github.com/lwrubel) her experiments in sonifying her library's circulation transactions.

-In this [sample file](/assets/sonification/sonification-jesuittopics.csv)(a topic model generated from the [Jesuit Relations](http://puffin.creighton.edu/jesuit/relations/)), there are two topics. The first row contains the headers: topic1, topic2.
+In this [sample file](/assets/sonification/sonification-jesuittopics.csv) (a topic model generated from the [Jesuit Relations](https://puffin.creighton.edu/jesuit/relations/)), there are two topics. The first row contains the headers: topic1, topic2.

### Practice

@@ -439,14 +439,14 @@ The code is pretty clear: loop the 'bd_boom' sample with the reverb sound effect

By the way, 'live-coding'? What makes this a 'live-coding' environment is that you can make changes to the code _while Sonic Pi is turning it into music_. Don't like what you're hearing? Change the code up on the fly!

-For more on Sonic Pi, [this workshop website](https://www.miskatonic.org/music/access2015/) is a good place to start. See also Laura Wrubel's [report on attending that workshop, and her and her colleague's work in this area](http://library.gwu.edu/scholarly-technology-group/posts/sound-library-work).
+For more on Sonic Pi, [this workshop website](https://www.miskatonic.org/music/access2015/) is a good place to start. See also Laura Wrubel's [report on attending that workshop, and her and her colleague's work in this area](https://library.gwu.edu/scholarly-technology-group/posts/sound-library-work).

# Nihil Novi Sub Sole

-Again, lest we think that we are at the cutting edge in our algorithmic generation of music, a salutary reminder was published in 1978 on 'dice music games' of the eighteenth century, where rolls of the dice determined the recombination of pre-written snippets of music. [Some of these games have been explored and re-coded for the Sonic-Pi by Robin Newman](https://rbnrpi.wordpress.com/project-list/mozart-dice-generated-waltz-revisited-with-sonic-pi/). Newman also uses a tool that could be described as Markdown+Pandoc for musical notation, [Lilypond](https://www.lilypond.org/), to score these compositions. The antecedents for everything you will find at _The Programming Historian_ are deeper than you might suspect!

# Conclusion

-Sonifying our data forces us to confront the ways our data are often not so much about the past, but rather our constructed versions of it. It does so partly by virtue of its novelty and the art and artifice required to map data to sound. But it does so also by its contrast with our received notions of visualization of data. It may be that the sounds one generates never rise to the level of 'music'; but if it helps transform how we encounter the past, and how others engage with the past, then the effort will be worth it. As Trevor Owens might have put it, 'Sonfication is about [discovery, not justification'](http://www.trevorowens.org/2012/11/discovery-and-justification-are-different-notes-on-sciencing-the-humanities/).
+Sonifying our data forces us to confront the ways our data are often not so much about the past, but rather our constructed versions of it. It does so partly by virtue of its novelty and the art and artifice required to map data to sound. But it does so also by its contrast with our received notions of visualization of data. It may be that the sounds one generates never rise to the level of 'music'; but if it helps transform how we encounter the past, and how others engage with the past, then the effort will be worth it. As Trevor Owens might have put it, 'Sonification is about [discovery, not justification](https://www.trevorowens.org/2012/11/discovery-and-justification-are-different-notes-on-sciencing-the-humanities/)'.

## Terms

+ **Amplitude**, roughly, the loudness of the note

# References

-Baio, Andy. 2015. 'If Drake Was Born A Piano'. Waxy. [https://waxy.org/2015/12/if_drake_was_born_a_piano/](https://waxy.org/2015/12/if_drake_was_born_a_piano/)

-Drucker, Johanna. 2011. Humanities Approaches to Graphical Display. DHQ 5.1 [https://web.archive.org/web/20190203083307/https://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html](https://web.archive.org/web/20190203083307/https://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html)

-Hedges, Stephen A. 1978. “Dice Music in the Eighteenth Century”. Music & Letters 59 (2). Oxford University Press: 180–87. [https://www.jstor.org/stable/734136](https://www.jstor.org/stable/734136).

-Hermann, T. 2008. "Taxonomy and definitions for sonification and auditory display". In P. Susini and O. Warusfel (eds.)
Proceedings of the 14th international conference on auditory display (ICAD 2008). IRCAM, Paris. [http://www.icad.org/Proceedings/2008/Hermann2008.pdf](http://www.icad.org/Proceedings/2008/Hermann2008.pdf) +Hermann, T. 2008. "Taxonomy and definitions for sonification and auditory display". In P. Susini and O. Warusfel (eds.) Proceedings of the 14th international conference on auditory display (ICAD 2008). IRCAM, Paris. [https://www.icad.org/Proceedings/2008/Hermann2008.pdf](https://www.icad.org/Proceedings/2008/Hermann2008.pdf) -Koebler, Jason. 2015. "The Strange Acoustic Phenomenon Behind These Wacked-Out Versions of Pop Songs" Motherboard, Dec 18. [https://web.archive.org/web/20161023223029/http://motherboard.vice.com/read/the-strange-acoustic-phenomenon-behind-these-wacked-out-versions-of-pop-songs](https://web.archive.org/web/20161023223029/http://motherboard.vice.com/read/the-strange-acoustic-phenomenon-behind-these-wacked-out-versions-of-pop-songs) +Koebler, Jason. 2015. "The Strange Acoustic Phenomenon Behind These Wacked-Out Versions of Pop Songs" Motherboard, Dec 18. [https://web.archive.org/web/20161023223029/https://motherboard.vice.com/read/the-strange-acoustic-phenomenon-behind-these-wacked-out-versions-of-pop-songs](https://web.archive.org/web/20161023223029/https://motherboard.vice.com/read/the-strange-acoustic-phenomenon-behind-these-wacked-out-versions-of-pop-songs) Last and Usyskin, 2015. "Listen to the Sound of Data". In Aaron K. Baughman et al. (eds.) Multimedia Data Mining and Analytics. Springer: Heidelberg. Pp. 419-446 [https://www.researchgate.net/publication/282504359_Listen_to_the_Sound_of_Data](https://www.researchgate.net/publication/282504359_Listen_to_the_Sound_of_Data) diff --git a/en/lessons/space-place-gazetteers.md b/en/lessons/space-place-gazetteers.md index b6a75db296..e050c72d4a 100644 --- a/en/lessons/space-place-gazetteers.md +++ b/en/lessons/space-place-gazetteers.md @@ -79,7 +79,7 @@ The first task for anybody embarking on a digital spatial history project is to A project emphasizing the conflicting, contested, and dynamic characteristics of places, as well as spatial information reflected in textual attestations, should begin with a gazetteer. An example of such a project would be the [Heritage Gazetteer of Libya](https://perma.cc/KLV5-FTRL), which aims to provide information about unique identifiers, locations, and monuments within modern Libya that were important to its history before 1950. The emphasis of this project is on compiling names and variants produced by the research of the Society for Libyan Studies. -A GIS is only the logical starting point for a spatial history project centered on geography and spatial relations *per se*. Both gazetteers and GIS are based on spatial data structured in particular formats, but the focus of a GIS is primarily on the projection of geospatial geometries, in the form of points, lines, and polygons. An example GIS project would be the [Bomb Site: Mapping the WW2 bomb census](http://bombsight.org/#17/51.50595/-0.10680) project, which prioritizes the visualization of targets of the Luftwaffe Blitz bombing raids in London from October 7, 1940 to June 6, 1941. While a gazetteer may also contain geographical information, its primary focus is on depicting more information about places then merely points, lines, or polygons on a map base. +A GIS is only the logical starting point for a spatial history project centered on geography and spatial relations *per se*. 
Both gazetteers and GIS are based on spatial data structured in particular formats, but the focus of a GIS is primarily on the projection of geospatial geometries, in the form of points, lines, and polygons. An example GIS project would be the [Bomb Site: Mapping the WW2 bomb census](https://bombsight.org/#17/51.50595/-0.10680) project, which prioritizes the visualization of targets of the Luftwaffe Blitz bombing raids in London from October 7, 1940 to June 6, 1941. While a gazetteer may also contain geographical information, its primary focus is on depicting more information about places than merely points, lines, or polygons on a map base.

Indeed, although geometry is necessary for making maps, the symbols on maps only tell a small part of the story of a place. The way to model rich, multivocal data about place-making events and contestations of power, about places as settings for social events, and about the sense of place and its representations, is with a gazetteer, not a map. Gazetteers are excellent for collecting information about what a place has been called, by whom, why, and when; who has been there; what has occurred there; who has contended for authority over it; or what texts have referred to it. Gazetteers often use a controlled vocabulary to designate the supplementary feature types associated with places: whether a place is a settlement, a waypoint on a travel itinerary, or a geographical feature such as a mountain or river.

diff --git a/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.md b/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.md
index f19669a024..ec4b6ef956 100755
--- a/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.md
+++ b/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.md
@@ -102,7 +102,7 @@ as means for writing scholarly papers but as a convention for online editing in general. Popular general purpose plain text editors include [Atom](https://atom.io/)
-(all platforms) and [Notepad++](http://notepad-plus-plus.org) (Windows only).
+(all platforms) and [Notepad++](https://notepad-plus-plus.org) (Windows only).
It is important to understand that Markdown is merely a convention. Markdown files are stored as plain text, further adding to the
@@ -370,7 +370,7 @@ nice PDF:
If you'd like to get an idea of how this kind of markup will be interpreted as HTML formatting, try [this online
-sandbox](http://daringfireball.net/projects/markdown/dingus) and play
+sandbox](https://daringfireball.net/projects/markdown/dingus) and play
around with various kinds of syntax. Remember that certain elements of *Pandoc*-flavored Markdown (such as the title block and footnotes) will not work in this web form, which only accepts the basics.
@@ -561,7 +561,7 @@ this:
    @article{fyfe_digital_2011,
    title = {Digital Pedagogy Unplugged},
    volume = {5},
-   url = {http://digitalhumanities.org/dhq/vol/5/3/000106/000106.html},
+   url = {https://digitalhumanities.org/dhq/vol/5/3/000106/000106.html},
    number = {3},
    urldate = {2013-09-28},
    author = {Fyfe, Paul},
@@ -628,7 +628,7 @@ Style Language" (yet another plain-text convention, in this case for describing citation styles) and denoted by the .csl file extension. Luckily, the CSL project maintains a repository of common citation styles, some even tailored for specific journals.
Visit
- to find the .csl file for
+ to find the .csl file for
Modern Language Association, download `modern-language-association.csl`, and save to your project directory as `mla.csl`. Now we need to tell Pandoc to use the MLA stylesheet instead of the default Chicago. We do
@@ -693,33 +693,33 @@ for support than John MacFarlane's [Pandoc site](https://pandoc.org/) and the affiliated [mailing list](https://groups.google.com/forum/#!forum/pandoc-discuss). At least two "Question and Answer" type sites can field questions on Pandoc:
-[Stack Overflow](http://stackoverflow.com/questions/tagged/pandoc) and
-[Digital Humanities Q&A](http://web.archive.org/web/20190203062832/http://digitalhumanities.org/answers/).
+[Stack Overflow](https://stackoverflow.com/questions/tagged/pandoc) and
+[Digital Humanities Q&A](https://web.archive.org/web/20190203062832/https://digitalhumanities.org/answers/).
Questions may also be asked live, on Freenode IRC, \#Pandoc channel, frequented by a friendly group of regulars. As you learn more about Pandoc, you can also explore one of its most powerful features: [filters](https://github.com/jgm/pandoc/wiki/Pandoc-Filters).

Although we suggest starting out with a simple editor, many (70+, according to [this blog
-post](http://web.archive.org/web/20140120195538/http://mashable.com/2013/06/24/markdown-tools/))
+post](https://web.archive.org/web/20140120195538/https://mashable.com/2013/06/24/markdown-tools/))
other, Markdown-specific alternatives to MS Word are available online, and often free of cost. From the standalone ones, we liked
-[Mou](http://mouapp.com/), [Write Monkey](http://writemonkey.com), and
-[Sublime Text](http://www.sublimetext.com/). Several web-based platforms
+[Mou](https://mouapp.com/), [Write Monkey](https://writemonkey.com), and
+[Sublime Text](https://www.sublimetext.com/). Several web-based platforms
have recently emerged that provide slick, graphic interfaces for collaborative writing and version tracking using Markdown. These
-include: [prose.io](http://prose.io),
-[Authorea](http://www.authorea.com),
-[Draft](http://www.draftin.com), and
+include: [prose.io](https://prose.io),
+[Authorea](https://www.authorea.com),
+[Draft](https://www.draftin.com), and
[StackEdit](https://stackedit.io).

-But the ecosystem is not limited to editors. [Gitit](http://gitit.net/)
+But the ecosystem is not limited to editors. [Gitit](https://gitit.net/)
and [Ikiwiki](https://github.com/dubiousjim/pandoc-iki) support authoring in Markdown with Pandoc as parser. To this list we may add a range of tools that generate fast, static webpages, [Yst](https://github.com/jgm/yst),
-[Jekyll](http://github.com/fauno/jekyll-pandoc-multiple-formats),
-[Hakyll](http://jaspervdj.be/hakyll/), and [bash shell
+[Jekyll](https://github.com/fauno/jekyll-pandoc-multiple-formats),
+[Hakyll](https://jaspervdj.be/hakyll/), and [bash shell
script](https://github.com/wcaleb/website) by the historian Caleb McDaniel.

@@ -728,14 +728,14 @@ Markdown. Markdown to marketplace platform [Leanpub](https://leanpub.com) could be an interesting alternative to the traditional publishing model. And we ourselves are experimenting with academic journal design based on GitHub and
-[readthedocs.org](http://readthedocs.org) (tools usually used for technical
+[readthedocs.org](https://readthedocs.org) (tools usually used for technical
documentation).

[^1]: Don't worry if you don't understand some of this terminology yet!
[^2]: The source files for this document can be [downloaded from GitHub](https://github.com/dhcolumbia/pandoc-workflow). Use the "raw" option when viewing in GitHub to see the source Markdown. The authors would like to thank Alex Gil and his colleagues from Columbia's Digital Humanities Center, and the participants of openLab at the Studio in the Butler library for testing the code in this tutorial on a variety of platforms. -[^3]: See Charlie Stross's excellent discussion of this topic in [Why Microsoft Word Must Die](http://www.antipope.org/charlie/blog-static/2013/10/why-microsoft-word-must-die.html). +[^3]: See Charlie Stross's excellent discussion of this topic in [Why Microsoft Word Must Die](https://www.antipope.org/charlie/blog-static/2013/10/why-microsoft-word-must-die.html). [^4]: Note that the .bib extension may be "registered" to Zotero in your operating system. That means when you click on a .bib file it is likely that Zotero will be called to open it, whereas we want to open it within a text editor. Eventually, you may want to associate the .bib extension with your text editor. diff --git a/en/lessons/temporal-network-analysis-with-r.md b/en/lessons/temporal-network-analysis-with-r.md index b75d6eaeba..afea5ad561 100644 --- a/en/lessons/temporal-network-analysis-with-r.md +++ b/en/lessons/temporal-network-analysis-with-r.md @@ -429,7 +429,7 @@ Let's take a step back and reflect on what we've learned. At this point, we have If there is one thing that I hope you will take away from this tutorial, it is the idea that adding temporal data to nodes and edges transforms a general social science tool into a powerful method for historical argument. Comparing network structures and metrics from one timeslice to another gives them historical significance that can be difficult, if not impossible, to discern in conventional static social network analysis. -This tutorial introduced only a few of the many tools and techniques made possible by temporal network analysis. One especially exciting area of this field is in dynamic simulations that model the transmission of something, for example a disease or an idea, among individuals within a given temporal network. If that sounds interesting, take a look at the [EpiModel](http://www.epimodel.org/) package or other tools created by epidemiologists to model diffusion within dynamic networks. +This tutorial introduced only a few of the many tools and techniques made possible by temporal network analysis. One especially exciting area of this field is in dynamic simulations that model the transmission of something, for example a disease or an idea, among individuals within a given temporal network. If that sounds interesting, take a look at the [EpiModel](https://www.epimodel.org/) package or other tools created by epidemiologists to model diffusion within dynamic networks. Depending on the historical data that you're working with, temporal network analysis may offer important insights into how the properties of nodes, edges, and the overall network change over time. Whether or not you decide to make the leap to temporal network analysis, it is helpful to remember that networks of all kinds are complex historical phenomena that emerge, develop, transform beyond recognition, and disappear over the course of time. 
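To make the timeslice comparison concrete before moving on: the workflow above is written in R with the networkDynamic, tsna, and ndtv packages, but the underlying move (filter a dated edge list to a time window, build a network, compute a metric, and compare across windows) is language-agnostic. The following minimal sketch in Python, using pandas and networkx with made-up names and dates, is offered only as an illustration of that idea; it is not the lesson's own R code.

```python
# A minimal sketch of timeslice comparison, with hypothetical data;
# the lesson's own workflow uses R (networkDynamic, tsna, ndtv).
import pandas as pd
import networkx as nx

# Hypothetical edge list: one row per tie, with the year it was active.
edges = pd.DataFrame({
    "source": ["a", "a", "b", "c", "c"],
    "target": ["b", "c", "c", "d", "e"],
    "year":   [1840, 1841, 1847, 1848, 1849],
})

# Build a static network for each window and compare a simple metric.
for start, end in [(1840, 1845), (1845, 1850)]:
    window = edges[(edges["year"] >= start) & (edges["year"] < end)]
    G = nx.from_pandas_edgelist(window, "source", "target")
    print(f"{start}-{end}: {G.number_of_nodes()} nodes, "
          f"density {nx.density(G):.2f}")
```

The change in density from one window to the next is exactly the kind of slice-to-slice comparison that, at scale, can carry historical significance.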
@@ -443,7 +443,7 @@ Maybe you made it through this tutorial but you are still more comfortable with - Ken Cherven has a good overview of Dynamic Network Analysis with Gephi in his book _Mastering Gephi Network Visualization_ (2015) -If you are hungry for more temporal network analysis with R, [this tutorial](https://web.archive.org/web/20180423112846/http://statnet.csde.washington.edu/workshops/SUNBELT/current/ndtv/ndtv_workshop.html) by Skye Bender-deMoll explains additional functions and features of the packages used here. It served as my own guide to learning about temporal network analysis and formed the inspiration for the tutorial above. +If you are hungry for more temporal network analysis with R, [this tutorial](https://web.archive.org/web/20180423112846/https://statnet.csde.washington.edu/workshops/SUNBELT/current/ndtv/ndtv_workshop.html) by Skye Bender-deMoll explains additional functions and features of the packages used here. It served as my own guide to learning about temporal network analysis and formed the inspiration for the tutorial above. You can also dive deeper into the documentation to learn more about the [networkDynamic package](https://cran.r-project.org/web/packages/networkDynamic/index.html), the [TSNA package](https://cran.r-project.org/web/packages/tsna/index.html), and the [NDTV package](https://cran.r-project.org/web/packages/ndtv/index.html). diff --git a/en/lessons/text-mining-with-extracted-features.md b/en/lessons/text-mining-with-extracted-features.md index 3064151d9e..aada2979d3 100755 --- a/en/lessons/text-mining-with-extracted-features.md +++ b/en/lessons/text-mining-with-extracted-features.md @@ -60,9 +60,9 @@ Though it is relatively new, the Extracted Features dataset is already seeing us [Underwood](https://doi.org/10.6084/m9.figshare.1279201) leveraged the features for identifying genres, such as fiction, poetry, and drama (2014). Associated with this work, he has released a dataset of 178k books classified by genre alongside genre-specific word counts ([Underwood 2015](https://doi.org/10.13012/J8JW8BSJ)). -The Underwood subset of the Extracted Features dataset was used by Forster (2015) to [observe gender in literature](https://web.archive.org/web/20160105003327/http://cforster.com/2015/09/gender-in-hathitrust-dataset/), illustrating the decline of woman authors through the 19th century. +The Underwood subset of the Extracted Features dataset was used by Forster (2015) to [observe gender in literature](https://web.archive.org/web/20160105003327/https://cforster.com/2015/09/gender-in-hathitrust-dataset/), illustrating the decline of women authors through the 19th century. -The Extracted Features dataset also underlies higher-level analytic tools. [Mimno](http://mimno.infosci.cornell.edu/wordsim/nearest.html) processed word co-occurrence tables per year, allowing others to view how correlations between topics change over time (2014). The [HT Bookworm](https://analytics.hathitrust.org/bookworm) project has developed an API and visualization tools to support exploration of trends within the HathiTrust collection across various classes, genres, and languages. Finally, we have developed an approach to [within-book topic modelling](https://github.com/organisciak/htrc-book-models) which functions as a mnemonic accompaniment to a previously-read book (Organisciak 2014). +The Extracted Features dataset also underlies higher-level analytic tools.
[Mimno](https://mimno.infosci.cornell.edu/wordsim/nearest.html) processed word co-occurrence tables per year, allowing others to view how correlations between topics change over time (2014). The [HT Bookworm](https://analytics.hathitrust.org/bookworm) project has developed an API and visualization tools to support exploration of trends within the HathiTrust collection across various classes, genres, and languages. Finally, we have developed an approach to [within-book topic modelling](https://github.com/organisciak/htrc-book-models) which functions as a mnemonic accompaniment to a previously-read book (Organisciak 2014). ## Suggested Prior Skills @@ -111,7 +111,7 @@ This command installs the HTRC Feature Reader and its necessary dependencies. We That's it! At this point you have everything necessary to start reading HTRC Feature Reader files. -> *psst*, advanced users: You can install the HTRC Feature Reader *without* Anaconda with `pip install htrc-feature-reader`, though for this lesson you'll need to install two additional libraries `pip install matplotlib jupyter`. Also, note that not all manual installations are alike because of hard-to-configure system optimizations: this is why we recommend Anaconda. If you think your code is going slow, you should check that Numpy has access to [BLAS and LAPACK libraries](http://stackoverflow.com/a/19350234/233577) and install [Pandas recommended packages](http://pandas.pydata.org/pandas-docs/version/0.15.2/install.html#recommended-dependencies). The rest is up to you, advanced user! +> *psst*, advanced users: You can install the HTRC Feature Reader *without* Anaconda with `pip install htrc-feature-reader`, though for this lesson you'll need to install two additional libraries `pip install matplotlib jupyter`. Also, note that not all manual installations are alike because of hard-to-configure system optimizations: this is why we recommend Anaconda. If you think your code is going slow, you should check that Numpy has access to [BLAS and LAPACK libraries](https://stackoverflow.com/a/19350234/233577) and install [Pandas recommended packages](https://pandas.pydata.org/pandas-docs/version/0.15.2/install.html#recommended-dependencies). The rest is up to you, advanced user! ## Start a Notebook @@ -232,7 +232,7 @@ The volume id can be used to pull more information from other sources. The scann print(vol.handle_url) ``` - http://hdl.handle.net/2027/nyp.33433075749246 + https://hdl.handle.net/2027/nyp.33433075749246 {% include figure.html filename="June-cover.PNG" caption="Digital copy of sample book" %} @@ -327,7 +327,7 @@ tokens.plot() On some systems, this may take some time the first time. It is clear that pages at the start of a book have fewer words per page, after which the count is fairly steady except for occasional valleys. -You may have some guesses for what these patterns mean. A look at the [scans](http://hdl.handle.net/2027/nyp.33433074811310) confirms that the large valleys are often illustration pages or blank pages, small valleys are chapter headings, and the upward pattern at the start is from front matter. +You may have some guesses for what these patterns mean. A look at the [scans](https://hdl.handle.net/2027/nyp.33433074811310) confirms that the large valleys are often illustration pages or blank pages, small valleys are chapter headings, and the upward pattern at the start is from front matter. Not all books will have the same patterns so we can't just codify these correlations for millions of books. 
However, looking at this plot makes clear an important assumption in text and data mining: that there are patterns underlying even the basic statistics derived from a text. The trick is to identify the consistent and interesting patterns and teach them to a computer. @@ -441,7 +441,7 @@ Look at the following list of commands: can you guess what the output will look - `vol.tokenlist(section='header')` - `vol.tokenlist(section='group')` -Details for these arguments are available in the code [documentation](http://htrc.github.io/htrc-feature-reader/htrc_features/feature_reader.m.html#htrc_features.feature_reader.Volume.tokenlist) for the Feature Reader. +Details for these arguments are available in the code [documentation](https://htrc.github.io/htrc-feature-reader/htrc_features/feature_reader.m.html#htrc_features.feature_reader.Volume.tokenlist) for the Feature Reader. Jupyter provides another convenience here. Documentation can be accessed within the notebook by adding a '?' to the start of a piece of code. Try it with `?vol.tokenlist`, or with other objects or variables. @@ -1022,7 +1022,7 @@ The output is a count of how often each part-of-speech tag ("pos") occurs in the - *Apply* with `sum()`: These groups were sent to an apply function, `sum()`. Sum is an aggregation function, so it sums all the information in the 'count' column for each group. For example, all the rows of data in the adverb group are summed up into a single count of all adverbs. - *Combine*: The combine step is implicit: the DataFrame knows from the `groupby` pattern to take everything that the apply function gives back (in the case of 'sum', just one row for every group) and stick it together. -`sum()` is one of many convenient functions [built-in](http://pandas.pydata.org/pandas-docs/stable/groupby.html) to Pandas. Other useful functions are `mean()`, `count()`, `max()`. It is also possible to send your groups to any function that you write with `apply()`. +`sum()` is one of many convenient functions [built-in](https://pandas.pydata.org/pandas-docs/stable/groupby.html) to Pandas. Other useful functions are `mean()`, `count()`, `max()`. It is also possible to send your groups to any function that you write with `apply()`. > groupby can be used on data columns or an index. To run against an index, use `level=[index_level_name]` as above. To group against columns, use `by=[column_name]`. @@ -1143,7 +1143,7 @@ Like iterating over `FeatureReader.volumes()` to get Volume objects, it is possi # Next Steps -Now that you know the basics of the HTRC Feature Reader, you can learn more about the [Extracted Features dataset](https://analytics.hathitrust.org/features). The [Feature Reader home page](https://github.com/htrc/htrc-feature-reader/blob/master/README.ipynb) contains a lesson similar to this one but for more advanced users (that's you now!), and the [code documentation](http://htrc.github.io/htrc-feature-reader/htrc_features/feature_reader.m.html) gives exact information about what types of information can be called. +Now that you know the basics of the HTRC Feature Reader, you can learn more about the [Extracted Features dataset](https://analytics.hathitrust.org/features).
The [Feature Reader home page](https://github.com/htrc/htrc-feature-reader/blob/master/README.ipynb) contains a lesson similar to this one but for more advanced users (that's you now!), and the [code documentation](https://htrc.github.io/htrc-feature-reader/htrc_features/feature_reader.m.html) gives exact information about what types of information can be called. Underwood (2015) has released [genre classifications of public-domain texts in the HTRC EF Dataset](https://analytics.hathitrust.org/genre), comprised of fiction, poetry, and drama. Though many historians will be interested in other corners of the dataset, fiction is a good place to tinker with text mining ideas because of its expressiveness and relative format consistency. @@ -1156,9 +1156,9 @@ Finally, the repository for the HTRC Feature Reader has [advanced tutorial noteb Boris Capitanu, Ted Underwood, Peter Organisciak, Timothy Cole, Maria Janina Sarol, J. Stephen Downie (2016). The HathiTrust Research Center Extracted Feature Dataset (1.0) [Dataset]. *HathiTrust Research Center*. [https://doi.org/10.13012/J8X63JT3](https://doi.org/10.13012/J8X63JT3) -Chris Forster. "A Walk Through the Metadata: Gender in the HathiTrust Dataset." Blog. [http://cforster.com/2015/09/gender-in-hathitrust-dataset/](https://web.archive.org/web/20160105003327/http://cforster.com/2015/09/gender-in-hathitrust-dataset/). +Chris Forster. "A Walk Through the Metadata: Gender in the HathiTrust Dataset." Blog. [https://cforster.com/2015/09/gender-in-hathitrust-dataset/](https://web.archive.org/web/20160105003327/https://cforster.com/2015/09/gender-in-hathitrust-dataset/). -Matthew L. Jockers (Feb 2015). "Revealing Sentiment and Plot Arcs with the Syuzhet Package". *Matthew L. Jockers*. Blog. http://www.matthewjockers.net/2015/02/02/syuzhet/. +Matthew L. Jockers (Feb 2015). "Revealing Sentiment and Plot Arcs with the Syuzhet Package". *Matthew L. Jockers*. Blog. https://www.matthewjockers.net/2015/02/02/syuzhet/. Peter Organisciak, Loretta Auvil, J. Stephen Downie (2015). “Remembering books: A within-book topic mapping technique.” Digital Humanities 2015. Sydney, Australia. @@ -1192,7 +1192,7 @@ rsync -azv data.analytics.hathitrust.org::features/listing/htrc-ef-all-files.txt ``` -Finally, it is possible to download many files from a list. To try, we've put together lists for public-domain [fiction](http://data.analytics.hathitrust.org/genre/fiction_paths.txt), [drama](http://data.analytics.hathitrust.org/genre/drama_paths.txt), and [poetry](http://data.analytics.hathitrust.org/genre/poetry_paths.txt) (Underwood 2014). For example: +Finally, it is possible to download many files from a list. To try, we've put together lists for public-domain [fiction](https://data.analytics.hathitrust.org/genre/fiction_paths.txt), [drama](https://data.analytics.hathitrust.org/genre/drama_paths.txt), and [poetry](https://data.analytics.hathitrust.org/genre/poetry_paths.txt) (Underwood 2014). For example: ```bash rsync -azv --files-from=fiction_paths.txt data.analytics.hathitrust.org::features/ . 
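```

The pieces of the Feature Reader introduced in this lesson fit together in a few lines. The sketch below is a hedged, minimal example: the file path is a placeholder for an Extracted Features file downloaded with rsync as above, and the groupby on the 'pos' index level follows the part-of-speech example discussed earlier.

```python
# A minimal sketch tying the lesson's pieces together; the path below
# is a placeholder for a file fetched with rsync as shown above.
from htrc_features import FeatureReader

paths = ["data/nyp.33433074811310.json.bz2"]  # hypothetical local path
fr = FeatureReader(paths)

for vol in fr.volumes():
    print(vol.handle_url)
    tl = vol.tokenlist()
    # Collapse the token counts to one row per part-of-speech tag,
    # as in the groupby example discussed in the lesson.
    by_pos = tl.groupby(level="pos").sum()
    print(by_pos.sort_values("count", ascending=False).head())
```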
diff --git a/en/lessons/text-mining-youtube-comments.md b/en/lessons/text-mining-youtube-comments.md index 96e75f7d0f..1670840246 100644 --- a/en/lessons/text-mining-youtube-comments.md +++ b/en/lessons/text-mining-youtube-comments.md @@ -334,7 +334,7 @@ Now that the comment data is reduced to the essentials, you can transform the da ## Modeling -An increasingly wide range of text mining and machine learning algorithms are available for scholars looking to create models and visualizations of big data. Many of these algorithms are described in other _Programming Historian_ lessons, for example, [word frequency analysis](/en/lessons/counting-frequencies) and [topic modeling](/en/lessons/topic-modeling-and-mallet). As noted above, the text mining algorithm central to this lesson is called Wordfish. For information on the machine learning algorithm itself and to explore Wordfish's base code, visit [the Wordfish website](http://www.Wordfish.org/software.html) and [the Wordfish Github repository](http://www.wordfish.org/). +An increasingly wide range of text mining and machine learning algorithms are available for scholars looking to create models and visualizations of big data. Many of these algorithms are described in other _Programming Historian_ lessons, for example, [word frequency analysis](/en/lessons/counting-frequencies) and [topic modeling](/en/lessons/topic-modeling-and-mallet). As noted above, the text mining algorithm central to this lesson is called Wordfish. For information on the machine learning algorithm itself and to explore Wordfish's base code, visit [the Wordfish website](https://www.Wordfish.org/software.html) and [the Wordfish Github repository](https://www.wordfish.org/). Developed by and for political scientists, Wordfish was originally created as a method for extracting the ideological leaning of documents expected to contain latent political perspectives (such as party manifestos or politician speeches). For example, Wordfish can be a useful tool for identifying whether United States representatives' speeches were made by [Democrats](https://perma.cc/G7U3-X2FB) or [Republicans](https://perma.cc/5WKD-YKY9), as well as for measuring the extremity of the ideological leaning conveyed in those speeches. @@ -364,7 +364,7 @@ The key difference between Wordfish scaling and topic modeling, however, are the ### Creating a Corpus in R -The [Wordfish](http://www.wordfish.org/) algorithm was initially distributed as a stand-alone R package (still available on the [Wordfish website](http://www.Wordfish.org/software.html)), but it is now also available in the [`quanteda` package](https://perma.cc/WYV4-Y884). The `quanteda` Wordfish package has certain advantages, including that it enables seamless wrangling of YouTube comment data into a useful format [to build the Wordfish model](https://perma.cc/7736-5QHV). Visit the [docs and tutorials](https://quanteda.org/quanteda/) on the `quanteda` website for more background. +The [Wordfish](https://www.wordfish.org/) algorithm was initially distributed as a stand-alone R package (still available on the [Wordfish website](https://www.Wordfish.org/software.html)), but it is now also available in the [`quanteda` package](https://perma.cc/WYV4-Y884). The `quanteda` Wordfish package has certain advantages, including that it enables seamless wrangling of YouTube comment data into a useful format [to build the Wordfish model](https://perma.cc/7736-5QHV). 
Visit the [docs and tutorials](https://quanteda.org/quanteda/) on the `quanteda` website for more background. To run the Wordfish model in `quanteda`, you must create three types of text data objects: a corpus, tokens, and a DFM. For more detail on how these objects work together, refer to `quanteda`'s [quick start page](https://perma.cc/QR2C-RCUH). diff --git a/en/lessons/topic-modeling-and-mallet.md b/en/lessons/topic-modeling-and-mallet.md index 0be99e3788..a128cd9cfd 100755 --- a/en/lessons/topic-modeling-and-mallet.md +++ b/en/lessons/topic-modeling-and-mallet.md @@ -161,7 +161,7 @@ the instructions appropriate for you below: ### Windows Instructions -1. Go to the [MALLET][] project page. You can [download MALLET here](http://mallet.cs.umass.edu/download.php). +1. Go to the [MALLET][] project page. You can [download MALLET here](https://mallet.cs.umass.edu/download.php). 2. You will also need the [Java developer's kit][] – that is, not the regular Java that's on every computer, but the one that lets you program things. Install this on your computer. @@ -236,7 +236,7 @@ You are now ready to skip ahead to the next section. Many of the instructions for OS X installation are similar to Windows, with a few differences. In fact, it is a bit easier. -1. Download and [install MALLET](http://mallet.cs.umass.edu/download.php). +1. Download and [install MALLET](https://mallet.cs.umass.edu/download.php). 2. Download the [Java Development Kit][Java developer's kit]. Unzip MALLET into a directory on your system (for ease of following @@ -600,29 +600,29 @@ report. preparing text for this sort of analysis. [Bash Command Line]: /lessons/intro-to-bash - [discussion list]: http://mallet.cs.umass.edu/mailinglist.php - [Distant Reading]: http://www.cs.umbc.edu/~hillol/NGDM07/abstracts/talks/MKirschenbaum.pdf - [Reading Machines]: http://www.worldcat.org/title/reading-machines-toward-an-algorithmic-criticism/oclc/708761605&referer=brief_results - [Voyant Tools]: http://voyant-tools.org + [discussion list]: https://mallet.cs.umass.edu/mailinglist.php + [Distant Reading]: https://www.cs.umbc.edu/~hillol/NGDM07/abstracts/talks/MKirschenbaum.pdf + [Reading Machines]: https://www.worldcat.org/title/reading-machines-toward-an-algorithmic-criticism/oclc/708761605&referer=brief_results + [Voyant Tools]: https://voyant-tools.org [dangers]: https://web.archive.org/web/20240602215348/https://www.scottbot.net/HIAL/index.html@p=16713.html - [zombies using Google Trends]: http://arxiv.org/abs/1003.6087/ - [David Blei and friends]: http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation - [Mining the Dispatch]: http://dsl.richmond.edu/dispatch/ + [zombies using Google Trends]: https://arxiv.org/abs/1003.6087/ + [David Blei and friends]: https://en.wikipedia.org/wiki/Latent_Dirichlet_allocation + [Mining the Dispatch]: https://dsl.richmond.edu/dispatch/ [Topic Modeling Martha Ballard's Diary]: https://perma.cc/39CG-MNLH - [MALLET]: http://mallet.cs.umass.edu/index.php - [*Gibbs sampling*]: http://en.wikipedia.org/wiki/Gibbs_sampling - [`download MALLET`]: http://mallet.cs.umass.edu/download.php - [Java developer's kit]: http://www.oracle.com/technetwork/java/javase/downloads/index.html - [automate this process]: http://electricarchaeology.ca/2012/07/09/mining-a-day-of-archaeology/ - [Mining the Open Web with Looted Heritage Draft]: http://electricarchaeology.ca/2012/06/08/mining-the-open-web-with-looted-heritage-draft/ + [MALLET]: https://mallet.cs.umass.edu/index.php + [*Gibbs sampling*]: 
https://en.wikipedia.org/wiki/Gibbs_sampling + [`download MALLET`]: https://mallet.cs.umass.edu/download.php + [Java developer's kit]: https://www.oracle.com/technetwork/java/javase/downloads/index.html + [automate this process]: https://electricarchaeology.ca/2012/07/09/mining-a-day-of-archaeology/ + [Mining the Open Web with Looted Heritage Draft]: https://electricarchaeology.ca/2012/06/08/mining-the-open-web-with-looted-heritage-draft/ [Figshare.com]: https://ndownloader.figshare.com/files/90972 [Guided Tour to Topic Modeling]: https://web.archive.org/web/20240520155820/https://www.scottbot.net/HIAL/index.html@p=19113.html - [Topic modeling made just simple enough]: http://tedunderwood.wordpress.com/2012/04/07/topic-modeling-made-just-simple-enough/ - [Some Assembly Required]: http://web.archive.org/web/20160704150726/http://www.lisarhody.com:80/some-assembly-required/ - [Topic Modeling in the Humanities: An Overview | Maryland Institute for Technology in the Humanities]: https://web.archive.org/web/20130116223500/http://mith.umd.edu/topic-modeling-in-the-humanities-an-overview/ - [Latent dirichlet allocation]: http://dl.acm.org/citation.cfm?id=944937 - [bibliography of topic modeling articles]: http://mimno.infosci.cornell.edu/topics.html - [Computational Historiography]: http://www.perseus.tufts.edu/publications/02-jocch-mimno.pdf + [Topic modeling made just simple enough]: https://tedunderwood.wordpress.com/2012/04/07/topic-modeling-made-just-simple-enough/ + [Some Assembly Required]: https://web.archive.org/web/20160704150726/https://www.lisarhody.com:80/some-assembly-required/ + [Topic Modeling in the Humanities: An Overview | Maryland Institute for Technology in the Humanities]: https://web.archive.org/web/20130116223500/https://mith.umd.edu/topic-modeling-in-the-humanities-an-overview/ + [Latent dirichlet allocation]: https://dl.acm.org/citation.cfm?id=944937 + [bibliography of topic modeling articles]: https://mimno.infosci.cornell.edu/topics.html + [Computational Historiography]: https://www.perseus.tufts.edu/publications/02-jocch-mimno.pdf [Windows]: /lessons/windows-installation [Mac]: /lessons/mac-installation [Linux]: /lessons/linux-installation diff --git a/en/lessons/transcribing-handwritten-text-with-python-and-azure.md b/en/lessons/transcribing-handwritten-text-with-python-and-azure.md index b21a394826..33e83b2d25 100644 --- a/en/lessons/transcribing-handwritten-text-with-python-and-azure.md +++ b/en/lessons/transcribing-handwritten-text-with-python-and-azure.md @@ -541,12 +541,12 @@ As capabilities grow, so the potential uses of this type of transcription for Di ## Bibliography -Cahill, Barry. "White, William Andrew," in Dictionary of Canadian Biography, vol. 16, University of Toronto/Université Laval, 2003–, [http://www.biographi.ca/en/bio/white_william_andrew_16E.html](https://perma.cc/AU2P-GBCA). Accessed August 18, 2023. +Cahill, Barry. "White, William Andrew," in Dictionary of Canadian Biography, vol. 16, University of Toronto/Université Laval, 2003–, [https://www.biographi.ca/en/bio/white_william_andrew_16E.html](https://perma.cc/AU2P-GBCA). Accessed August 18, 2023. Dombrowski, Quinn, Tassie Gniady, and David Kloster, "Introduction to Jupyter Notebooks," _Programming Historian_ 8 (2019), [https://doi.org/10.46430/phen0087](https://doi.org/10.46430/phen0087). Graham, Shawn. Detecting and Extracting Hand-written text. Jan 28, 2020. [https://shawngraham.github.io/dhmuse/detecting-handwriting/](https://perma.cc/J7BV-V6ME). Accessed 25 December, 2021. 
-White, William. 1917. William Andrew White fonds, R15535-0-8-E, "1917 Diary", Item ID number 4818067. Library and Archives Canada. [http://central.bac-lac.gc.ca/.redirect?app=fonandcol&id=4818067&lang=eng](https://perma.cc/9LQJ-XBEW). Accessed August 18, 2023. +White, William. 1917. William Andrew White fonds, R15535-0-8-E, "1917 Diary", Item ID number 4818067. Library and Archives Canada. [https://central.bac-lac.gc.ca/.redirect?app=fonandcol&id=4818067&lang=eng](https://perma.cc/9LQJ-XBEW). Accessed August 18, 2023. Cognitive-services-quickstart-code, June 22, 2021, [https://docs.microsoft.com/en-us/azure/cognitive-services/computer-vision/quickstarts-sdk/python-sdk](https://perma.cc/FQ4Z-J9JU). Accessed 25 December, 2021. diff --git a/en/lessons/transforming-xml-with-xsl.md b/en/lessons/transforming-xml-with-xsl.md index 21ba61884d..ae92da2b95 100644 --- a/en/lessons/transforming-xml-with-xsl.md +++ b/en/lessons/transforming-xml-with-xsl.md @@ -239,7 +239,7 @@ The command line code examples we will show here will assume that this is the ca # Choosing and Preparing XML Data -In order to begin transforming XML, you will need to obtain a well-formed dataset. Many online historical databases are built upon XML and provide their data freely. This tutorial will make use of the [Scissors and Paste Database](http://scissors-and-paste.net). +In order to begin transforming XML, you will need to obtain a well-formed dataset. Many online historical databases are built upon XML and provide their data freely. This tutorial will make use of the [Scissors and Paste Database](https://scissors-and-paste.net). The *Scissors and Paste Database* is a collaborative and growing collection of articles from British and imperial newspapers in the 18th and 19th centuries. Its original purpose was to allow for careful comparisons of reprints (copies) that appeared in multiple newspapers as well as to detect similarly themed articles across different English-language publications. Like many XML databases, *Scissors and Paste* contains data (the article's text), formatting information (such as italics and justification), and metadata. This metadata includes documentation about the particular article, such as its pagination and printing date, information about the newspaper in which it was published, and the themes, individuals or locations mentioned in the text. @@ -318,7 +318,7 @@ The first three lines of your XSL file should be the following: ``` The first line documents that this is an XML document encoded as UTF-8. -The second line states that the document is an XSL document version 1.0 and the standards (or [namespace](https://en.wikipedia.org/wiki/Namespace)) established by the [World Wide Web Consortium](http://www.w3.org/), whose web address you have listed. +The second line states that the document is an XSL document version 1.0 and the standards (or [namespace](https://en.wikipedia.org/wiki/Namespace)) established by the [World Wide Web Consortium](https://www.w3.org/), whose web address you have listed. (Note that an XSL document is ultimately an XML document!) Finally, the third line tells your transformer what sort of output you would like to create. In this case, you are indicating that you will be creating a plain-text file. (You could also have written `xml` or `html`, instead of `text`, in order to produce an XML or an HTML document, respectively.)
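One practical aside before leaving XSL: the same stylesheet can also be applied programmatically rather than through a command-line transformer. A minimal sketch in Python using the lxml library follows; lxml is our assumption here, not something the lesson prescribes, and the file names are placeholders for your own XML data and XSL stylesheet.

```python
# A sketch of applying an XSL stylesheet from Python with lxml
# (an assumption, not the lesson's prescribed transformer).
from lxml import etree

xml_doc = etree.parse("scissorsandpaste.xml")  # hypothetical input file
xsl_doc = etree.parse("mystyle.xsl")           # hypothetical stylesheet
transform = etree.XSLT(xsl_doc)

result = transform(xml_doc)
# With <xsl:output method="text"/>, str() yields the plain-text output.
print(str(result))
```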
diff --git a/en/lessons/transliterating.md b/en/lessons/transliterating.md index f7263ab570..c5d709d344 100644 --- a/en/lessons/transliterating.md +++ b/en/lessons/transliterating.md @@ -468,30 +468,30 @@ dealing with lots of names or for people who prefer or need to use ASCII characters. It is a simple tool but one that can be an enormous time saver. - [ASCII]: http://en.wikipedia.org/wiki/Ascii + [ASCII]: https://en.wikipedia.org/wiki/Ascii [Viewing HTML Files]: /lessons/viewing-html-files [Working with Web Pages]: /lessons/working-with-web-pages [From HTML to List of Words (part 1)]: /lessons/from-html-to-list-of-words-1 [Intro to Beautiful Soup]: /lessons/intro-to-beautiful-soup - [Memorial]: http://lists.memo.ru - [Cyrillic]: http://en.wikipedia.org/wiki/Cyrillic_script - [Latin characters]: http://en.wikipedia.org/wiki/Latin_script - [Unicode]: http://en.wikipedia.org/wiki/Unicode - [Terminal]: http://en.wikipedia.org/wiki/Terminal_%28OS_X%29 - [IDLE]: http://en.wikipedia.org/wiki/IDLE_%28Python%29 - [Komodo Edit]: http://www.activestate.com/komodo-edit - [ALA-LC]: http://en.wikipedia.org/wiki/ALA-LC_romanization_for_Russian - [Beautiful Soup in Python.]: http://www.crummy.com/software/BeautifulSoup/ - [Glasnost]: http://en.wikipedia.org/wiki/Glasnost - [here]: http://lists.memo.ru/d1/f1.htm + [Memorial]: https://lists.memo.ru + [Cyrillic]: https://en.wikipedia.org/wiki/Cyrillic_script + [Latin characters]: https://en.wikipedia.org/wiki/Latin_script + [Unicode]: https://en.wikipedia.org/wiki/Unicode + [Terminal]: https://en.wikipedia.org/wiki/Terminal_%28OS_X%29 + [IDLE]: https://en.wikipedia.org/wiki/IDLE_%28Python%29 + [Komodo Edit]: https://www.activestate.com/komodo-edit + [ALA-LC]: https://en.wikipedia.org/wiki/ALA-LC_romanization_for_Russian + [Beautiful Soup in Python.]: https://www.crummy.com/software/BeautifulSoup/ + [Glasnost]: https://en.wikipedia.org/wiki/Glasnost + [here]: https://lists.memo.ru/d1/f1.htm [Automated Downloading with Wget]: /lessons/automated-downloading-with-wget - [What is Unicode]: http://www.unicode.org/standard/WhatIsUnicode.html - [comma separated value]: http://en.wikipedia.org/wiki/Comma-separated_values + [What is Unicode]: https://www.unicode.org/standard/WhatIsUnicode.html + [comma separated value]: https://en.wikipedia.org/wiki/Comma-separated_values [Counting Frequencies]: /lessons/counting-frequencies - [Library of Congress]: http://web.archive.org/web/20170312041508/http://www.lcweb.loc.gov/catdir/cpso/romanization/russian.pdf - [Wikipedia has a table]: http://en.wikipedia.org/wiki/Cyrillic_script_in_Unicode - [Unicode website]: http://www.unicode.org/charts/ + [Library of Congress]: https://web.archive.org/web/20170312041508/https://www.lcweb.loc.gov/catdir/cpso/romanization/russian.pdf + [Wikipedia has a table]: https://en.wikipedia.org/wiki/Cyrillic_script_in_Unicode + [Unicode website]: https://www.unicode.org/charts/ [Manipulating Strings in Python]: /lessons/manipulating-strings-in-python [Installing Python Modules with pip]: /lessons/installing-python-modules-pip - [Cascading Style Sheets]: http://www.w3schools.com/css/ + [Cascading Style Sheets]: https://www.w3schools.com/css/ [Code Academy’s]: https://www.codecademy.com/catalog/subject/web-development diff --git a/en/lessons/understanding-creating-word-embeddings.md b/en/lessons/understanding-creating-word-embeddings.md index 4079889863..3849f4604b 100644 --- a/en/lessons/understanding-creating-word-embeddings.md +++ b/en/lessons/understanding-creating-word-embeddings.md @@ -497,4 
+497,4 @@ We would like to thank Mark Algee-Hewitt and Julia Flanders for their contributi [^3]: Many research questions in the humanities address bigger-picture concepts like gender, identity, or justice. A corpus the size of the one we are using here would be poorly suited to these kinds of research questions, because relevant terms are used in a diffuse set of contexts. As a general guideline, a million words is a minimum starting point for these kinds of queries. In our example, we are looking at a set of terms that appear with some frequency in a very consistent set of contexts, which makes it possible to produce reasonable results with a smaller corpus. Wevers and Koolen lay out a set of considerations around corpus size in greater detail, and the piece is worth consulting as you consider your own corpus. See Wevers, Melvin and Koolen, Marijn. "Digital begriffsgeschichte: Tracing semantic change using word embeddings." _Historical Methods: A Journal of Quantitative and Interdisciplinary History_ 53, no. 4 (2020): 226-243. [https://doi.org/10.1080/01615440.2020.1760157](https://doi.org/10.1080/01615440.2020.1760157). -[^4]: For example, see Cordell, Ryan. "‘Q i-Jtb the Raven’: Taking Dirty OCR Seriously." _Book History_ 20, no. 1 (2017): 188–225. [https://doi.org/10.1353/bh.2017.0006](https://doi.org/10.1353/bh.2017.0006) for a discussion of how OCR errors can provide useful information in research. See also Rawson, Katie, and Muñoz, Trevor. "Against Cleaning." _Curating Menus_, July 2016.[http://www.curatingmenus.org/articles/against-cleaning/](https://perma.cc/QPW7-ZJ7U) for a discussion on the many and significant complexities that are often obscured under the concept of 'cleaning' data. +[^4]: For example, see Cordell, Ryan. "‘Q i-Jtb the Raven’: Taking Dirty OCR Seriously." _Book History_ 20, no. 1 (2017): 188–225. [https://doi.org/10.1353/bh.2017.0006](https://doi.org/10.1353/bh.2017.0006) for a discussion of how OCR errors can provide useful information in research. See also Rawson, Katie, and Muñoz, Trevor. "Against Cleaning." _Curating Menus_, July 2016. [https://www.curatingmenus.org/articles/against-cleaning/](https://perma.cc/QPW7-ZJ7U) for a discussion on the many and significant complexities that are often obscured under the concept of 'cleaning' data. diff --git a/en/lessons/understanding-regular-expressions.md b/en/lessons/understanding-regular-expressions.md index 7f9a89db72..120a7a01a6 100755 --- a/en/lessons/understanding-regular-expressions.md +++ b/en/lessons/understanding-regular-expressions.md @@ -81,7 +81,7 @@ any year from 1850 to 1899. In this exercise we will use LibreOffice Writer and LibreOffice Calc, which are free software desktop applications for word processing and spreadsheets, respectively. Installation packages for Linux, Mac, or -Windows can be downloaded from . +Windows can be downloaded from . Other word processing software and programming languages have similar pattern-matching capabilities. This exercise uses LibreOffice because it is freely available, and its regular expression syntax is closer to what @@ -115,7 +115,7 @@ textual resources that are useful in many kinds of historical research. For our exercise, we will use a five-page report of monthly morbidity and mortality statistics for states and cities in the United States, published in February 1908, available at -. +. Take a moment to scan the pages through the [Read Online][] link to become familiar with it.
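The year pattern mentioned above (any year from 1850 to 1899) is a useful first test of character-class ranges. For readers who want to sanity-check a pattern outside LibreOffice, here is the same idea as a minimal sketch in Python's re module; note that LibreOffice's regular expression dialect differs from Python's in some details, so this illustrates the pattern, not the lesson's tool.

```python
# The 1850-1899 year pattern, written for Python's re module as a
# cross-check; LibreOffice's regex dialect differs in some details.
import re

text = "Reports were filed in 1848, 1861, and 1893."
# '18', then a digit from 5 to 9, then any digit: 1850 through 1899.
matches = re.findall(r"\b18[5-9][0-9]\b", text)
print(matches)  # ['1861', '1893']
```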
This document is organized as paragraphs rather @@ -733,9 +733,9 @@ the University of Pittsburg, has some good materials on how to work with [regular expressions and XML tools][] to help mark up plain-text files in TEI XML. - [Read Online]: http://archive.org/stream/jstor-4560629/4560629#page/n0/mode/2up - [Full Text]: http://archive.org/stream/jstor-4560629/4560629_djvu.txt + [Read Online]: https://archive.org/stream/jstor-4560629/4560629#page/n0/mode/2up + [Full Text]: https://archive.org/stream/jstor-4560629/4560629_djvu.txt [List of Regular Expressions]: https://help.libreoffice.org/Common/List_of_Regular_Expressions - [regular expressions]: http://en.wikipedia.org/wiki/Regular_expressions - [Rubular]: http://rubular.com/ - [regular expressions and XML tools]: http://dh.obdurodon.org/regex.html + [regular expressions]: https://en.wikipedia.org/wiki/Regular_expressions + [Rubular]: https://rubular.com/ + [regular expressions and XML tools]: https://dh.obdurodon.org/regex.html diff --git a/en/lessons/up-and-running-with-omeka.md b/en/lessons/up-and-running-with-omeka.md index 830343dc76..6f8f7c4d38 100755 --- a/en/lessons/up-and-running-with-omeka.md +++ b/en/lessons/up-and-running-with-omeka.md @@ -27,14 +27,14 @@ doi: 10.46430/phen0060 -[Omeka.net](http://www.omeka.net) makes it easy to create websites that show off collections of items. +[Omeka.net](https://www.omeka.net) makes it easy to create websites that show off collections of items. Sign up for an Omeka account ---------------------------- {% include figure.html filename="up-and-running-01.png" caption="Sign up for a trial account" %} -Go to [www.omeka.net](http://www.omeka.net) and click on **Sign Up**. Choose the Trial plan. Fill in the sign-up form. Check your email for the link to activate your account. +Go to [www.omeka.net](https://www.omeka.net) and click on **Sign Up**. Choose the Trial plan. Fill in the sign-up form. Check your email for the link to activate your account. Create your new Omeka site -------------------------- @@ -56,7 +56,7 @@ An empty Omeka site ------------------- {% include figure.html filename="up-and-running-04.png" caption="Public view" %} -This is your empty Omeka site, waiting to be filled in. To get back to your dashboard, click the **Back** button or enter **http://www.omeka.net/dashboard**. This time, click on **Manage Site**. +This is your empty Omeka site, waiting to be filled in. To get back to your dashboard, click the **Back** button or enter **https://www.omeka.net/dashboard**. This time, click on **Manage Site**. Switch themes ------------- @@ -180,4 +180,4 @@ Now that you've added some items and grouped them into a collection, take some t Further Resources ----------------------------- -The Omeka team has put together great resources on the software's [help pages](http://info.omeka.net) +The Omeka team has put together great resources on the software's [help pages](https://info.omeka.net) diff --git a/en/lessons/urban-demographic-data-r-ggplot2.md b/en/lessons/urban-demographic-data-r-ggplot2.md index 41865dfa27..dc43a4f5ce 100644 --- a/en/lessons/urban-demographic-data-r-ggplot2.md +++ b/en/lessons/urban-demographic-data-r-ggplot2.md @@ -29,7 +29,7 @@ After [World War II](https://perma.cc/89BN-3NCG), European cities faced a monume Sister-city relationships present historians with both an opportunity and a challenge. The opportunity lies in their potential to reveal patterns of post-war reconciliation and diplomacy. 
The challenge comes from their scale and complexity: there are many hundreds of European cities to analyze, and each one might have formed dozens of partnerships across multiple decades. By converting these complex networks of sister-city relationships into visual patterns, we can explore questions that are difficult to answer through traditional methods alone. For example, did cities of [West Germany](https://perma.cc/ALL6-TWXA) preferentially form partnerships with French cities immediately after the war? Did the [Iron Curtain](https://perma.cc/XH8M-XCJ9) create distinct patterns of sister-city relationships between Eastern and Western Europe? How did city size and geographic distance influence diplomatic connections? This case is a good example of how useful data visualization can be for historical research. -The R package [ggplot2](http://ggplot2.tidyverse.org) provides powerful tools for investigating such questions through data visualization. While spreadsheets and basic charts can obscure patterns, ggplot2's sophisticated visualization capabilities allow historians to uncover hidden relationships in data. For example, [scatter plots](https://perma.cc/47QY-KL2V) can reveal correlations between numerical variables like population sizes and geographic distances, [bar charts](https://perma.cc/H58M-6UDU) can show the distribution of partnerships across different categories of cities, and [histograms](https://perma.cc/W7TW-9V52) can expose patterns in demographic data that might otherwise remain invisible. +The R package [ggplot2](https://ggplot2.tidyverse.org) provides powerful tools for investigating such questions through data visualization. While spreadsheets and basic charts can obscure patterns, ggplot2's sophisticated visualization capabilities allow historians to uncover hidden relationships in data. For example, [scatter plots](https://perma.cc/47QY-KL2V) can reveal correlations between numerical variables like population sizes and geographic distances, [bar charts](https://perma.cc/H58M-6UDU) can show the distribution of partnerships across different categories of cities, and [histograms](https://perma.cc/W7TW-9V52) can expose patterns in demographic data that might otherwise remain invisible. This lesson differs from standard ggplot2 guides by focusing specifically on the needs of urban historians. Rather than using generic datasets, we'll work with historical data about sister-city relationships to demonstrate how visualization techniques can illuminate historical patterns and processes. Through this approach, you'll learn to create visualizations that reveal complex partnerships and make historical findings more accessible to a broader audience. @@ -68,7 +68,7 @@ We have many reasons for chosing to use ggplot2 for this analysis. The package h - It relies on a theoretical framework (detailed below) that ensures your graphs meaningfully convey information, which is particularly important when working with complex urban and demographic datasets. - It is relatively simple to use while remaining powerful. - It creates publication-ready graphs. -- It comes with community-developed [extensions](http://www.ggplot2-exts.org/) which further enhance its capabilities, such as additional functions, graphs, and themes. +- It comes with community-developed [extensions](https://www.ggplot2-exts.org/) which further enhance its capabilities, such as additional functions, graphs, and themes. 
- It is versatile, as it can handle various data structures, including: * Numerical data (continuous and discrete) * Categorical data (factors and character strings) @@ -339,7 +339,7 @@ p1 + {% include figure.html filename="en-or-urban-demographic-data-r-ggplot2-11.png" alt="Scatter plot that uses scale_colour_manual() to change the colors of the scatterplot points." caption="Figure 11. Using scale_colour_manual() to specify the colors of the scatter plot's points." %} -However, you can also simply rely on predefined color scales, such as the [color brewer palettes](http://colorbrewer2.org). It's better to use these whenever possible, because choosing the right colors for visualizations is a very complicated issue (for instance, avoiding colors that are not distinguishable by people with impaired vision). Fortunately, ggplot2 comes with `scale_colour_brewer()` already [integrated](https://perma.cc/BST9-7GMG): +However, you can also simply rely on predefined color scales, such as the [color brewer palettes](https://colorbrewer2.org). It's better to use these whenever possible, because choosing the right colors for visualizations is a very complicated issue (for instance, avoiding colors that are not distinguishable by people with impaired vision). Fortunately, ggplot2 comes with `scale_colour_brewer()` already [integrated](https://perma.cc/BST9-7GMG): ``` p1 + @@ -447,7 +447,7 @@ p3 + ### Extending ggplot2 with Other Packages -One of ggplot2's strengths is its extensive collection of [extensions](http://www.ggplot2-exts.org/) that can help enhance your analysis with specialized visualizations like network graphs (useful for showing relationships between cities, for example), time series graphs (for tracking demographic changes over time), and ridgeline plots (for comparing population distributions across different urban areas). +One of ggplot2's strengths is its extensive collection of [extensions](https://www.ggplot2-exts.org/) that can help enhance your analysis with specialized visualizations like network graphs (useful for showing relationships between cities, for example), time series graphs (for tracking demographic changes over time), and ridgeline plots (for comparing population distributions across different urban areas). Let's explore an example showcasing a ggplot2 extension that creates more advanced and visually striking plots. In this case, we will create a [ridgeline plot](https://perma.cc/D9Z2-XHAV) – also known as a 'joyplot' – designed to visualize changes in distributions over time, across different categories. Ridgeline plots are particularly effective for comparing multiple distributions in a compact and aesthetically pleasing manner. @@ -482,7 +482,7 @@ To gain a more thorough understanding of ggplot2, we recommend you explore some * The [official ggplot2 site](https://ggplot2.tidyverse.org/). -* Hadley Wickham's books [`ggplot2`: _Elegant Graphics for Data Analysis_](https://ggplot2-book.org/) and [_R for Data Science_](http://r4ds.hadley.nz/). +* Hadley Wickham's books [`ggplot2`: _Elegant Graphics for Data Analysis_](https://ggplot2-book.org/) and [_R for Data Science_](https://r4ds.hadley.nz/). * Hadley Wickham's [original paper](https://doi.org/10.1198/jcgs.2009.07098) on the grammar of graphics. @@ -494,13 +494,13 @@ To gain a more thorough understanding of ggplot2, we recommend you explore some * UC Business Analytics' [R Programming Guide](https://perma.cc/KZT6-GW9C). 
-* The official ggplot2 [extensions page](https://www.ggplot2-exts.org/) and [accompanying gallery](http://www.ggplot2-exts.org/gallery/). +* The official ggplot2 [extensions page](https://www.ggplot2-exts.org/) and [accompanying gallery](https://www.ggplot2-exts.org/gallery/). * R Project’s [overview about extending ggplot2](https://perma.cc/465N-F9WU). * The [general documentation](https://ggplot2.tidyverse.org/reference/). -* The [Cookbook for R](http://www.cookbook-r.com/Graphs/) book (based on Winston Chang's [_R Graphics Cookbook. Practical Recipes for Visualizing Data_](http://shop.oreilly.com/product/0636920023135.do)). +* The [Cookbook for R](https://www.cookbook-r.com/Graphs/) book (based on Winston Chang's [_R Graphics Cookbook. Practical Recipes for Visualizing Data_](https://shop.oreilly.com/product/0636920023135.do)). * This official [R cheatsheet](https://www.rstudio.com/resources/cheatsheets/). diff --git a/en/lessons/using-javascript-to-create-maps.md b/en/lessons/using-javascript-to-create-maps.md index 00537a57b0..e6337c010e 100755 --- a/en/lessons/using-javascript-to-create-maps.md +++ b/en/lessons/using-javascript-to-create-maps.md @@ -32,13 +32,13 @@ doi: 10.46430/phen0071 The mapping software this lesson demonstrates grew out of a need to create a program that was easy to use and designed for the smaller and less uniform geospatial datasets used by historians. While working on a book manuscript on female abolitionists and early feminism in Britain and the United States, the question arose of how to determine the extent of transnational connections in women's antislavery work. We were interested not only in the number of letters that crossed the Atlantic, but also the specific locations the letters were sent from and to and how those international connections changed over time. -To solve this problem, we decided to plot the correspondence of Boston-area abolitionist women on a map and do network analysis of women's correspondence, starting with a single woman's letters as a test project. When we set out to map nineteenth-century abolitionist Maria Weston Chapman's correspondence, there was already an easy way to do [network analysis](http://wcm1.web.rice.edu/mining-bpl-antislavery.html), but we struggled to find software to do the mapping portion of the project.[^1] To remedy this, we wrote a simple JavaScript mapping tool based on [Leaflet](http://leafletjs.com/) which allowed us to display correspondence networks on a browser-based map. This lesson explains not only how to write your own script (or adapt the one we wrote) for your own project, but also explores why creating your own tool is sometimes more effective than using commercially available software to analyze historical data. +To solve this problem, we decided to plot the correspondence of Boston-area abolitionist women on a map and do network analysis of women's correspondence, starting with a single woman's letters as a test project. When we set out to map nineteenth-century abolitionist Maria Weston Chapman's correspondence, there was already an easy way to do [network analysis](https://wcm1.web.rice.edu/mining-bpl-antislavery.html), but we struggled to find software to do the mapping portion of the project.[^1] To remedy this, we wrote a simple JavaScript mapping tool based on [Leaflet](https://leafletjs.com/) which allowed us to display correspondence networks on a browser-based map. 
This lesson explains not only how to write your own script (or adapt the one we wrote) for your own project, but also explores why creating your own tool is sometimes more effective than using commercially available software to analyze historical data. -Originally, when we set out to study Chapman's correspondence with digital tools, we intended to use [PostGIS](http://postgis.net/) and [Gephi](https://gephi.org/) to examine the geographic connections and to analyze the network itself. While cleaning the data, it quickly became clear that [PostGIS](http://postgis.net/) was not going to be the ideal tool for the geospatial analysis as it required re-loading all the data into the software every time a change was made. Chapman's correspondence data, obtained from the [Boston Public Library's Antislavery Collection available from the Internet Archive](https://archive.org/details/bplscas) and the [Digital Public Library of America (DPLA)](http://dp.la), required extensive cleaning to standardize and complete dates, names and locations. +Originally, when we set out to study Chapman's correspondence with digital tools, we intended to use [PostGIS](https://postgis.net/) and [Gephi](https://gephi.org/) to examine the geographic connections and to analyze the network itself. While cleaning the data, it quickly became clear that [PostGIS](https://postgis.net/) was not going to be the ideal tool for the geospatial analysis as it required re-loading all the data into the software every time a change was made. Chapman's correspondence data, obtained from the [Boston Public Library's Antislavery Collection available from the Internet Archive](https://archive.org/details/bplscas) and the [Digital Public Library of America (DPLA)](https://dp.la), required extensive cleaning to standardize and complete dates, names and locations. Many of the mistakes, misspellings, and incorrect data points only became noticeable after the data was run through the software and a map generated, but having to reload all of the data from scratch was not a sustainable option. So we began drafting the code for the JavaScript map, which allowed us to easily run the visualization on our local system using a local web server to catch problems and errors as we worked. The script we ended up writing also allows the map to be interactive, making it a more useful tool for research and discovery than a static visualization. Being able to easily update the map was also important as the Boston Public Library was not finished digitizing their antislavery collection at the time of writing, and we hoped to expand the dataset to include the correspondence of other abolitionists if our test was successful. Creating our own tool allowed us the flexibility to adapt our project to the constraints of the data. -After we began working on this project, several other options for building online interactive maps became available, most notably [Carto](https://carto.com/platform) and [Palladio](http://hdlab.stanford.edu/palladio/) (a Stanford University project). Neither of these products were available when we began and they both have some limitations that may be problematic for some scholars. Carto only allows you to keep your data private if you pay for a subscription. Palladio, which creates a map very similar to the one we built, only allows you to share your maps via screenshots, so other people cannot easily use your interactive map. 
Running your own script via a local or web server is the easiest and most straightforward way to control your data and the look of your map without purchasing a subscription to an online service. But if you do decide to use Carto, Palladio, or another online mapping service, this lesson can still be useful to you as you collect and clean your data and analyze the resulting map. +After we began working on this project, several other options for building online interactive maps became available, most notably [Carto](https://carto.com/platform) and [Palladio](https://hdlab.stanford.edu/palladio/) (a Stanford University project). Neither of these products were available when we began and they both have some limitations that may be problematic for some scholars. Carto only allows you to keep your data private if you pay for a subscription. Palladio, which creates a map very similar to the one we built, only allows you to share your maps via screenshots, so other people cannot easily use your interactive map. Running your own script via a local or web server is the easiest and most straightforward way to control your data and the look of your map without purchasing a subscription to an online service. But if you do decide to use Carto, Palladio, or another online mapping service, this lesson can still be useful to you as you collect and clean your data and analyze the resulting map. ### Lesson Goals @@ -52,7 +52,7 @@ Note: This lesson requires using the command line (or Command Prompt). If you ha Before you begin, [download the ZIP file]({{site.baseurl}}/assets/using-javascript-to-create-maps/using-javascript-to-create-maps.zip) for this lesson and double click on it to "unzip." Inside you will find all the folders and files you will need to get a correspondence map working. -The `css` folder contains the code that explains how parts of the map look. The `js` folder contains the actual code that drives the map and its interactive functions. The basic map script contains a timeline function which allows you to "play" the data, generating the map one data point at a time so you can watch the correspondence network grow. The same feature allows you to filter your data by date, and display only a certain range of dates on the map. The `jquery` and `leaflet` folders in each of these locations are third party tools that add functionality to the map. [Leaflet](http://leafletjs.com/) helps create the map and [jQuery](http://jqueryui.com/) makes it easy to add interactive elements like the time line. The other files are as follows: +The `css` folder contains the code that explains how parts of the map look. The `js` folder contains the actual code that drives the map and its interactive functions. The basic map script contains a timeline function which allows you to "play" the data, generating the map one data point at a time so you can watch the correspondence network grow. The same feature allows you to filter your data by date, and display only a certain range of dates on the map. The `jquery` and `leaflet` folders in each of these locations are third party tools that add functionality to the map. [Leaflet](https://leafletjs.com/) helps create the map and [jQuery](https://jqueryui.com/) makes it easy to add interactive elements like the time line. The other files are as follows: * `controls.js` contains functions that handle the time line slider and filters. * `data.js` contains functions that load and handle the initial formatting of the CSV file. 
@@ -66,7 +66,7 @@ The Customizing the Map section explains how each of these scripts work together

In order to do geospatial analysis on correspondence, you need several pieces of data about each letter. At the bare minimum, you need the sender, the recipient, the date, the sender's address, and the recipient's address. However, historians often have a lot more information about each piece of correspondence, including summaries of the content, keywords, and links to the letter in an online repository. Writing your own script allows you to display or access the other information about the letter within the interactive visualization as well as to display subsets of the data to help with your analysis.

-There are several ways to collect or compile data about correspondence. Many historians often have large databases listing correspondence details from their research or have entered research data into Endnote or [Zotero](http://zotero.org), and archival finding aids and digitized archival collections often contain much of the information needed for doing a geospatial analysis. To obtain the basic information about Maria Weston Chapman's correspondence, we parsed the data from an API and then hand entered the missing information.[^2]
+There are several ways to collect or compile data about correspondence. Many historians have large databases listing correspondence details from their research or have entered research data into Endnote or [Zotero](https://zotero.org), and archival finding aids and digitized archival collections often contain much of the information needed for doing a geospatial analysis. To obtain the basic information about Maria Weston Chapman's correspondence, we parsed the data from an API and then hand-entered the missing information.[^2]

Several APIs contain the metadata for the Boston Public Library's antislavery collection, including the Internet Archive (where the metadata closely mirrors the data on the original index cards created in the 1880s when the collection was compiled and indexed) and Digital Public Library of America.[^3] A separate lesson on *The Programming Historian* shows [how to mine data from the antislavery collection on the Internet Archive]({{site.baseurl}}/lessons/data-mining-the-internet-archive). We chose to use the DPLA's API instead.

@@ -90,7 +90,7 @@ If you want to use the timeline function of the script, you will also need to ch

Once you have identified all of the locations for the letters in your collection, you will need to convert the addresses to coordinates. The first step in this process is to create a master list of locations included in your dataset. To do this, copy the sent and received locations into a single column in a new spreadsheet. Sort them alphabetically and then remove the duplicates (this is also a good way to find spelling or data entry errors if you had to hand-compile the locations). Use this new list to do your coordinate search.

-There are many websites that allow you to search for coordinates, but most of them limit the number of searches you can request. If you need to search for several thousand locations, you may want to get an API key for the GPS search engine you decide to use, but for most correspondence sets, you will only end up looking up a few hundred locations even if you are mapping thousands of letters. We used [GPS Visualizer](http://www.gpsvisualizer.com/geocoder/), which allows you to search Google Maps, Bing, and Map Quest.
Simply paste the list of addresses you made into the input section of GPS Visualizer (99 at a time unless you get an API key), select the type of data (raw list, 1 address per line), select your search engine, and set the field separator output to comma. Click run and wait for your results.
+There are many websites that allow you to search for coordinates, but most of them limit the number of searches you can request. If you need to search for several thousand locations, you may want to get an API key for the GPS search engine you decide to use, but for most correspondence sets, you will only end up looking up a few hundred locations even if you are mapping thousands of letters. We used [GPS Visualizer](https://www.gpsvisualizer.com/geocoder/), which allows you to search Google Maps, Bing, and MapQuest. Simply paste the list of addresses you made into the input section of GPS Visualizer (99 at a time unless you get an API key), select the type of data (raw list, 1 address per line), select your search engine, and set the field separator output to comma. Click run and wait for your results.

When the results appear in the second box on the screen, copy and paste them into the spreadsheet containing the list of addresses. Make sure you capture both pieces of each coordinate (latitude and longitude). Depending on the addresses in your dataset, you may find one of the search engines gives better results than the others. In our case, we found Bing to be the most accurate. You will need to double-check each location you find coordinates for to make sure they are correct by running them through the mapping script (we had several notable mistakes when a search returned coordinates for a street in Paris, France, in the middle of Africa, and an estate in the suburbs of London, England, in the American Midwest).

@@ -150,7 +150,7 @@ The map data is loaded in `data.js`. If you want to change the available columns

Mapping software uses several layers of information to create a map. The first layer is a simple grid of latitude and longitude. The second layer contains the information that displays the map itself. These are called vector tiles. Vector tiles are the information on roads or other geographical features you want to appear on your map plus the actual images used to render the map. These can be modern features or historical ones, depending on the tile set you use to display your information.

-For our project, we began with a basic set of map tiles from [MapBox](http:///www.mapbox.com). MapBox provides a number of different tile sets so that you can customize your map's appearance. You can use existing tiles or even design your own (what we ended up doing). The script is currently set up to use our custom map tiles, but you can edit the script to use other map tiles by changing the following section of `map.js` in the `js` folder to use your tiles. You are not limited to MapBox either; any tile server will work:
+For our project, we began with a basic set of map tiles from [MapBox](https://www.mapbox.com). MapBox provides a number of different tile sets so that you can customize your map's appearance. You can use existing tiles or even design your own (what we ended up doing). The script is currently set up to use our custom map tiles, but you can edit the script to use other map tiles by changing the following section of `map.js` in the `js` folder to use your tiles.
You are not limited to MapBox either; any tile server will work:

```
var tileURL = 'http://{s}.tiles.mapbox.com/v3/ttavenner.e7ef536d/{z}/{x}/{y}.png'

@@ -231,8 +231,8 @@ Now that you have an idea about what can be done with JavaScript as a programmin

[^3]: See Lee V. Chambers, *The Weston Sisters: An American Abolitionist Family* (Chapel Hill: University of North Carolina Press, 2015), 175. The BPL began a transcription project at Digital Commonwealth in 2017; images of the collection are also available there, and the images are searchable by place.

-[^4]: If you are having permissions errors installing `npm`, check the solutions [on Stack Overflow](http://stackoverflow.com/questions/16151018/npm-throws-error-without-sudo/24404451#24404451).
+[^4]: If you are having permissions errors installing `npm`, check the solutions [on Stack Overflow](https://stackoverflow.com/questions/16151018/npm-throws-error-without-sudo/24404451#24404451).

-[^5]: Stephen Robertson, "The Differences between Digital Humanities and Digital History," *Debates in the Digital Humanities, 2016*. Matthew K. Gold and Lauren F. Klein, eds. (Minneapolis: University of Minnesota Press, 2016). Available Online: http://dhdebates.gc.cuny.edu/debates/text/76
+[^5]: Stephen Robertson, "The Differences between Digital Humanities and Digital History," *Debates in the Digital Humanities, 2016*. Matthew K. Gold and Lauren F. Klein, eds. (Minneapolis: University of Minnesota Press, 2016). Available Online: https://dhdebates.gc.cuny.edu/debates/text/76

[^6]: Chambers, *Weston Sisters*, Chapter 6.

diff --git a/en/lessons/vector-layers-qgis.md b/en/lessons/vector-layers-qgis.md
index 30a1b47b6d..8de0a9dd3c 100755
--- a/en/lessons/vector-layers-qgis.md
+++ b/en/lessons/vector-layers-qgis.md
@@ -143,7 +143,7 @@ appears
 Specify CRS button, and select NAD83(CSRS98) / Prince Edward Isl.
 Stereographic (EPSG: 2291), and then click OK (for information on
 understanding and selecting UTM zone:
- )
+ )

{% include figure.html filename="pei6.png" caption="Figure 6: Click to see full size image." %}

@@ -422,5 +422,5 @@ work!**

 [Installing QGIS 2.0 and Adding Layers]: /lessons/qgis-layers
 [PEI_Holland map]: /assets/vector-layers-qgis/PEI_HollandMap1798_compLZW.tif
 [Georeferencing in QGIS 2.0]: /lessons/georeferencing-qgis
- [Wikipedia entry]: http://en.wikipedia.org/wiki/Prince_Royalty,_Prince_Edward_Island
- [Geospatial Historian]: http://geospatialhistorian.wordpress.com/
+ [Wikipedia entry]: https://en.wikipedia.org/wiki/Prince_Royalty,_Prince_Edward_Island
+ [Geospatial Historian]: https://geospatialhistorian.wordpress.com/

diff --git a/en/lessons/viewing-html-files.md b/en/lessons/viewing-html-files.md
index bb2f94dfb2..0fc485f251 100755
--- a/en/lessons/viewing-html-files.md
+++ b/en/lessons/viewing-html-files.md
@@ -142,5 +142,5 @@ text editor (which does not).
- [W3 Schools HTML Tutorial][W3 Schools HTML tutorial]
- [W3 Schools HTML5 Tutorial][]

- [W3 Schools HTML tutorial]: http://www.w3schools.com/html/default.asp
- [W3 Schools HTML5 Tutorial]: http://www.w3schools.com/html/html5_intro.asp
+ [W3 Schools HTML tutorial]: https://www.w3schools.com/html/default.asp
+ [W3 Schools HTML5 Tutorial]: https://www.w3schools.com/html/html5_intro.asp

diff --git a/en/lessons/visualizing-with-bokeh.md b/en/lessons/visualizing-with-bokeh.md
index e72c1a29d0..6bc109c6dd 100644
--- a/en/lessons/visualizing-with-bokeh.md
+++ b/en/lessons/visualizing-with-bokeh.md
@@ -149,7 +149,7 @@ Within the virtual environment, you can run your code by typing:

python filename.py
```

-A Jupyter Notebook containing the code used in this tutorial is also [available](https://github.com/programminghistorian/ph-submissions/tree/gh-pages/assets/visualizing-with-bokeh/visualizing-with-bokeh.ipynb) in case you prefer to work through the tutorial without installing a virtual environment. You can learn more about Jupyter Notebook [here](http://jupyter.org). If you have created a virtual environment using Miniconda, as discussed above, you can install Jupyter Notebook in the environment by typing `conda install jupyter`
+A Jupyter Notebook containing the code used in this tutorial is also [available](https://github.com/programminghistorian/ph-submissions/tree/gh-pages/assets/visualizing-with-bokeh/visualizing-with-bokeh.ipynb) in case you prefer to work through the tutorial without installing a virtual environment. You can learn more about Jupyter Notebook [here](https://jupyter.org). If you have created a virtual environment using Miniconda, as discussed above, you can install Jupyter Notebook in the environment by typing `conda install jupyter`.

# The Basics of Bokeh

@@ -583,7 +583,7 @@ Thankfully, Pandas offers a quick and easy way to do this. By modifying a single

Resampling time-series data can involve either upsampling (creating more records) or downsampling (creating fewer records). For example, a list of daily temperatures could be upsampled to a list of hourly temperatures or downsampled to a list of weekly temperatures. We'll only be downsampling in this tutorial, but upsampling is very useful when you're trying to match a sporadically measured dataset with one that's more periodically measured.

-To resample our data, we use a Pandas `Grouper` object, to which we pass the column name holding our datetimes and a code representing the desired resampling frequency. In the case of our data, the statement `pd.Grouper(key='MSNDATE', freq='M') ` will be used to resample our MSNDATE column by *M*onth. We could equally resample by *W*eek, *Y*ear, *H*our, and [so forth](http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases). These frequency designations can also be prefaced with numbers so that, for example, `freq='2W'` resamples at two week intervals!
+To resample our data, we use a Pandas `Grouper` object, to which we pass the column name holding our datetimes and a code representing the desired resampling frequency. In the case of our data, the statement `pd.Grouper(key='MSNDATE', freq='M')` will be used to resample our MSNDATE column by *M*onth. We could equally resample by *W*eek, *Y*ear, *H*our, and [so forth](https://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases). These frequency designations can also be prefaced with numbers so that, for example, `freq='2W'` resamples at two-week intervals!
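To see the resampling on its own before it is wired into the plotting code, here is a minimal, self-contained sketch of the `Grouper` pattern. The toy frame below, including its `TOTAL_TONS` column, is an illustrative stand-in rather than the tutorial's actual dataset:

```
import pandas as pd

# Toy mission data: one row per raid, with a date and a tonnage figure.
df = pd.DataFrame({
    'MSNDATE': pd.to_datetime(['1943-02-01', '1943-02-15', '1943-03-03']),
    'TOTAL_TONS': [10, 25, 40],
})

# Passing a Grouper to groupby buckets the rows by month
# instead of by each individual MSNDATE value.
monthly = df.groupby(pd.Grouper(key='MSNDATE', freq='M'))['TOTAL_TONS'].sum()
print(monthly)
```

The two February 1943 missions collapse into a single monthly total of 35 tons, while the March mission remains its own row.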
To complete the process of resampling and plotting our data, we pass the above `Grouper` object to our `groupby` function in place of the raw column name. The `groupby` statement from the previous code example should now look like this:

@@ -642,7 +642,7 @@ show(p)

A few patterns emerge in the ETO data. First we see a very clear escalation of overall bombings leading up to June 6, 1944, and a notable dip during the winter of 1944/1945. Incendiary munitions show three spikes and confirm that the fourth spike seen in the preceding example was directed at the bombing of Japan after Germany's surrender. The pattern of fragmentation bombs is harder to read, but it's now clear that they were only seriously used in the European Theater after D-Day.

-{% include alert.html text="Try your hand at resampling this data using any of [Pandas' time frequencies ](http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases) to see what other trends might emerge. Remember, you can preface these frequencies with numbers as well (e.g. if you were working with historical stock market data, 2Q would give you bi-quarterly data!)" %}
+{% include alert.html text="Try your hand at resampling this data using any of [Pandas' time frequencies](https://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases) to see what other trends might emerge. Remember, you can preface these frequencies with numbers as well (e.g. if you were working with historical stock market data, 2Q would give you bi-quarterly data!)" %}

Since we have established that 6 June 1944 and the winter of 1944/1945 mark changes to the bombing patterns in the ETO, let's highlight these trends using Bokeh's annotation features.

diff --git a/en/lessons/windows-installation.md b/en/lessons/windows-installation.md
index c8e9befa54..fbfd83ba70 100755
--- a/en/lessons/windows-installation.md
+++ b/en/lessons/windows-installation.md
@@ -181,7 +181,7 @@ Now that you and your computer are up and running, we can move onto some
more interesting tasks.
If you are working through the Python lessons in order, we suggest you next try ‘[Understanding Web Pages and HTML][]’

- [Python website]: http://www.python.org/
- [other text editing options]: http://wiki.python.org/moin/PythonEditors/
- [UTF-8]: http://en.wikipedia.org/wiki/UTF-8
+ [Python website]: https://www.python.org/
+ [other text editing options]: https://wiki.python.org/moin/PythonEditors/
+ [UTF-8]: https://en.wikipedia.org/wiki/UTF-8
 [Understanding Web Pages and HTML]: /lessons/viewing-html-files

diff --git a/en/lessons/working-with-text-files.md b/en/lessons/working-with-text-files.md
index ce419c3639..4786422359 100755
--- a/en/lessons/working-with-text-files.md
+++ b/en/lessons/working-with-text-files.md
@@ -278,6 +278,6 @@ Suggested Readings

 [Windows Installation]: /lessons/windows-installation
 [Linux Installation]: /lessons/linux-installation
 [print]: https://docs.python.org/2/reference/simple_stmts.html#the-print-statement
- [reserved word]: http://docs.python.org/release/2.5.4/ref/keywords.html
+ [reserved word]: https://docs.python.org/release/2.5.4/ref/keywords.html
 [File Objects]: https://docs.python.org/2/library/stdtypes.html#bltin-file-objects
- [Non-Programmer’s Tutorial for Python 2.6/Hello, World]: http://en.wikibooks.org/wiki/Non-Programmer%27s_Tutorial_for_Python_2.6/Hello,_World
+ [Non-Programmer’s Tutorial for Python 2.6/Hello, World]: https://en.wikibooks.org/wiki/Non-Programmer%27s_Tutorial_for_Python_2.6/Hello,_World

diff --git a/en/lessons/working-with-web-pages.md b/en/lessons/working-with-web-pages.md
index 2419ff324a..333641a1ba 100755
--- a/en/lessons/working-with-web-pages.md
+++ b/en/lessons/working-with-web-pages.md
@@ -143,10 +143,10 @@ Unfortunately, not all websites have such readable and reliable URLs. Spend

a few minutes looking at Benjamin Bowsey’s trial page. Here we are not
so much interested in what the transcript says as in what features
-the page has. Notice the [View as XML](http://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes) link at the bottom that takes
+the page has. Notice the [View as XML](https://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes) link at the bottom that takes
you to a heavily marked-up version of the text which may be useful to
certain types of research. You can also look at a [scan of the original
-document](http://www.oldbaileyonline.org/images.jsp?doc=178006280084), which was transcribed to make this resource.
+document](https://www.oldbaileyonline.org/images.jsp?doc=178006280084), which was transcribed to make this resource.

Now let's try opening the page using Python. Copy the following program
into Komodo Edit and save it as `open-webpage.py`. When you execute the
- programming-historian-1 ([zip][]) - [Old Bailey Online]: http://www.oldbaileyonline.org/ + [Old Bailey Online]: https://www.oldbaileyonline.org/ [Downloading Multiple Records Using Query Strings]: /lessons/downloading-multiple-records-using-query-strings [Old]: /images/old-bailey.png "Old" - [Gordon Riots]: http://en.wikipedia.org/wiki/Gordon_Riots + [Gordon Riots]: https://en.wikipedia.org/wiki/Gordon_Riots [zip]: /assets/python-lessons1.zip diff --git a/en/research.md b/en/research.md index 42ef802037..f27ea7aa9c 100755 --- a/en/research.md +++ b/en/research.md @@ -10,17 +10,17 @@ The project team and members of the wider community are involved in a number of ## Original Programming Historian -* William J. Turkel and Alan MacEachern, [_The Programming Historian_](http://niche-canada.org/wp-content/uploads/2013/09/programming-historian-1.pdf) 1st edition (Network in Canadian History & Environment: 2007-2008). +* William J. Turkel and Alan MacEachern, [_The Programming Historian_](https://niche-canada.org/wp-content/uploads/2013/09/programming-historian-1.pdf) 1st edition (Network in Canadian History & Environment: 2007-2008). * Japanese translation of William J. Turkel and Alan MacEachern, [_The Programming Historian_](https://www.dh.ku-orcas.kansai-u.ac.jp/?cat=2), 1st edition (Network in Canadian History & Environment: 2007-2008). ## Reviews -* Björn Ekström, Elisa Tattersall Wallin and Hana Marčetić, '[_Programming Historian_: Novice-friendly tutorials on digital methods](http://www.diva-portal.org/smash/record.jsf?pid=diva2%3A1508542&dswid=7551)', _Tidskrift för ABM_, Vol. 5, no 1 (2020), pp. 71-75. +* Björn Ekström, Elisa Tattersall Wallin and Hana Marčetić, '[_Programming Historian_: Novice-friendly tutorials on digital methods](https://www.diva-portal.org/smash/record.jsf?pid=diva2%3A1508542&dswid=7551)', _Tidskrift för ABM_, Vol. 5, no 1 (2020), pp. 71-75. * Dries Daems, '[A Review and Roadmap of Online Learning Platforms and Tutorials in Digital Archaeology](https://doi.org/10.1017/aap.2019.47)', _Advances in Archaeological Practice_, vol. 8, issue 1 (2020), pp. 87-92. * Martin Dröge, '[Review of: The Programming Historian](https://www.hsozkult.de/webreview/id/rezwww-184)', _H-Soz-Kult_ (2019). * Priscila Pilatowsky Goñi, '[Reseña a The programming historian](https://revistas.uned.es/index.php/RHD/article/view/22420)', _Revista de Humanidades Digitales_, vol. 2 (2018). * Lincoln Mullen, '[Review of the Programming Historian](https://academic.oup.com/jah/article-abstract/103/1/299/1751315)', _The Journal of American History_, vol. 103, no. 1 (2016), pp. 299-301. -* Cameron Blevins, '[Review of the Programming Historian](http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/)', _The Journal of Interactive Technology & Pedagogy_, vol. 8 (2015). +* Cameron Blevins, '[Review of the Programming Historian](https://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/)', _The Journal of Interactive Technology & Pedagogy_, vol. 8 (2015). 
## Published Research

* Jennifer Isasi, Riva Quiroga, Nabeel Siddiqui, Joana Vieira Paulino, Alex Wermer-Colan, [“A Model for Multilingual and Multicultural Digital Scholarship Methods Publishing”](https://www.taylorfrancis.com/chapters/edit/10.4324/9781003393696-3/model-multilingual-multicultural-digital-scholarship-methods-publishing-jennifer-isasi-riva-quiroga-nabeel-siddiqui-joana-vieira-paulino-alex-wermer-colan), in _Multilingual Digital Humanities_, edited by Viola, L., & Spence, P., Routledge, 2023.
* Adam Crymble & Charlotte M. H. Im, ['Measuring digital humanities learning requirements in Spanish & English-speaking practitioner communities'](https://doi.org/10.1007/s42803-023-00066-x), _International Journal of Digital Humanities_, (2023).
* Eric Brasil, '[_pyHDB - Ferramenta Heurística para a Hemeroteca Digital Brasileira: utilizando técnicas de web scraping para a pesquisa em História_'](https://doi.org/10.15848/hh.v15i40.1904), _História Da Historiografia: International Journal of Theory and History of Historiography_, 15(40) (2022), 186–217.
-* Matthew Lincoln, Sarah Melton, Jennifer Isasi, François Dominic Laramée, '[Relocating Complexity: The Programming Historian and Multilingual Static Site Generation](http://www.digitalhumanities.org/dhq/vol/16/2/000585/000585.html)', _Digital Humanities Quarterly_ 16, 2 (2022).
+* Matthew Lincoln, Sarah Melton, Jennifer Isasi, François Dominic Laramée, '[Relocating Complexity: The Programming Historian and Multilingual Static Site Generation](https://www.digitalhumanities.org/dhq/vol/16/2/000585/000585.html)', _Digital Humanities Quarterly_ 16, 2 (2022).
* Jennifer Isasi and Antonio Rojas Castro, ‘[¿Sin equivalencia? Una reflexión sobre la traducción al español de recursos educativos abiertos](https://muse.jhu.edu/article/842253)’, _Hispania_, 104, no. 4 (2021), 613-624.
* Adam Crymble and Maria José Afanador Llach, ‘The Globally Unequal Promise of Digital Tools for History: UK and Colombia Case Study’ in _Teaching History for the Contemporary World_, edited by Adele Nye, 85-98, Springer, 2021.
* Daniel Alves, '[Ensinar Humanidades Digitais sem as Humanidades Digitais: um olhar a partir das licenciaturas em História](https://novaresearch.unl.pt/files/32228034/Ensinar_Humanidades_Digitais.pdf)', _Revista EducaOnline_, v. 15, n. 2 (2021).
* Adam Crymble, [_Technology & the Historian: Transformations in the Digital Age_](https://www.press.uillinois.edu/books/catalog/57hxp7wr9780252043710.html), (University of Illinois Press, 2021).
* Anna-Maria Sichani, James Baker, Maria José Afanador Llach, and Brandon Walsh, [‘Diversity and Inclusion in Digital Scholarship and Pedagogy: The Case of The Programming Historian’](https://doi.org/10.1629/uksg.465), _Insights_, (2019).
-* Katrina Navickas and Adam Crymble, ['From Chartist Newspaper to Digital Map of Grass-roots Meetings, 1841-44: Documenting Workflows'](http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179), _Journal of Victorian Culture_, (2017).
+* Katrina Navickas and Adam Crymble, ['From Chartist Newspaper to Digital Map of Grass-roots Meetings, 1841-44: Documenting Workflows'](https://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179), _Journal of Victorian Culture_, (2017).
* Adam Crymble, ['Identifying and Removing Gender Barriers in Open Learning Communities: The Programming Historian'](https://www.herts.ac.uk/__data/assets/pdf_file/0016/138013/Blip-2016-Autumn-2016-Final-Autumn-2016.pdf), _Blended Learning in Practice_, (2016), 49-60. [[pre-print pdf](/researchpapers/openLearningCommunities2016.pdf)] -* Fred Gibbs, ['Editorial Sustainability and Open Peer Review at Programming Historian',](http://web.archive.org/web/20180713014622/http://dhcommons.org/journal/issue-1/editorial-sustainability-and-open-peer-review-programming-historian) _DH Commons_, Vol. 1 (2015). -* Shawn Graham, Ian Milligan, and Scott Weingart, [_Exploring Big Historical Data: The Historian's Macroscope_](http://www.themacroscope.org/2.0/), (Imperial College Press, 2015). +* Fred Gibbs, ['Editorial Sustainability and Open Peer Review at Programming Historian',](https://web.archive.org/web/20180713014622/https://dhcommons.org/journal/issue-1/editorial-sustainability-and-open-peer-review-programming-historian) _DH Commons_, Vol. 1 (2015). +* Shawn Graham, Ian Milligan, and Scott Weingart, [_Exploring Big Historical Data: The Historian's Macroscope_](https://www.themacroscope.org/2.0/), (Imperial College Press, 2015). ## Reports * Maria José Afanador-Llach & Andrés Rivera, '[Segundo ciclo de talleres: Herramientas y procesos digitales para la investigación y creación en artes y humanidades](/researchpapers/Informe_final_Talleres%20EHCN_2023-ENG_PH.pdf)', (2023). * Incllewsion and the Programming Historian, 'Initial Accessibility Testing: Summary of Findings', (2021). -* Penny Andrews and the Programming Historian, ['The Programming Historian: developing and sustaining impact in the Global South'](http://doi.org/10.5281/zenodo.3813763) (2020). +* Penny Andrews and the Programming Historian, ['The Programming Historian: developing and sustaining impact in the Global South'](https://doi.org/10.5281/zenodo.3813763) (2020). * Amy Kavanagh and the Programming Historian, 'Programming Historian – Access for visually impaired researchers', (n.d.). ## Workshops & Events @@ -56,7 +56,7 @@ The project team and members of the wider community are involved in a number of * Alex Wermer-Colan, ['Learning Digital Methods with the _Programming Historian_'](https://charlesstudy.temple.edu/event/11953011), Temple University [Online], (22 February 2024). * Carlo Blum, Adam Crymble, Vicky Garnett, Timothée Giraud, Alíz Horváth, Stefan Krebs, Ralph Marschall, Sofia Papastamkou, & Lorella Viola, 'Invisible College of Digital History: Workshop on Multilingual Educational Resources', C²DH [Online], (8 November 2023). * Nabeel Siddiqui, 'Convolutional Neural Networks for Image Classification', University of Edinburgh [Online], (7 November 2023). -* Eric Brasil, '[História Digital e História Digital da Educação: Caminhos Cruzados](http://www.iea.usp.br/eventos/historia-digital-educacao-caminhos-cruzados)', Instituto de Estudos Avançados, USP, São Paulo, Brazil, (17 October 2023). +* Eric Brasil, '[História Digital e História Digital da Educação: Caminhos Cruzados](https://www.iea.usp.br/eventos/historia-digital-educacao-caminhos-cruzados)', Instituto de Estudos Avançados, USP, São Paulo, Brazil, (17 October 2023). * Scott Kleinman, Alex Wermer-Colan, Joana Vieira Paulino, Nabeel Siddiqui, Zoe LeBlanc, 'Developing a Digital Humanities Tutorial', [DH 2023](https://dh2023.adho.org/), Graz, Austria (10 July 2023). 
* Daphné Mathelier, 'Atelier Markdown', [11e journées du réseau Medici](https://medici2023.sciencesconf.org/resource/page/id/2), Université de Liège, Belgium, (29 June 2023). * María José Afanador Llach, Jennifer Isasi, Riva Quiroga, 'Sobre _Programming Historian en español_ y cómo contribuir a la publicación', Semana de Humanidades Digitales 2023 [Online], (10 May 2023). @@ -142,10 +142,10 @@ The project team and members of the wider community are involved in a number of * Adam Crymble, 'Facilitating Making in Digital Humanities', The Archaeology of Making, University of London, 5 May 2021. * Daniel Alves, Jennifer Isasi, Sarah Melton, Sofia Papastamkou, Jessica Parr, Riva Quiroga, Nabeel Siddiqui, Brandon Walsh, '[The Programming Historian: A Global Case Study in Multilingual Open Access and DH Tutelage/Instruction](https://msuglobaldh.org/abstracts/#programming-historian)' (panel), _Global Digital Humanities Symposium_, Michigan State University, East Lansing, USA, 12 April, 2021. * Jessica Parr, '[Cambridge Cultural Heritage Data School: Final plenary](https://www.cdh.cam.ac.uk/events/cambridge-cultural-heritage-data-school-final-plenary)', University of Cambridge, United Kingdom, 30 March 2021. -* Jennifer Isasi & Riva Quiroga, ['_Programming Historian_: Un proyecto colaborativo para poner la programación al alcance de los humanistas'](http://ixa2.si.ehu.eus/intele/?q=webinars), _INTELE : INfraestructura de TEcnologías del LEnguaje_, Spain, 25 March, 2021. +* Jennifer Isasi & Riva Quiroga, ['_Programming Historian_: Un proyecto colaborativo para poner la programación al alcance de los humanistas'](https://ixa2.si.ehu.eus/intele/?q=webinars), _INTELE : INfraestructura de TEcnologías del LEnguaje_, Spain, 25 March, 2021. * Sofia Papastamkou, Jessica Parr & Riva Quiroga, 'Challenges for Digital Literacy in the Humanities: The Open, Community-Based and Multilinguistic Approach of _The Programming Historian_', NewsEye’s International Conference, Europe, 17 March, 2021. * Riva Quiroga, ['Multilingual Digital Humanites'](https://mediacentral.ucl.ac.uk/Play/59506), Digital Humanities Long View Seminar, UCLDH, UK & CESTA, USA, 10 March, 2021. -* Brandon Walsh, '[The Programming Historian and Editorial Process in Digital Publishing](http://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/)', Modern Languages Association Conference 2021, 7-10 January, 2021. +* Brandon Walsh, '[The Programming Historian and Editorial Process in Digital Publishing](https://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/)', Modern Languages Association Conference 2021, 7-10 January, 2021. * Sofia Papastamkou, François Dominic Laramée, Martin Grandjean, '[Le Programming Historian en français: quelles ressources éducatives libres pour les méthodes numériques ?](https://zenodo.org/record/3819954)', *Humanistica 2020 Conference*, Bordeaux, France, 12-14 May 2020. * Sofia Papastamkou, 'A Beating Heart of Digital History: The Programming Historian', [Teaching Digital History Workshop](https://cas.au.dk/en/cedhar/events/show/artikel/teaching-digital-history-workshop), Center for Digital History Aarhus, University of Aarhus, Denmark, 23 October 2019. * Jennifer Isasi, Maria José Afanador y Antonio Rojas Castro, 'Retos en la producción de tutoriales de HD en contexto hispanohablantes', Conferencia ACH 2019, The Association for Computers and the Humanities, Pittsburgh, USA, 23-26 July, 2019. 
@@ -158,7 +158,7 @@ The project team and members of the wider community are involved in a number of * Victor Gayol, 'La investigación del pasado y la historia digital: análisis de datos y cómo aprender (The Programming Historian en español)', _Humanidades Digitales_, IV Feria Internacional de Ciencias Sociales y Humanidades, Centro Universitario de Los Lagos - Universidad de Guadalajara, Lagos de Moreno, Jalisco (9 March, 2017). * Victor Gayol, 'The Programming Historian: 'un modelo colaborativo para la investigación y la ensenñanza en ciencias sociales y humanidades digitales', _Mesa de Trabajo sobre Ciencias Sociales y Humanidades Digitales_, El Colegio De Michoacán, Mexico (21 February 2017). * Adam Crymble, 'Bringing Digital Humanities into the University for Free', University of Cape Town, South Africa (27-28 June 2016). -* Fred Gibbs, 'The Programming Historian' (Poster), _American Historical Association_, New York (January 2015). +* Fred Gibbs, 'The Programming Historian' (Poster), _American Historical Association_, New York (January 2015). * Adam Crymble, 'The Programming Historian 2', _Digital History Seminar_, Institute of Historical Research, London (13 October 2013). * Adam Crymble, 'The Programming Historian 2', _Digital Humanities 2012_, Hamburg (July 2012). @@ -170,11 +170,11 @@ The project team and members of the wider community are involved in a number of * Martin Dröge, 'Rezension zu The Programming Historian', _H-Soz-Kult_, 31.08.2019, . * Sue Levine, 'The Early-Stage Ph.D.'s Guide to Summer', _Inside Higher Education_, 10 June 2019, . * 'Championing open access with online digital history journal', _University of Sussex Press Office_, 9 October, 2018, . -* Adam Crymble, 'A Decade of Programming Historians', _Network in Canadian History & Environment_, 23 March, 2018, . -* Fred Gibbs, "Sustainable Publishing: Reflections of a Former Programming Historian Editor", FredGibbs.net, 2017, . -* Anaclet Pons, "The Programming Historian en español", _Clionauta: Blog de historia_, June 14, 2017, . +* Adam Crymble, 'A Decade of Programming Historians', _Network in Canadian History & Environment_, 23 March, 2018, . +* Fred Gibbs, "Sustainable Publishing: Reflections of a Former Programming Historian Editor", FredGibbs.net, 2017, . +* Anaclet Pons, "The Programming Historian en español", _Clionauta: Blog de historia_, June 14, 2017, . * Seth Denbo, “Historian, Program! Self-Help for Digital Neophytes,” _Perspectives on History: The Newsmagazine of the American Historical Association_, May 2017, . -* Víctor Gayol, '*The Programming Historian* en español', *Blog de Humanidades Digitales*, March 17, 2017, . +* Víctor Gayol, '*The Programming Historian* en español', *Blog de Humanidades Digitales*, March 17, 2017, . 
## Projects Using the Programming Historian

diff --git a/en/supporters.md b/en/supporters.md
index af5983b76c..72554cec0d 100644
--- a/en/supporters.md
+++ b/en/supporters.md
@@ -28,7 +28,7 @@ Contributors to our [Institutional Partner Programme](support-us#institutional-p

- [Cambridge Digital Humanities](https://www.cdh.cam.ac.uk/), United Kingdom
- [Georg-August-Universität Göttingen](https://www.uni-goettingen.de/), Germany
- [MIT Libraries](https://libraries.mit.edu/), United States
-- [Center for Digital Research in the Humanities, University of Nebraska-Lincoln](http://cdrh.unl.edu/), United States
+- [Center for Digital Research in the Humanities, University of Nebraska-Lincoln](https://cdrh.unl.edu/), United States
- [The National Archives](https://www.nationalarchives.gov.uk/), United Kingdom
- [College of the Liberal Arts, Penn State University](https://la.psu.edu/), United States
- [Purdue University](https://www.purdue.edu/), United States

diff --git a/en/translator-guidelines.md b/en/translator-guidelines.md
index b57f07e75e..7ad9a2cf22 100644
--- a/en/translator-guidelines.md
+++ b/en/translator-guidelines.md
@@ -34,7 +34,7 @@ All of our lessons must also be written in Markdown and follow our technical for

## Submitting a Translated Lesson
Once your translation file has been prepared to the above specifications, you are ready to submit it for peer review.

-We have a [Programming Historian project page at GitHub](https://github.com/programminghistorian), where we maintain two repositories (a repository is a place to store related files and folders–you can think of it as a kind of folder). One of these, called [jekyll](https://github.com/programminghistorian/jekyll), hosts the code for the live version of the site you see at http://programminghistorian.org. The other repository is called [ph-submissions](https://github.com/programminghistorian/ph-submissions).
+We have a [Programming Historian project page at GitHub](https://github.com/programminghistorian), where we maintain two repositories (a repository is a place to store related files and folders–you can think of it as a kind of folder). One of these, called [jekyll](https://github.com/programminghistorian/jekyll), hosts the code for the live version of the site you see at https://programminghistorian.org. The other repository is called [ph-submissions](https://github.com/programminghistorian/ph-submissions).

Our preferred way for translators to submit a lesson is to add it directly to the [ph-submissions](https://github.com/programminghistorian/ph-submissions) repository (or repo, for short). Thanks to GitHub's features, you can do this using drag-and-drop uploading actions with which you are probably already familiar. As a new translator, here are the steps:

diff --git a/es/acerca-de.md b/es/acerca-de.md
index 2ee26103a5..8f9baa2685 100644
--- a/es/acerca-de.md
+++ b/es/acerca-de.md
@@ -14,7 +14,7 @@ Todos los tutoriales publicados en _The Programming Historian en español_ han s

Nuestro proceso de revisión es un poco distinto al tradicional: no solicitamos una valoración sobre la calidad del texto, es decir, si merece ser publicado o no, sino que pedimos a nuestros revisores que participen de manera activa para mejorar el tutorial, de tal modo que todas las partes implicadas aprendan. Al hacernos cargo de un tutorial nuevo o bien de una traducción, seguimos un [flujo de trabajo]({{site.baseurl}}/es/guia-para-autores) específico con el objetivo de que la lección se publique en un período de tiempo razonable.
Puedes consultar nuestra [guía para revisores]({{site.baseurl}}/es/guia-para-revisores) si deseas más información.

## Código abierto

-En _The Programming Historian en español_ estamos comprometidos con el uso de herramientas y lenguajes accesibles para todo el mundo. Todas nuestras lecciones utilizan lenguajes y programas gratuitos. Creemos que cualquier persona debería poder hacer uso de nuestros tutoriales con independencia de la financiación disponible para llevar a cabo su proyecto de investigación. Desde 2016, se ha depositado una versión citable del proyecto _Programming Historian_ en [Zenodo](https://zenodo.org/). El depósito de 2022 está disponible en [doi.org/10.5281/zenodo.7313045](https://doi.org/10.5281/zenodo.7313045). Desde 2018, el [Archivo Web del Reino Unido](https://www.webarchive.org.uk/) rastrea e indexa a _The Programming Historian_ de manera regular. Estos datos son archivados y están disponibles de manera púbica [a través de su sitio web](https://www.webarchive.org.uk/wayback/en/archive/*/http://programminghistorian.org/).
+En _The Programming Historian en español_ estamos comprometidos con el uso de herramientas y lenguajes accesibles para todo el mundo. Todas nuestras lecciones utilizan lenguajes y programas gratuitos. Creemos que cualquier persona debería poder hacer uso de nuestros tutoriales con independencia de la financiación disponible para llevar a cabo su proyecto de investigación. Desde 2016, se ha depositado una versión citable del proyecto _Programming Historian_ en [Zenodo](https://zenodo.org/). El depósito de 2022 está disponible en [doi.org/10.5281/zenodo.7313045](https://doi.org/10.5281/zenodo.7313045). Desde 2018, el [Archivo Web del Reino Unido](https://www.webarchive.org.uk/) rastrea e indexa a _The Programming Historian_ de manera regular. Estos datos son archivados y están disponibles de manera pública [a través de su sitio web](https://www.webarchive.org.uk/wayback/en/archive/*/https://programminghistorian.org/).

## Acceso abierto *Diamond*

@@ -26,7 +26,7 @@ _The Programming Historian en español_ (ISSN {{ site.data.snippets.issn[page.la

## Premios

-The _Programming Historian_ ha ganado múltiples premios que reconocen y celebran nuestros logros en las esferas de la publicación en acceso abierto y de las humanidades digitales. En 2016, la revista en inglés fue la ganadora del [Digital Humanities Awards](http://dhawards.org/dhawards2016/results/) en la categoría de Mejor Serie de Posts y, al año siguiente, 2017, _Programming Historian en español_ [recibió el mismo galardón](http://dhawards.org/dhawards2017/results/). En 2018, la [Asociación de Humanidades Digitales Hispánicas](http://humanidadesdigitaleshispanicas.es/) otorgó el premio de 'Mejor iniciativa formativa desarrollada durante el año 2018' a _Programming Historian en español_. Recibimos el [Canadian Social Knowledge Institute's Open Scholarship Award](https://etcl.uvic.ca/events-activities/open-scholarship-awards/) en 2020 y en 2021 nuestro trabajo fue reconocido con un [Coko Foundation's Open Publishing Award](https://openpublishingawards.org/results/2021/index.html) en la categoría de Contenido Abierto. En 2022, ganamos la categoría de Mejor material de formación en DH de los [Digital Humanities Awards](http://dhawards.org/dhawards2022/results/).
+The _Programming Historian_ ha ganado múltiples premios que reconocen y celebran nuestros logros en las esferas de la publicación en acceso abierto y de las humanidades digitales.
En 2016, la revista en inglés fue la ganadora del [Digital Humanities Awards](https://dhawards.org/dhawards2016/results/) en la categoría de Mejor Serie de Posts y, al año siguiente, 2017, _Programming Historian en español_ [recibió el mismo galardón](https://dhawards.org/dhawards2017/results/). En 2018, la [Asociación de Humanidades Digitales Hispánicas](https://humanidadesdigitaleshispanicas.es/) otorgó el premio de 'Mejor iniciativa formativa desarrollada durante el año 2018' a _Programming Historian en español_. Recibimos el [Canadian Social Knowledge Institute's Open Scholarship Award](https://etcl.uvic.ca/events-activities/open-scholarship-awards/) en 2020 y en 2021 nuestro trabajo fue reconocido con un [Coko Foundation's Open Publishing Award](https://openpublishingawards.org/results/2021/index.html) en la categoría de Contenido Abierto. En 2022, ganamos la categoría de Mejor material de formación en DH de los [Digital Humanities Awards](https://dhawards.org/dhawards2022/results/).

## Política de diversidad

@@ -40,4 +40,4 @@ Para ver un listado de patrocinadores y ayudas, visita nuestra página '[Apóyan

## Historia del proyecto

-*The Programming Historian* fue fundado en 2008 por William J. Turkel y Alan MacEachern. En aquel entonces, Turkel publicó [un blog post](http://digitalhistoryhacks.blogspot.com/2008/01/programming-historian.html) en el que explicó sus ideas para el proyecto. Se centró principalmente en Python y se publicó en acceso abierto como un proyecto de "Infraestructura digital" en la Red de *Historia y Medio Ambiente de Canada* (Network in Canadian History & Environment (NiCHE)). En 2012, *The Programming Historian* expandió su equipo editorial y se presentó como una revista académica de metodología para historiadores digitales, de revisión por pares y de acceso abierto. En 2016 añadimos una publicación en español a la publicación inicial en inglés y en 2017 empezamos a publicar lecciones traducidas bajo el título *The Programming Historian en español*. En 2018 [organizamos nuestro primer taller de escritura en español](/posts/bogota-workshop-report) y [abrimos una convocatoria para lecciones en español](/posts/convocatoria-de-tutoriales). En ese mismo año añadimos una publicación en francés que lanzó *Programming Historian en français* en 2019. Un año después, un equipo de habla portuguesa se nos unió e inauguramos *[Programming Historian em português]({{site.baseurl}}/pt)* a principios de 2021.
+*The Programming Historian* fue fundado en 2008 por William J. Turkel y Alan MacEachern. En aquel entonces, Turkel publicó [un blog post](https://digitalhistoryhacks.blogspot.com/2008/01/programming-historian.html) en el que explicó sus ideas para el proyecto. Se centró principalmente en Python y se publicó en acceso abierto como un proyecto de "Infraestructura digital" en la Red de *Historia y Medio Ambiente de Canadá* (Network in Canadian History & Environment (NiCHE)). En 2012, *The Programming Historian* expandió su equipo editorial y se presentó como una revista académica de metodología para historiadores digitales, de revisión por pares y de acceso abierto. En 2016 añadimos una publicación en español a la publicación inicial en inglés y en 2017 empezamos a publicar lecciones traducidas bajo el título *The Programming Historian en español*. En 2018 [organizamos nuestro primer taller de escritura en español](/posts/bogota-workshop-report) y [abrimos una convocatoria para lecciones en español](/posts/convocatoria-de-tutoriales).
En ese mismo año añadimos una publicación en francés que lanzó *Programming Historian en français* en 2019. Un año después, un equipo de habla portuguesa se nos unió e inauguramos *[Programming Historian em português]({{site.baseurl}}/pt)* a principios de 2021.

diff --git a/es/colaboradores.md b/es/colaboradores.md
index 7a8cc15fa2..4680a3b48a 100644
--- a/es/colaboradores.md
+++ b/es/colaboradores.md
@@ -29,7 +29,7 @@ Contribuidores de nuestro [Programa de Instituciones Asociadas](pia):

- [Cambridge Digital Humanities](https://www.cdh.cam.ac.uk/), Reino Unido
- [Georg-August-Universität Göttingen](https://www.uni-goettingen.de/), Alemania
- [MIT Libraries](https://libraries.mit.edu/), Estados Unidos
-- [Center for Digital Research in the Humanities, University of Nebraska-Lincoln](http://cdrh.unl.edu/), Estados Unidos
+- [Center for Digital Research in the Humanities, University of Nebraska-Lincoln](https://cdrh.unl.edu/), Estados Unidos
- [The National Archives](https://www.nationalarchives.gov.uk/), Reino Unido
- [College of the Liberal Arts, Penn State University](https://la.psu.edu/), Estados Unidos
- [Purdue University](https://www.purdue.edu/), Estados Unidos

diff --git a/es/contribuciones.md b/es/contribuciones.md
index e35c9e08bf..67983cee86 100644
--- a/es/contribuciones.md
+++ b/es/contribuciones.md
@@ -14,7 +14,7 @@ _Programming Historian en español_ es posible gracias al esfuerzo de voluntario

Si dominas más de uno de nuestros idiomas (francés, español, inglés, portugués), puedes ponerte en contacto con nosotros para traducir una lección ya publicada en _Programming Historian_ de un idioma a otro. De esta manera nos ayudarás en nuestra contribución en las comunidades de humanidades digitales en español y francés, y profundizarás en un lenguaje, método o tecnología.

-Buscamos traducciones rigurosas y de lectura amena que tengan en cuenta los contextos de investigación hispánico, lusófono y francés así como los recursos disponibles en nuestras respectivas comunidades. Puesto que muchos de lo tecnicismos son nuevos y/o todavía no están recogidos en los diccionarios, también recomendamos el uso de la [Taxonomía sobre Actividades de investigación digital en humanidades](http://vocabularios.caicyt.gov.ar/portalthes/index.php?v=42) de TaDiRAH y el [Glosario de Preservación Archivística Digital (Versión 4.0)](http://www.mecd.gob.es/planes-nacionales/dam/jcr:f20a4ba1-0ed2-445d-9be9-b8b0382562ea/mex-glosario-interpares-total0112.pdf) de Voutssas-M y Barnard Amozorrutia (UNAM).
+Buscamos traducciones rigurosas y de lectura amena que tengan en cuenta los contextos de investigación hispánico, lusófono y francés así como los recursos disponibles en nuestras respectivas comunidades. Puesto que muchos de los tecnicismos son nuevos y/o todavía no están recogidos en los diccionarios, también recomendamos el uso de la [Taxonomía sobre Actividades de investigación digital en humanidades](https://vocabularios.caicyt.gov.ar/portalthes/index.php?v=42) de TaDiRAH y el [Glosario de Preservación Archivística Digital (Versión 4.0)](https://www.mecd.gob.es/planes-nacionales/dam/jcr:f20a4ba1-0ed2-445d-9be9-b8b0382562ea/mex-glosario-interpares-total0112.pdf) de Voutssas-M y Barnard Amozorrutia (UNAM).

Si te interesa colaborar, consulta nuestras [instrucciones para autores y traductores](/es/guia-para-autores.html).

## Revisa una lección

@@ -63,7 +63,7 @@ Agradecemos de manera especial alertas sobre lecciones que no funcionan.
A medid

{{ site.data.snippets.library-catalogue-image-alt[page.lang] }}

-_Programming Historian_ está registrado en WorldCat en [español](https://www.worldcat.org/title/programming-historian-en-espanol/oclc/1061292935&referer=brief_results), en [inglés](http://www.worldcat.org/title/programming-historian/oclc/951537099), en [francés](https://uva.worldcat.org/title/programming-historian-en-franais/oclc/1104391842), y en [portugués](https://search.worldcat.org/title/1332987197).
+_Programming Historian_ está registrado en WorldCat en [español](https://www.worldcat.org/title/programming-historian-en-espanol/oclc/1061292935&referer=brief_results), en [inglés](https://www.worldcat.org/title/programming-historian/oclc/951537099), en [francés](https://uva.worldcat.org/title/programming-historian-en-franais/oclc/1104391842), y en [portugués](https://search.worldcat.org/title/1332987197).

Gracias a [University of Purdue library], a Amanda Visconti y a University of Virginia library. Y ha sido indexado por el [Directory of Open Access Journals].

Este proyecto se propone demostrar cómo deben ser las publicaciones académicas en abierto. Por favor, ayúdanos a difundir nuestro mensaje pidiendo a tu bibliotecario o bibliotecaria que añada este recurso al catálogo de tu biblioteca.

@@ -83,6 +83,6 @@ Si se te ocurren más formas de participación, siempre puedes [escribirnos un e

[revisores]: /es/guia-para-revisores
[Guía para editores]: /es/guia-para-revisores
[comentarios]: /es/retroalimentacion
-[WorldCat]: http://www.worldcat.org/title/programming-historian/oclc/951537099
-[University of Purdue library]: http://purdue-primo-prod.hosted.exlibrisgroup.com/primo_library/libweb/action/dlDisplay.do?vid=PURDUE&search_scope=everything&docId=PURDUE_ALMA51671812890001081&fn=permalink
+[WorldCat]: https://www.worldcat.org/title/programming-historian/oclc/951537099
+[University of Purdue library]: https://purdue-primo-prod.hosted.exlibrisgroup.com/primo_library/libweb/action/dlDisplay.do?vid=PURDUE&search_scope=everything&docId=PURDUE_ALMA51671812890001081&fn=permalink
[Directory of Open Access Journals]: https://doaj.org/toc/2397-2068

diff --git a/es/donaciones.md b/es/donaciones.md
index a82e6816d5..8ec2c1d320 100644
--- a/es/donaciones.md
+++ b/es/donaciones.md
@@ -19,7 +19,7 @@ Tu colaboración apoya directamente la infrastructura que de nuestras publicacio
    - + 

diff --git a/es/guia-editor.md b/es/guia-editor.md
index ef97d3d36d..cde0d142b3 100644
--- a/es/guia-editor.md
+++ b/es/guia-editor.md
@@ -45,7 +45,7 @@ A continuación, el editor creará un *issue* en el [repositorio de GitHub](http

Si la lección no es entregada en la [fecha acordada], el editor intentará contactar con el autor o autores de la lección. Si no recibe noticias, el ticket se cerrará. Éste podrá abrirse en el futuro a petición del autor o autores.

- El principal contacto para esta lección es [nombre del editor]. Si se produce algún problema, el autor puede contactar con nuestros ’ombudsperson' (Silvia Gutiérrez De la Torre - http://programminghistorian.org/es/equipo-de-proyecto).
+ El principal contacto para esta lección es [nombre del editor]. Si se produce algún problema, el autor puede contactar con nuestra 'ombudsperson' (Silvia Gutiérrez De la Torre - https://programminghistorian.org/es/equipo-de-proyecto).

Este texto, sin embargo, puede editarse y adaptarse a las necesidades para reflejar más objetivos o lo que se ha negociado entre el editor y el autor.

@@ -151,7 +151,7 @@ Desde un punto de vista técnico, estas son las áreas en las que tendrás que i

El **editor** debe sugerir un nombre para el archivo de la traducción o lección nueva conforme a las siguientes pautas:

- El nombre debe ser corto pero descriptivo pues se convertirá en el *slug* de la lección cuando se publique (es decir, la terminación de la URL).
-- Una buena URL debería encajar en una diapositiva, debería ser fácil de recordar y debería describir el contenido de la lección. Nuestras URLS tienen el siguiente formato: http://programminghistorian.org/es/lecciones/NOMBRE-DEL-ARCHIVO-AQUI
+- Una buena URL debería encajar en una diapositiva, debería ser fácil de recordar y debería describir el contenido de la lección. Nuestras URLs tienen el siguiente formato: https://programminghistorian.org/es/lecciones/NOMBRE-DEL-ARCHIVO-AQUI
- No introduzcas espacios en el nombre del archivo; en su lugar utiliza guiones.
- La extensión del archivo debe ser `.md` con el objetivo de que GitHub genere una visualización provisional de la lección.

@@ -319,11 +319,11 @@ Las lecciones se representan mediante una imagen `vintage` que refleja algún el

Puedes buscar imágenes en los recursos siguientes:

- - [Europeana](http://www.europeana.eu/portal/en)
+ - [Europeana](https://www.europeana.eu/portal/en)
- [British Library](https://www.flickr.com/photos/britishlibrary)
- [Internet Archive Book Images](https://archive.org/details/bookimages)
- [Virtual Manuscript Library of Switzerland](https://www.flickr.com/photos/e-codices)
- - [Library of Congress Maps](http://www.loc.gov/maps/collections)
+ - [Library of Congress Maps](https://www.loc.gov/maps/collections)

Si como editor estás buscando una imagen para una lección nueva, asegúrate de que la imagen sigue el mismo estilo que las imágenes anteriores; debería ser una ilustración, no una fotografía, tener al menos 200 píxeles de anchura y altura, y estar libre de derechos. Asegúrate de que la imagen no es ofensiva y ten en cuenta nuestro [compromiso con la diversidad](/posts/PH-commitment-to-diversity); en otras palabras, intenta encontrar una imagen que no perpetúe estereotipos o envíe mensajes sutiles sobre la masculinidad y la raza blanca.
diff --git a/es/guia-para-revisores.md b/es/guia-para-revisores.md
index 01397f7a91..48855f44d0 100644
--- a/es/guia-para-revisores.md
+++ b/es/guia-para-revisores.md
@@ -68,7 +68,7 @@ De manera más específica, en cuanto a las traducciones, apreciamos el rigor pe

- ¿Las capturas de pantalla y trozos de código han sido adaptados?
- ¿Se han añadido referencias bibliográficas en español?

-Puesto que muchos de lo tecnicismos son nuevos y/o todavía no están recogidos en los diccionarios, recomendamos el uso de la [Taxonomía sobre Actividades de investigación digital en humanidades](http://vocabularios.caicyt.gov.ar/portalthes/index.php?v=42) de TaDiRAH y el [Glosario de Preservación Archivística Digital (Versión 4.0)](http://www.mecd.gob.es/planes-nacionales/dam/jcr:f20a4ba1-0ed2-445d-9be9-b8b0382562ea/mex-glosario-interpares-total0112.pdf) de Voutssas-M y Barnard Amozorrutia (UNAM).
+Puesto que muchos de los tecnicismos son nuevos y/o todavía no están recogidos en los diccionarios, recomendamos el uso de la [Taxonomía sobre Actividades de investigación digital en humanidades](https://vocabularios.caicyt.gov.ar/portalthes/index.php?v=42) de TaDiRAH y el [Glosario de Preservación Archivística Digital (Versión 4.0)](https://www.mecd.gob.es/planes-nacionales/dam/jcr:f20a4ba1-0ed2-445d-9be9-b8b0382562ea/mex-glosario-interpares-total0112.pdf) de Voutssas-M y Barnard Amozorrutia (UNAM).

### Lecciones nuevas

Por lo que respecta a las lecciones nuevas, queremos que las explicaciones técnicas (y el nivel de dificultad) sean constantes a lo largo del tutorial. En tanto que revisor, queremos que, en un tutorial dirigido a un usuario experimentado, seas capaz de detectar pasajes que explican en detalle un concepto demasiado simple. Y a la inversa: queremos evitar tutoriales dirigidos a principiantes que no explican de manera adecuada un concepto fundamental para entender la lección. Aspectos a tener en cuenta:

diff --git a/es/investigacion.md b/es/investigacion.md
index 6dbdcebe76..27a7de72ad 100644
--- a/es/investigacion.md
+++ b/es/investigacion.md
@@ -9,11 +9,11 @@ original: research

El equipo del proyecto y los miembros de la comunidad en general están involucrados en una serie de iniciativas académicas relacionadas con nuestro trabajo aquí en *The Programming Historian*. Estas iniciativas incluyen eventos, artículos en revistas académicas, reseñas (de nosotros por la comunidad) y carteles. Si tú estás desarrollando una investigación académica usando los materiales de este proyecto, por favor contacta con nuestra asistente de publicación Anisa Hawes.

## *Programming Historian* original

-* William J. Turkel y Alan MacEachern, [_The Programming Historian_](http://niche-canada.org/wp-content/uploads/2013/09/programming-historian-1.pdf) 1a edición (Network in Canadian History & Environment: 2007-2008).
+* William J. Turkel y Alan MacEachern, [_The Programming Historian_](https://niche-canada.org/wp-content/uploads/2013/09/programming-historian-1.pdf) 1a edición (Network in Canadian History & Environment: 2007-2008).
* Traducción al japonés de William J. Turkel y Alan MacEachern, [_The Programming Historian_](https://www.dh.ku-orcas.kansai-u.ac.jp/?cat=2), 1a edición (Network in Canadian History & Environment: 2007-2008).

### Reseñas

-* Björn Ekström, Elisa Tattersall Wallin and Hana Marčetić, '[_Programming Historian_: Novice-friendly tutorials on digital methods](http://www.diva-portal.org/smash/record.jsf?pid=diva2%3A1508542&dswid=7551)', _Tidskrift för ABM_, Vol. 5, no 1 (2020), pp. 71-75.
+* Björn Ekström, Elisa Tattersall Wallin and Hana Marčetić, '[_Programming Historian_: Novice-friendly tutorials on digital methods](https://www.diva-portal.org/smash/record.jsf?pid=diva2%3A1508542&dswid=7551)', _Tidskrift för ABM_, Vol. 5, no 1 (2020), pp. 71-75. * Dries Daems, '[A Review and Roadmap of Online Learning Platforms and Tutorials in Digital Archaeology](https://doi.org/10.1017/aap.2019.47)', _Advances in Archaeological Practice_, vol. 8, issue 1 (2020), pp. 87-92. * Martin Dröge, '[Review of: The Programming Historian](https://www.hsozkult.de/webreview/id/rezwww-184)', _H-Soz-Kult_ (2019). * Priscila Pilatowsky Goñi, '[Reseña a The programming historian](https://revistas.uned.es/index.php/RHD/article/view/22420)', _Revista de Humanidades Digitales_, vol. 2 (2018). @@ -28,22 +28,22 @@ El equipo del proyecto y los miembros de la comunidad en general están involucr * Jennifer Isasi, Riva Quiroga, Nabeel Siddiqui, Joana Vieira Paulino, Alex Wermer-Colan, [“A Model for Multilingual and Multicultural Digital Scholarship Methods Publishing"](https://www.taylorfrancis.com/chapters/edit/10.4324/9781003393696-3/model-multilingual-multicultural-digital-scholarship-methods-publishing-jennifer-isasi-riva-quiroga-nabeel-siddiqui-joana-vieira-paulino-alex-wermer-colan), en _Multilingual Digital Humanities_, editado por Viola, L., & Spence, P., Routledge, 2023. * Adam Crymble & Charlotte M. H. Im, ['Measuring digital humanities learning requirements in Spanish & English-speaking practitioner communities'](https://doi.org/10.1007/s42803-023-00066-x), International Journal of Digital Humanities, (2023). * Eric Brasil, '[_pyHDB - Ferramenta Heurística para a Hemeroteca Digital Brasileira: utilizando técnicas de web scraping para a pesquisa em História_'](https://doi.org/10.15848/hh.v15i40.1904), _História Da Historiografia: International Journal of Theory and History of Historiography_, 15(40) (2022), 186–217. -* Matthew Lincoln, Sarah Melton, Jennifer Isasi, François Dominic Laramée, '[Relocating Complexity: _The Programming Historian_ and Multilingual Static Site Generation](http://www.digitalhumanities.org/dhq/vol/16/2/000585/000585.html)', _Digital Humanities Quarterly_ 16, 2 (2022). +* Matthew Lincoln, Sarah Melton, Jennifer Isasi, François Dominic Laramée, '[Relocating Complexity: _The Programming Historian_ and Multilingual Static Site Generation](https://www.digitalhumanities.org/dhq/vol/16/2/000585/000585.html)', _Digital Humanities Quarterly_ 16, 2 (2022). * Jennifer Isasi y Antonio Rojas Castro, ‘[¿Sin equivalencia? Una reflexión sobre la traducción al español de recursos educativos abiertos](https://muse.jhu.edu/article/842253)’, _Hispania_, 104, no. 4 (2021), 613-624. * Adam Crymble y Maria José Afanador Llach, ‘The Globally Unequal Promise of Digital Tools for History: UK and Colombia Case Study’ en _Teaching History for the Contemporary World_, editado por Adele Nye, 85-98, Springer, 2021. * Daniel Alves, ['Ensinar Humanidades Digitais sem as Humanidades Digitais: um olhar a partir das licenciaturas em História'](https://novaresearch.unl.pt/files/32228034/Ensinar_Humanidades_Digitais.pdf), _Revista EducaOnline_, v. 15, n. 2 (2021). * Adam Crymble, [_Technology & the Historian: Transformations in the Digital Age_](https://www.press.uillinois.edu/books/catalog/57hxp7wr9780252043710.html), (University of Illinois Press, 2021). 
* Anna-Maria Sichani, James Baker, Maria José Afanador Llach, y Brandon Walsh, [‘Diversity and Inclusion in Digital Scholarship and Pedagogy: The Case of The Programming Historian’](https://doi.org/10.1629/uksg.465), _Insights_, (2019). -* Katrina Navickas y Adam Crymble, ['From Chartist Newspaper to Digital Map of Grass-roots Meetings, 1841-44: Documenting Workflows'](http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179), _Journal of Victorian Culture_, (2017). +* Katrina Navickas y Adam Crymble, ['From Chartist Newspaper to Digital Map of Grass-roots Meetings, 1841-44: Documenting Workflows'](https://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179), _Journal of Victorian Culture_, (2017). * Adam Crymble, ['Identifying and Removing Gender Barriers in Open Learning Communities: The Programming Historian'], _Blended Learning in Practice_, (2016), 49-60. [[pre-print pdf](/researchpapers/openLearningCommunities2016.pdf)] * Fred Gibbs, ‘[Editorial Sustainability and Open Peer Review at Programming Historian]’, *DH Commons*, Vol. 1 (2015). -* Shawn Graham, Ian Milligan, y Scott Weingart, [_Exploring Big Historical Data: The Historian's Macroscope_](http://www.themacroscope.org/2.0/), (Imperial College Press, 2015). +* Shawn Graham, Ian Milligan, y Scott Weingart, [_Exploring Big Historical Data: The Historian's Macroscope_](https://www.themacroscope.org/2.0/), (Imperial College Press, 2015). ### Reportes * Maria José Afanador-Llach & Andrés Rivera, '[Segundo ciclo de talleres: Herramientas y procesos digitales para la investigación y creación en artes y humanidades](/researchpapers/Informe_final_Talleres%20EHCN_2023-ENG_PH.pdf)', (2023). * Incllewsion and the Programming Historian, 'Initial Accessibility Testing: Summary of Findings', (2021). -* Penny Andrews and the Programming Historian, ['The Programming Historian: developing and sustaining impact in the Global South'](http://doi.org/10.5281/zenodo.3813763) (2020). +* Penny Andrews and the Programming Historian, ['The Programming Historian: developing and sustaining impact in the Global South'](https://doi.org/10.5281/zenodo.3813763) (2020). * Amy Kavanagh and the Programming Historian, 'Programming Historian – Access for visually impaired researchers', (n.d.). ### Talleres y eventos @@ -54,7 +54,7 @@ El equipo del proyecto y los miembros de la comunidad en general están involucr * Alex Wermer-Colan, ['Learning Digital Methods with the _Programming Historian_'](https://charlesstudy.temple.edu/event/11953011), Temple University [En línea], (22 de febrero de 2024). * Carlo Blum, Adam Crymble, Vicky Garnett, Timothée Giraud, Alíz Horváth, Stefan Krebs, Ralph Marschall, Sofia Papastamkou, & Lorella Viola, 'Invisible College of Digital History: Workshop on Multilingual Educational Resources', C²DH [En línea], (8 de noviembre de 2023). * Nabeel Siddiqui, 'Convolutional Neural Networks for Image Classification', University of Edinburgh [En línea], (7 de noviembre de 2023). -* Eric Brasil, '[História Digital e História Digital da Educação: Caminhos Cruzados](http://www.iea.usp.br/eventos/historia-digital-educacao-caminhos-cruzados)', Instituto de Estudos Avançados, USP, São Paulo, Brasil, (17 de octubre 2023). +* Eric Brasil, '[História Digital e História Digital da Educação: Caminhos Cruzados](https://www.iea.usp.br/eventos/historia-digital-educacao-caminhos-cruzados)', Instituto de Estudos Avançados, USP, São Paulo, Brasil, (17 de octubre 2023). 
* Scott Kleinman, Alex Wermer-Colan, Joana Vieira Paulino, Nabeel Siddiqui, Zoe LeBlanc, 'Developing a Digital Humanities Tutorial', [DH 2023](https://dh2023.adho.org/), Graz, Austria, (10 de julio de 2023). * Daphné Mathelier, 'Atelier Markdown', [11e journées du réseau Medici](https://medici2023.sciencesconf.org/resource/page/id/2), Université de Liège, Bélgica, (29 de junio de 2023). * María José Afanador Llach, Jennifer Isasi, Riva Quiroga, 'Sobre _Programming Historian en español_ y cómo contribuir a la publicación', Semana de Humanidades Digitales 2023 [En línea], (10 de mayo de 2023). @@ -139,10 +139,10 @@ El equipo del proyecto y los miembros de la comunidad en general están involucr * Adam Crymble, 'Facilitating Making in Digital Humanities', The Archaeology of Making, University of London, Reino Unido, 5 de mayo 2021. * Daniel Alves, Jennifer Isasi, Sarah Melton, Sofia Papastamkou, Jessica Parr, Riva Quiroga, Nabeel Siddiqui, Brandon Walsh, '[The Programming Historian: A Global Case Study in Multilingual Open Access and DH Tutelage/Instruction](https://msuglobaldh.org/abstracts/#programming-historian)' (panel), _Global Digital Humanities Symposium_, Michigan State University, East Lansing, USA, 12 de abril, 2021. * Jessica Parr, '[Cambridge Cultural Heritage Data School: Final plenary](https://www.cdh.cam.ac.uk/events/cambridge-cultural-heritage-data-school-final-plenary)', University of Cambridge, Reino Unido, 30 de marzo 2021. -* Jennifer Isasi & Riva Quiroga, ['_Programming Historian_: Un proyecto colaborativo para poner la programación al alcance de los humanistas'](http://ixa2.si.ehu.eus/intele/?q=webinars), _INTELE : INfraestructura de TEcnologías del LEnguaje_, España, 25 de marzo, 2021. +* Jennifer Isasi & Riva Quiroga, ['_Programming Historian_: Un proyecto colaborativo para poner la programación al alcance de los humanistas'](https://ixa2.si.ehu.eus/intele/?q=webinars), _INTELE : INfraestructura de TEcnologías del LEnguaje_, España, 25 de marzo, 2021. * Sofia Papastamkou, Jessica Parr & Riva Quiroga, 'Challenges for Digital Literacy in the Humanities: The Open, Community-Based and Multilinguistic Approach of _The Programming Historian_', NewsEye’s International Conference, Europa, 17 de marzo, 2021. * Riva Quiroga, ['Multilingual Digital Humanities'](https://mediacentral.ucl.ac.uk/Play/59506), Digital Humanities Long View Seminar, UCLDH, UK & CESTA, USA, 10 de marzo, 2021. -* Brandon Walsh, '[The Programming Historian and Editorial Process in Digital Publishing](http://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/)', Modern Languages Association Conference 2021, USA, 7-10 de enero, 2021. +* Brandon Walsh, '[The Programming Historian and Editorial Process in Digital Publishing](https://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/)', Modern Languages Association Conference 2021, USA, 7-10 de enero, 2021. * Sofia Papastamkou, François Dominic Laramée, Martin Grandjean, '[Le Programming Historian en français: quelles ressources éducatives libres pour les méthodes numériques ?](https://zenodo.org/record/3819954)', *Humanistica 2020*, Bordeaux, France, 12-14 de mayo 2020. * Sofia Papastamkou, 'A Beating Heart of Digital History: The Programming Historian', [Teaching Digital History Workshop](https://cas.au.dk/en/cedhar/events/show/artikel/teaching-digital-history-workshop), Center for Digital History Aarhus, University of Aarhus, Dinamarca, 23 de octubre 2019. 
* Jennifer Isasi, Maria José Afanador y Antonio Rojas Castro, 'Retos en la producción de tutoriales de HD en contextos hispanohablantes', Conferencia ACH 2019, The Association for Computers and the Humanities, Pittsburgh, 23 al 26 de julio, 2019. @@ -155,12 +155,12 @@ El equipo del proyecto y los miembros de la comunidad en general están involucr * Victor Gayol, 'La investigación del pasado y la historia digital: análisis de datos y cómo aprender (The Programming Historian en español)', _Humanidades Digitales_, IV Feria Internacional de Ciencias Sociales y Humanidades, Centro Universitario de Los Lagos - Universidad de Guadalajara, Lagos de Moreno, Jalisco (9 de marzo, 2017). * Victor Gayol, 'The Programming Historian: un modelo colaborativo para la investigación y la enseñanza en ciencias sociales y humanidades digitales', _Mesa de Trabajo sobre Ciencias Sociales y Humanidades Digitales_, El Colegio De Michoacán, México (21 de febrero, 2017). * Adam Crymble, 'Bringing Digital Humanities into the University for Free', University of Cape Town, South Africa (27-28 junio 2016). -* Fred Gibbs, ‘The Programming Historian’ (Cartel), *American Historical Association*, New York (enero 2015). +* Fred Gibbs, ‘The Programming Historian’ (Cartel), *American Historical Association*, New York (enero 2015). * Adam Crymble, ‘The Programming Historian 2’, *Digital History Seminar*, Institute of Historical Research, London (13 octubre 2013). * Adam Crymble, ‘The Programming Historian 2’, *Digital Humanities 2012*, Hamburg (julio 2012). -* Anaclet Pons, “The Programming Historian en español”, Clionauta: Blog de historia, junio 14, 2017, http://clionauta.hypotheses.org/16979 +* Anaclet Pons, “The Programming Historian en español”, Clionauta: Blog de historia, junio 14, 2017, https://clionauta.hypotheses.org/16979 * Seth Denbo, “Historian, Program! Self-Help for Digital Neophytes,” Perspectives on History: The Newsmagazine of the American Historical Association, mayo 2017, https://www.historians.org/publications-and-directories/perspectives-on-history/may-2017/historian-program-self-help-digital-neophytes. -* Víctor Gayol, ‘The Programming Historian en español’, Blog de Humanidades Digitales, marzo 17, 2017, http://humanidadesdigitales.net/blog/2017/03/17/the-programming-historian-en-espanol/. +* Víctor Gayol, ‘The Programming Historian en español’, Blog de Humanidades Digitales, marzo 17, 2017, https://humanidadesdigitales.net/blog/2017/03/17/the-programming-historian-en-espanol/. ### Editoriales @@ -169,11 +169,11 @@ El equipo del proyecto y los miembros de la comunidad en general están involucr * Matthew Lincoln, 'Multilingual Jekyll: How The Programming Historian Does That', *matthewlincoln.net*, 1 de marzo 2020, . * Sue Levine, 'The Early-Stage Ph.D.'s Guide to Summer', _Inside Higher Education_, 10 de junio 2019, . * 'Championing open access with online digital history journal', _University of Sussex Press Office_, 9 de octubre, 2018, . -* Adam Crymble, 'A Decade of Programming Historians', _Network in Canadian History & Environment_, 23 de marzo, 2018, . 
+* Fred Gibbs, "Sustainable Publishing: Reflections of a Former Programming Historian Editor", FredGibbs.net, 2017, . +* Anaclet Pons, "The Programming Historian en español", *Clionauta: blog de historia*, 14 de junio, 2017 . * Seth Denbo, “Historian, Program! Self-Help for Digital Neophytes,” _Perspectives on History: The Newsmagazine of the American Historical Association_, May 2017, . -* Víctor Gayol, '*The Programming Historian* en español', *Blog de Humanidades Digitales*, 17 de marzo, 2017, +* Víctor Gayol, '*The Programming Historian* en español', *Blog de Humanidades Digitales*, 17 de marzo, 2017, ### Proyectos que utilizan *The Programming Historian* @@ -184,9 +184,9 @@ El equipo del proyecto y los miembros de la comunidad en general están involucr [Review of the Programming Historian]: https://academic.oup.com/jah/article-abstract/103/1/299/1751315 -[Review of the Programming Historian]: http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian +[Review of the Programming Historian]: https://jitp.commons.gc.cuny.edu/review-of-the-programming-historian ['Identifying and Removing Gender Barriers in Open Learning Communities: The Programming Historian']: https://www.herts.ac.uk/__data/assets/pdf_file/0016/138013/Blip-2016-Autumn-2016-Final-Autumn-2016.pdf ['pre-print pdf']: /researchpapers/openLearningCommunities2016.pdf -[Editorial Sustainability and Open Peer Review at Programming Historian]: http://web.archive.org/web/20180713014622/http://dhcommons.org/journal/issue-1/editorial-sustainability-and-open-peer-review-programming-historian +[Editorial Sustainability and Open Peer Review at Programming Historian]: https://web.archive.org/web/20180713014622/https://dhcommons.org/journal/issue-1/editorial-sustainability-and-open-peer-review-programming-historian ['Digital Project Consultations']: https://dhatasa2015.wordpress.com/ [Library Carpentry: software skills training for library professionals]: https://liberquarterly.eu/article/view/10847 diff --git a/es/lecciones/administracion-de-datos-en-r.md b/es/lecciones/administracion-de-datos-en-r.md index c47e38b757..f2a06569a5 100644 --- a/es/lecciones/administracion-de-datos-en-r.md +++ b/es/lecciones/administracion-de-datos-en-r.md @@ -44,7 +44,7 @@ Al final de la lección, ## Introducción Los datos que puedes encontrar disponibles en red raramente están en el formato necesario para su análisis y necesitarás manipularlos antes de explorar las preguntas que te interesan. ¡Esto puede llevar más tiempo que el análisis! En este tutorial vamos a aprender algunas técnicas básicas de manipulación, manejo y administración de tus datos en R. Más específicamente, vamos a seguir la filosofía de "datos limpios" o [*"tidy data"*](https://www.jstatsoft.org/article/view/v059i10) articulada por Hadley Wickham. -Según [Wickham](http://hadley.nz), los datos están "limpios" cuando cumplen tres criterios: +Según [Wickham](https://hadley.nz), los datos están "limpios" cuando cumplen tres criterios: 1. Cada observación está en una fila. 2. Cada variable está en una columna. 3. Cada valor tiene su propia celda. @@ -61,7 +61,7 @@ Tal vez lo más importante sea que tener nuestros datos en este formato nos perm En este tutorial nos enfocamos en el paquete [dplyr](https://cran.r-project.org/web/packages/dplyr/index.html) de tidyverse pero merece la pena mencionar otros que nos encontraremos por el camino: [**magrittr**](https://magrittr.tidyverse.org): Este paquete nos da acceso al operador `%>%` y hace nuestro código más fácil de leer. 
-[**ggplot2**](https://ggplot2.tidyverse.org): Este paquete utiliza ["la gramática de gráficos"](http://academica-e.unavarra.es/bitstream/handle/2454/15785/Gramática.pdf?sequence=1)[^1] para ofrecer una manera fácil de visualizar nuestros datos. +[**ggplot2**](https://ggplot2.tidyverse.org): Este paquete utiliza ["la gramática de gráficos"](https://academica-e.unavarra.es/bitstream/handle/2454/15785/Gramática.pdf?sequence=1)[^1] para ofrecer una manera fácil de visualizar nuestros datos. [**readr**](https://readr.tidyverse.org): Este paquete da acceso a un método más rápido y racionalizado para importar datos rectangulares (una tabla), como son los archivos CSV (valores separados por comas). [**tibble**](https://tibble.tidyverse.org): Este paquete nos permite reconceptualizar el formato _data frame_ (marco o tabla de datos) para que sea más fácil trabajar con ellos e imprimirlos. @@ -128,7 +128,7 @@ ggplot(data=poblacion_mississipi_y_virginia, aes(x=año, y=poblacion, color=esta Hacer cambios rápidos en el código y reanalizar nuestros datos es una parte fundamental del análisis exploratorio de datos (AED, o EDA por sus siglas en inglés). En vez de tratar de "probar" una hipótesis, el análisis exploratorio de datos nos ayuda a entender nuestros datos mejor y a hacernos preguntas sobre ellos. Para los historiadores el AED ofrece una forma de saber cuándo indagar más en un tema y cuándo dejarlo a un lado, y es en esta área en la que R sobresale. ## Línea de operaciones -Antes de ver `dplyr`, tenemos que entender lo que es la línea de operaciones ```%>%``` en R porque la vamos a utilizar mucho en nuestros ejemplos. Como decíamos, la línea de operaciones es parte del paquete [magittr](https://cran.r-project.org/web/packages/magrittr/vignettes/magrittr.html) creado por [Stefan Milton Bache](http://stefanbache.dk) y [Hadley Wickham](http://hadley.nz/) y está incluida en tidyverse. Su nombre es un homenaje al pintor surrealista Rene Magritte y su famosa obra "[La traición de las imágenes](https://historia-arte.com/obras/la-traicion-de-las-imagenes)", que muestra una pipa con las palabras "esto no es una pipa" debajo, en francés. +Antes de ver `dplyr`, tenemos que entender lo que es la línea de operaciones ```%>%``` en R porque la vamos a utilizar mucho en nuestros ejemplos. Como decíamos, la línea de operaciones es parte del paquete [magrittr](https://cran.r-project.org/web/packages/magrittr/vignettes/magrittr.html) creado por [Stefan Milton Bache](https://stefanbache.dk) y [Hadley Wickham](https://hadley.nz/) y está incluida en tidyverse. Su nombre es un homenaje al pintor surrealista René Magritte y su famosa obra "[La traición de las imágenes](https://historia-arte.com/obras/la-traicion-de-las-imagenes)", que muestra una pipa con las palabras "esto no es una pipa" debajo, en francés. La línea de operaciones te permite pasar lo que está a su izquierda como la primera variable en una función especificada a la derecha. Aunque pueda parecer extraño al principio, una vez que lo aprendas verás que hace tu código más fácil de leer al evitar declaraciones anidadas. No te preocupes si esto te resulta un poco complicado ahora. Será más fácil una vez que trabajemos con ejemplos. 
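Para ver la diferencia en la práctica, este esbozo mínimo en R compara una declaración anidada con su equivalente encadenado mediante `%>%`. El *data frame* `datos` y sus cifras son inventados, a imitación del ejemplo de Virginia y Mississippi de esta lección:

```r
# Esbozo hipotético: un data frame inventado con la estructura de la lección
library(dplyr)

datos <- data.frame(estado = c("Virginia", "Mississippi", "Virginia"),
                    poblacion = c(691737, 75448, 877683))

# Versión anidada: se lee "de adentro hacia afuera"
summarise(group_by(filter(datos, estado == "Virginia"), estado),
          total = sum(poblacion))

# Versión con %>%: cada resultado pasa como primer argumento de la función
# siguiente, de modo que el código se lee en el orden de las operaciones
datos %>%
  filter(estado == "Virginia") %>%
  group_by(estado) %>%
  summarise(total = sum(poblacion))
```

Ambas versiones producen exactamente el mismo resultado; la ventaja de la segunda es que se lee de izquierda a derecha, paso a paso, sin anidamientos.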
@@ -482,7 +482,7 @@ Este tutorial debería darte una idea de cómo organizar y manipular tus datos e * Para aprender más sobre el paquete 'ggplot2' puedes consultar la sección "[Visualización de datos](https://cienciadedatos.github.io/r4ds/03-visualize.html)" en el libro _R para Ciencia de Datos_ de Hadley Wickham y Garrett Grolemund. -* Tanto la *[Guía para la Presentación de Gráficos Estadísticos](https://www.inei.gob.pe/media/MenuRecursivo/metodologias/libro.pdf),* del Instituto Nacional de Estadística e Informática (2009) así como la [*Gramática de las gráficas: Pistas para mejorar las representaciones de datos*](http://academica-e.unavarra.es/bitstream/handle/2454/15785/Gramática.pdf?sequence=1) de Joaquín Sevilla Moróder ofrecen explicaciones de cómo presentar tus datos y errores a evitar. +* Tanto la *[Guía para la Presentación de Gráficos Estadísticos](https://www.inei.gob.pe/media/MenuRecursivo/metodologias/libro.pdf),* del Instituto Nacional de Estadística e Informática (2009) así como la [*Gramática de las gráficas: Pistas para mejorar las representaciones de datos*](https://academica-e.unavarra.es/bitstream/handle/2454/15785/Gramática.pdf?sequence=1) de Joaquín Sevilla Moróder ofrecen explicaciones de cómo presentar tus datos y errores a evitar. [^1]: En el tutorial original se hace referencia al libro "[The Grammar of Graphics](https://www.springer.com/us/book/9780387245447)" (2005) de Wilkinson. diff --git a/es/lecciones/analisis-de-corpus-con-antconc.md b/es/lecciones/analisis-de-corpus-con-antconc.md index d9e17459c2..fc93b8c14b 100644 --- a/es/lecciones/analisis-de-corpus-con-antconc.md +++ b/es/lecciones/analisis-de-corpus-con-antconc.md @@ -1,315 +1,315 @@ ---- -title: Análisis de corpus con AntConc -authors: -- Heather Froehlich -date: 2015-11-24 -translation_date: 2018-05-04 -editors: -- Fred Gibbs -reviewers: -- Nabeel Siddiqui -- Rob Sieczkiewicz -translator: -- Carlos Manuel Varón Castañeda -translation-editor: -- Antonio Rojas Castro -translation-reviewer: -- Jennifer Isasi -- Antonio Rojas Castro -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/170 -layout: lesson -original: corpus-analysis-with-antconc -difficulty: 1 -activity: analyzing -topics: [distant-reading] -abstract: "El análisis de corpus permite hacer comparaciones a gran escala entre objetos presentes en los textos; es decir, lo que se conoce como lectura distante." -avatar_alt: Grabado de una estantería con libros -doi: 10.46430/phes0032 ---- - -
    -En 2022, Lauren Anthony sacó AntConc 4.0, una actualización importante del software AntConc. Aunque casi todas las funciones siguen siendo las mismas, algunas de las prácticas que se describen en esta lección han cambiado un poco. Si estás utilizando una versión más reciente de AntConc que la que se muestra a continuación, puedes consultar la guía de ayuda proporcionada por Laurence Anthony (solo disponible en inglés). Este aviso es especialmente relevante para abrir un corpus utilizando la nueva herramienta Corpus Manager (enero de 2022). -
    - -{% include toc.html %} - -## Introducción - -El análisis de corpus es un tipo de análisis de textos que permite hacer comparaciones a gran escala entre objetos presentes en los mismos —esto es, aquello que se conoce como lectura distante—. Lo anterior hace posible apreciar fenómenos que no necesariamente se hacen visibles cuando leemos. Si, por ejemplo, dispones de una colección de documentos, es posible que desearas encontrar patrones de uso gramatical o frases de aparición recurrente en la misma. También puede ser que quisieras hallar frases cuya probabilidad de aparición fuese más alta o más baja en la obra de un autor, o bien en un tipo determinado de textos; clases particulares de estructuras gramaticales; o muchos ejemplos de un concepto particular en una gran cantidad de documentos que se encuentran enmarcados en cierto contexto. En este sentido, el análisis de corpus resulta muy útil para demostrar hipótesis sobre textos, o para triangular resultados obtenidos a través de otras metodologías de análisis textual basadas en herramientas digitales. - -Al finalizar este tutorial, tendrás la capacidad de: - -- Crear o descargar un corpus de textos. -- Realizar una búsqueda de palabras clave en contexto. -- Identificar patrones respecto de una palabra determinada. -- Utilizar criterios de búsqueda más específicos. -- Revisar diferencias estadísticamente significativas entre corpus. -- Efectuar comparaciones multimodales a través de metodologías de análisis propias de la lingüística de corpus. - -Es posible que te hayas acercado a la ejecución de análisis como el que se describe aquí si has realizado alguna de las siguientes tareas: - -- Búsqueda de todas las apariciones de un término específico en un archivo PDF o un documento de Microsoft Word®. -- Uso de [Voyant Tools](http://voyant-tools.org/) para revisar patrones en un texto. -- Lectura y desarrollo de los tutoriales de introducción a Python disponibles en *[The Programming Historian](/es/lecciones/)*. - -En muchos sentidos, [Voyant](http://voyant-tools.org/) es una puerta de entrada a la realización de análisis más sofisticados y replicables, ya que la naturaleza de tipo “házlo tú mismo” de los *scripts* en Python o R puede no ser atractiva para todos. [AntConc](http://www.laurenceanthony.net/software/antconc/) llena este vacío en tanto se propone como una aplicación informática independiente para el análisis lingüístico de textos, la cual se encuentra disponible de forma gratuita para los sistemas operativos Windows, Mac OS X y Linux (funciona, por tanto, en múltiples plataformas), y es objeto de actualizaciones permanentes por parte de su creador, [Laurence Anthony](http://www.laurenceanthony.net/)[^1]; si bien existen otras aplicaciones para efectuar análisis de concordancias lingüísticas, se resaltan de AntConc las dos cualidades señaladas (para acceder a recursos adicionales sobre esta temática, véase *[An Introductory Bibliography to Corpus Linguistics](https://hfroehli.ch/2014/05/11/intro-bibliography-corpus-linguistics/)*). - -En este tutorial se presentan varias maneras diferentes de acercarse a un corpus de textos. Es importante tener en cuenta que las metodologías de lingüística de corpus no funcionan en todas las situaciones. Con esto, conforme sigas los pasos propuestos, es conveniente que reflexiones sobre la tarea que estés realizando y cómo puede ser de utilidad para responder una pregunta específica en relación con los datos de los que dispongas. 
En este sentido, si bien la presente lección está construida bajo la metodología "haz esto y luego esto para lograr *X*", no siempre es necesario seguir en orden estricto los pasos que se muestran aquí: se brinda en este espacio una síntesis general de algunos de los métodos disponibles para realizar análisis de esta naturaleza, en lugar de una receta única para el éxito. - -### Descargas necesarias para el desarrollo de este tutorial - -1. Programa: [AntConc](http://www.laurenceanthony.net/software/antconc/)[^2]. - - Descomprime el archivo del programa (si fuere necesario) e inícialo. Las capturas de pantalla presentadas aquí pueden diferir ligeramente de la versión de AntConc que utilices (y del sistema operativo, desde luego), pero los procedimientos son más o menos los mismos en todas las plataformas y versiones recientes de la aplicación. Este tutorial fue escrito teniendo como referente una versión específica (bastante antigua) de AntConc, en tanto consideramos que resulta más fácil de usar para fines introductorios. Puedes emplear la versión más reciente para desarrollar el tutorial si lo tienes a bien; pero, si deseas seguir los pasos con la misma información que presentamos en las capturas de pantalla de esta lección, es necesario que descargues la versión específica que empleamos aquí ([3.2.4](http://www.laurenceanthony.net/software/antconc/releases/AntConc324/)). - -2. Corpus de prueba: descarga este [archivo zip de reseñas cinematográficas](/assets/corpus-analysis-with-antconc/antconc_corpus_files.zip) (escritas en inglés). - -### Presentación sintética de las temáticas abordadas en la lección - -- Trabajar con archivos de texto plano -- Interfaz de usuario y carga de corpus en AntConc -- Búsqueda de palabras clave en contexto -- Búsqueda avanzada de palabras clave en contexto -- Colocaciones y listas de palabras -- Comparación de corpus -- Discusión: hacer comparaciones significativas -- Recursos adicionales - -### Trabajar con archivos de texto plano - -- AntConc solo funciona con archivos de texto plano de extensión .txt (por ejemplo, "Hamlet.txt"); **no puede leer** archivos de extensiones .doc, .docx o .pdf. Por lo tanto, si dispones de documentos de este tipo, deberás convertirlos en archivos .txt. -- La aplicación tiene la capacidad de trabajar con archivos XML (no te preocupes si los desconoces) guardados con la extensión .txt. - -Visita tu portal de noticias favorito y accede a un artículo (su naturaleza no importa, siempre que se componga mayoritariamente de texto). Luego, selecciona todo el texto (encabezado, pie de página, cuerpo, etc.), haz clic derecho y selecciona “copiar”. Después, abre un editor de texto como Bloc de notas (Windows) o TextEdit (Mac OS X) y pega allí el texto que copiaste. - -Existen otros editores de texto de uso gratuito, tales como [Notepad++](http://notepad-plus-plus.org/) (Windows) o [TextWrangler](http://www.barebones.com/products/textwrangler/) (Mac OS X), que ostentan funciones más avanzadas y son particularmente útiles para hacer una gran cantidad de tareas de limpieza de texto. Con esto último hacemos referencia a eliminar datos paratextuales tales como el texto *boilerplate* (información que incluye elementos como el título de la página, los datos del editor, etc.), el cual aparece de forma reiterada en muchos artículos. 
Si, por el contrario, conservas esta información, los datos se verán comprometidos, por cuanto el programa de análisis de texto tomará en cuenta estos términos en recuentos de palabras, análisis estadísticos y relaciones léxicas. A este respecto podrías considerar, por ejemplo, la posibilidad de eliminar los encabezados y pies de página estándar que aparecen en cada página (véase el tutorial [Limpieza de datos con OpenRefine](/es/lecciones/limpieza-de-datos-con-OpenRefine) para más información sobre cómo automatizar esta tarea). Ahora bien, en corpus de menor tamaño podría ser más conveniente que tú mismo hicieras dicha labor; de esa manera, adquirirás una mejor percepción de tu corpus. - -- Guarda el artículo como un archivo .txt en el escritorio. Cabría la posibilidad de que hicieras labores adicionales de limpieza del texto, tales como la remoción de los datos del autor (elimínalos y guarda el archivo nuevamente). Recuerda en este sentido que toda la información que permanezca en el archivo puede y será tomada en cuenta por el programa de análisis de texto. -- Ve al escritorio y verifica que puedas encontrar el archivo de texto que guardaste. - -Mediante la ejecución repetida de las tareas anteriores se construye un corpus de archivos de texto plano; esta labor suele implicar el abordaje de asuntos relacionados con muestreo, representatividad y organización. Recuerda: es **necesario** que cada archivo de tu corpus sea de texto plano para que AntConc pueda interpretarlo. A este respecto, se acostumbra nombrar los archivos con la extensión .txt para reconocer fácilmente su naturaleza. - -Como lo supondrás, crear un corpus significativo puede resultar bastante tedioso si este se compone archivo por archivo, en especial si pretendes analizar un conjunto extenso de documentos. Por lo tanto, es muy común hacer *web scraping* (esto es, usar un programa sencillo para tomar archivos de la web de forma automatizada) para construir el corpus; si deseas obtener más información acerca de los conceptos y técnicas asociados a dicha labor, consulta las lecciones [Scraping with Beautiful Soup](/lessons/intro-to-beautiful-soup) y [Automatic Downloading with wget](/lessons/automated-downloading-with-wget), disponibles en *The Programming Historian*. Para efectos de este tutorial, en lugar de componer el corpus documento por documento, vamos a utilizar uno ya existente, compuesto por reseñas cinematográficas y tomado del [Natural Language Processing Toolkit](http://www.nltk.org/) (NLTK). Este corpus se compone de 2000 reseñas, organizadas por su carácter —positivo o negativo—; abordaremos aquí un pequeño subconjunto de ellas (200 de cada categoría). - -La construcción de corpus es un campo de estudio en sí mismo. Para más información sobre este tópico, sugerimos consultar "[Representativeness in Corpus Design](https://academic.oup.com/dsh/article-abstract/8/4/243/928942)", Literary and Linguistic Computing, 8 (4): 243-257; y *[Developing Linguistic Corpora: a Guide to Good Practice](http://www.amazon.com/Developing-Linguistic-Corpora-Practice-Guides/dp/1842172050/ref=sr_1_1)*[^3]. - -### Primeros pasos con AntConc: interfaz de usuario y carga de corpus en la aplicación - -Al iniciarse, AntConc se verá como en la siguiente imagen: - -{% include figure.html filename="antconc1.png" caption="Ventana principal de AntConc" %} - -En el costado izquierdo de la pantalla principal hay un cuadro que enlista todos los archivos cargados del corpus, el cual usaremos más adelante. 
- -La parte superior de la aplicación consta de 7 pestañas: - -- **_Concordance_ (concordancia):** muestra lo que se conoce como *keyword in context view* (vista de palabras clave en contexto [KWIC, por sus iniciales en inglés]), cuyos resultados se obtienen mediante la barra de búsqueda. -- **_Concordance Plot_ (mapa de concordancia):** presenta una visualización muy sencilla de los resultados de la búsqueda de palabras clave en contexto. Las apariciones del término buscado se representarán como pequeñas líneas negras dentro de un rectángulo que representa la extensión total de cada archivo analizado. -- **_File View_ (vista de archivo):** brinda una vista del archivo completo en la que se resaltan las apariciones del término buscado, con lo cual se obtiene una visión más amplia del contexto en el que este aparece. -- **_Clusters_ (clústeres):** muestra palabras que aparecen juntas muy frecuentemente. -- **_Collocates_ (colocaciones)**: mientras que la pestaña anterior muestra palabras que *definitivamente* aparecen juntas en el corpus, esta presenta aquellas que tienen una alta probabilidad de estarlo. -- **_Word List_ (lista de palabras):** muestra todas las palabras del corpus. -- **_Keyword List_ (lista de palabras clave):** presenta los resultados de comparaciones entre dos corpus. - -Dado su carácter introductorio, este tutorial solo brinda una mirada superficial a lo que se puede hacer con AntConc. En consecuencia, solo nos concentraremos aquí en las funciones de las pestañas *Concordance*, *Collocates*, *Keywords* y *Word List*. - -#### Carga de corpus - -Tal como sucede con cualquier otro programa informático, comenzaremos por ir a “File” – “Open” (“Archivo” – Abrir); pero en lugar de abrir solo **un** archivo, haremos lo propio con la carpeta que contiene todos los documentos que constituyen el corpus. AntConc permite abrir directorios completos; en consecuencia, si ya tienes conocimiento y te sientes cómodo trabajando de esta manera, puedes abrir la carpeta “All reviews” ("Todas las reseñas") y pasar directamente a la sección de análisis de este tutorial [^4]. - -{% include figure.html filename="open-file-21.png" caption="Apertura de una carpeta." %} - -- Recuerda que guardamos los archivos en el escritorio; dirígete entonces a esa ubicación en el menú desplegable. - -{% include figure.html filename="files-on-desktop-open.png" caption="Apertura de una carpeta localizada en el escritorio." %} - -- Una vez en el escritorio, elige la carpeta “movie reviews from ntlk” ("reseñas cienmatográficas del ntlk"): - -{% include figure.html filename="browse-for-directory-inside-folder.png" caption="Localización de la carpeta *movie reviews from nltk*" %} - -- Ahora, selecciona la carpeta “Negative reviews” ("Reseñas negativas") y haz clic en “OK”. Hecho esto, deberían cargarse 200 archivos de texto en la columna izquierda del programa —confírmalo mediante la casilla “Total No.”—. - -{% include figure.html filename="open-negative-reviews.png" caption="Carga de la carpeta *Negative Reviews*." %} - -- Repite el mismo proceso para cargar la carpeta "Positive Reviews" ("Reseñas positivas"). Con esto, deberías tener 400 textos en la columna "Corpus Files". - -{% include figure.html filename="positive-reviews.png" caption="Carga de la carpeta *Positive Reviews*." %} - -{% include figure.html filename="all-reviews-loaded.png" caption="Conjunto completo de reseñas cargadas en el programa." 
%} - -## Búsqueda de palabras clave en contexto - -### Comenzar con una búsqueda básica - -Una de las labores en las cuales se destacan las herramientas de análisis de corpus como AntConc radica en encontrar patrones en el uso de la lengua que nos resulta difícil identificar como lectores. Nos es complicado rastrear palabras pequeñas y en apariencia poco importantes, tales como 'yo', 'él', 'ella', 'un' y 'es' porque son muy comunes, pero los computadores son muy buenos para realizar esta labor. Estos términos, que en lingüística reciben el nombre de palabras funcionales —se conocen como palabras vacías (*stopwords*) en el ámbito de las humanidades digitales—, suelen constituir indicadores estilísticos muy claros en materias de autoría y género en los textos. En consecuencia, tales palabras pueden ser términos de búsqueda bastante potentes por sí solos, o bien combinados con términos que se relacionen en mayor medida con el contenido (*content-driven terms*), lo cual ayuda al investigador a identificar patrones que tal vez no haya detectado previamente. - -En la pestaña *Concordance*, escribe la palabra 'the' en el cuadro de búsqueda ubicado en la parte inferior y haz clic en “Start”. Acto seguido, el programa mostrará cada una de las apariciones de dicho término en el corpus de reseñas cinematográficas, así como el contexto en el que estas se presentan. Esto recibe el nombre de "visor de palabras clave en contexto" (*keywords in context viewer*). - -{% include figure.html filename="the-thinking.png" caption="*The* es una palabra común en la lengua inglesa." %} - -La palabra buscada aparece 14.618 veces en el corpus según la casilla *Concordance Hits*, que se encuentra en la parte inferior de la pestaña. - -Como se indicó anteriormente, la lista KWIC resulta una buena forma de comenzar a buscar patrones. Aunque la cantidad de información suministrada con la búsqueda es aún muy grande, ¿qué tipo de palabras aparecen cerca de 'the'? - -Ahora, prueba a hacer una búsqueda del término 'a'; tanto este último como 'the' son artículos en la lengua inglesa, pero el primero es definido y el segundo indefinido; y los resultados arrojados por la búsqueda ilustrarán esa diferencia. - -Llegados a este punto, ya debes estar familiarizado con las líneas de texto que componen la vista KWIC. Ahora, realiza una nueva búsqueda, esta vez de la palabra 'shot': los resultados mostrarán las apariciones del término tanto en la función sintáctica de sustantivo (por ejemplo, “line up the shot”) como en la de verbo conjugado (por ejemplo, "this scene was shot carefully"). - -¿Qué ves? Entendemos que esta puede ser una forma de identificar patrones difícil de intepretar. Intenta presionar el botón amarillo “Sort” (clasificar): ¿qué sucede al hacerlo? - -{% include figure.html filename="sorting-shot-1l1r.png" caption="Palabras que aparecen junto a *shot*." %} - -Puedes ajustar la forma en que AntConc ordena la información encontrada si cambias los parámetros que en la imagen anterior aparecen encerrados en el círculo de color rojo: L corresponde a izquierda (*left*) y R a derecha (*right*); lo anterior puede extenderse hasta 5 posiciones en cualquier dirección. Los valores por defecto de la aplicación son 1 izquierda (1L), 2 derecha (2R), 3 derecha (3R); pero puedes alterarlos, por ejemplo, a 3 izquierda (3L), 2 izquierda (2L), 1 derecha (1R) (en aras de obtener frases o trigramas que finalicen con el término buscado) si haces clic en las flechas hacia arriba y abajo que se encuentran junto a los parámetros. 
Si no deseas realizar este tipo de clasificación, puedes omitirla (dejar los valores predeterminados 1L, 2R y 3R) o dejar todos los parámetros con el valor 0. Cabe la posibilidad de generar clasificaciones menos lineales, como 4L, 3R, 5R, que arrojarían como resultado mucha más información del contexto. El programa puede tardar un poco en mostrar este tipo de clasificaciones, por lo que sugerimos tener paciencia al efectuarlas. Si no estás seguro de cuáles serán los resultados arrojados por la búsqueda, haz clic en "Sort" para ver qué ocurre y efectúa los ajustes a los que haya lugar según tus necesidades. - -### Operadores de búsqueda - -#### Operador * (comodín) - -El operador * (que sirve para buscar 0 o más caracteres) puede ayudar a encontrar las formas de sustantivos en singular y plural, por ejemplo. - -**Tarea:** busca _qualit*_ y ordena los resultados. ¿Qué tiende a preceder y seguir a las palabras 'quality' y 'qualities'? Una pista: son vocablos diferentes con contextos de uso distintos; identifica patrones de uso mediante la búsqueda KWIC. - -Para obtener una lista completa de los operadores comodín disponibles y su función, revisa "Global Settings" – "Wildcard Settings". - -{% include figure.html filename="wildcard-settings.png" caption="Configuración de operadores de búsqueda." %} - -Para conocer la diferencia entre los operadores * y ?, busca _th*n_ y luego _th?n_. Estas dos búsquedas, que a simple vista parecieran muy similares, arrojan resultados distintos. - -El operador *?* es más específico que *, así: - -*wom?n* – 'women' y 'woman'. - -*m?n* – 'man', 'men' y 'min'. - -Una búsqueda de _m*n_, en cambio, no es útil porque se obtendrán resultados que incluirán 'mean', 'melon', etc. - -**Tarea:** compara los resultados de las búsquedas de *wom?n* y *m?n*. - -- Ordena los resultados de cada búsqueda de manera que arrojen datos significativos (por ejemplo, configurar los parámetros de la búsqueda en 0, 1L y 2L) - -- Haz clic en "File" – "Save Output to Text File" y guarda el archivo (no olvides agregar la extensión .txt al nombre del mismo). - -> Sugerencia: durante la exploración en tu investigación, generarás muchos documentos como este para efectos de consulta. Es conveniente, por tanto, nombrar los archivos de tal manera que se describa lo que estos contienen (por ejemplo, “wom?n-results.txt” en lugar de “antconc-results.txt”). - -{% include figure.html filename="save-output-as-text-file.png" caption="Opción *Save output as text file*." %} - -{% include figure.html filename="save-as.png" caption="Cuadro de diálogo *Save As*." %} - -Con lo anterior, puedes abrir el archivo de texto plano generado por el programa en un editor de texto; es posible que debas ampliar la ventana de la aplicación para que este sea legible. - -{% include figure.html filename="results.png" caption="Archivo de resultados de búsqueda KWIC exportado por Antconc, tal como se muestra en un editor de texto." %} - -Realiza el proceso anterior con los resultados de las dos búsquedas y compara los archivos de texto generados. ¿Qué fenómenos puedes ver? - -#### Operador | ("o") - -**Tarea:** busca _she\|he_. - -Ahora, busca las dos palabras anteriores por separado: ¿cuántas veces aparece 'she' en comparación con 'he'? - -La palabra 'she' (ella) aparece en mucha menor cantidad que 'he' (él). ¿Por qué? ¡Esa es una pregunta de investigación! 
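Si quisieras comprobar un recuento como este fuera de AntConc, el siguiente esbozo en R cuenta las apariciones de 'she' y 'he' en un corpus de archivos de texto plano. La carpeta `reviews/` es hipotética y la tokenización es aproximada, por lo que los totales pueden no coincidir exactamente con los de AntConc:

```r
# Esbozo mínimo: contar 'she' y 'he' en todos los .txt de una carpeta hipotética
archivos <- list.files("reviews", pattern = "\\.txt$", full.names = TRUE)
texto <- tolower(paste(unlist(lapply(archivos, readLines)), collapse = " "))
palabras <- unlist(strsplit(texto, "[^a-z']+"))  # separar por caracteres no alfabéticos
table(palabras[palabras %in% c("she", "he")])
```

Una comprobación así no sustituye a AntConc, pero ayuda a validar lo que observas en la interfaz antes de formular conclusiones.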
Una buena manera de ampliar este cuestionamiento podría radicar en ordenar la búsqueda anterior para identificar patrones de uso de las palabras en cuestión, y revisar si las mismas están seguidas de algún verbo en particular. - -**Tarea:** a modo de práctica, busca una palabra que te interese, ordena los resultados de formas diferentes, usa los operadores comodín y exporta los datos obtenidos como archivos de texto plano. He aquí un interrogante orientador: ¿qué tipo de patrones puedes observar? ¿Puedes explicarlos? - -### Colocaciones y listas de palabras -Después de haber analizado las líneas de resultados de la vista KWIC en busca de patrones, ¿no te gustaría que hubiera una forma de que el computador te brindara una lista de palabras que aparecen más frecuentemente con la palabra clave buscada? - -Buenas noticias: existe una manera de obtener esta información en AntConc; está disponible en la pestaña *Collocates* (colocaciones). Al hacer clic en la misma, aparecerá un mensaje por medio del cual la aplicación dirá que necesita crear una lista de palabras. Haz clic en "OK" y el programa lo hará automáticamente. - -> Nota: solo recibirás este aviso cuando no hayas creado una lista de palabras. - -{% include figure.html filename="wordlistwarning.png" caption="Mensaje de advertencia para indicar la necesidad de generar una lista de palabras." %} - -Ahora, intenta generar la lista de colocaciones para el término 'she'. - -Los resultados sin clasificar parecerán comenzar con palabras funcionales (palabras con las que se construyen frases) y luego pasarán a palabras de contenido (términos que dan sentido al texto): las primeras son [las más frecuentes en inglés](http://www.wordfrequency.info/free.asp), en tanto funcionan mayormente como elementos para construir frases. Versiones más recientes de AntConc suelen incluir el término buscado como primer resultado, posiblemente porque está presente en el texto y se quiere hallar palabras que puedan aparecer junto a él. - -Algunas personas podrían tener la intención de prescindir de esta clase de palabras mediante el uso de una lista de palabras funcionales (esta es una labor común cuando se hace modelado de tópicos). Desde nuestra óptica, no promovemos esta práctica porque los computadores se destacan, justamente, en la identificación de palabras con alta frecuencia de aparición; tal como se expresó anteriormente, tendemos a pasarlas por alto. Los computadores —y en especial las aplicaciones como AntConc—, pueden mostrar dónde aparecen o no estas palabras, y esa información puede ser de interés, especialmente en colecciones de texto de gran envergadura (como se vio con las búsquedas de 'a', 'she' y 'he'). - -No obstante, en el caso de la lengua inglesa, la frecuencia de aparición de la letra 's' en el corpus también puede ser bastante alta, en tanto representa el posesivo *ʼs* (la aplicación no toma en cuenta el apóstrofo), pero AntConc la toma como otra palabra. Asimismo, la forma *ʼt* puede aparecer junto al verbo 'do' por cuanto conforman la contracción *donʼt*; la alta frecuencia de su aparición conjunta los convierte en colocaciones altamente probables. - -**Tarea:** genera la lista de colocaciones para las búsquedas de *m?n* y *wom?n*. Ahora, ordénalas de acuerdo con su frecuencia de aparición respecto del parámetro 1L. 
-Los resultados muestran lo que, en teoría, hace que un hombre (*man*) o una mujer (*woman*) sea “digno de mostrarse en el cine”: -- las mujeres deben ser "bellas" (beautiful), "sofisticadas" (*sophisticated*) o estar "embarazadas" (*pregnant*). -- Los hombres tienen que estar, en cierto modo, fuera de lo común: deben ser "santos" (*holy*), "negros" (*black*) o "viejos" (*old*). - -Lo anterior no alude directamente a las películas, sino a la forma como se escribe sobre ellas en las reseñas, y puede llevar a cuestionamientos más sutiles, tales como "¿de qué manera se describen los roles de las mujeres en las comedias románticas en las reseñas escritas por hombres frente a las escritas por mujeres?" - -### Comparación de corpus - -Uno de los tipos de análisis más potentes radica en comparar el corpus propio con uno de referencia más extenso. - -Para este ejercicio, hemos tomado reseñas de filmes en los que Steven Spielberg ha estado involucrado (como director o productor). Podemos compararlos con un corpus de referencia de películas de toda una gama de directores. - -Asegúrate de pensar cuidadosamente sobre las características que podría tener un corpus de referencia para tu propia investigación (por ejemplo, un estudio del lenguaje de Agatha Christie en sus últimos años funcionaría muy bien como un corpus de análisis para compararlo con un corpus de referencia de todas sus novelas). Recuerda que, como lo expresamos anteriormente, la construcción del corpus es un subcampo en sí mismo. - -- Dirígete a "Settings" – "Tool preferences" – "Keyword List". -- Asegúrate de que la casilla de verificación "Use raw files" esté seleccionada en el menú "Reference Corpus". -- Haz clic en el botón "Add Directory" y selecciona la carpeta que contiene los archivos del corpus de referencia. -- Verifica que dispongas de la lista completa de archivos en el listado que se mostrará. - -{% include figure.html filename="adding-a-reference-corpus.png" caption="Carga de un corpus de referencia." %} - -- Haz clic en el botón "Load" y espera que el programa cargue los archivos; una vez la casilla de verificación "Loaded" esté marcada, haz clic en "Apply". - -Existe la posibilidad de intercambiar los roles del corpus de referencia y los archivos principales (es decir, dar al primero la función de los segundos y viceversa) por medio del botón "Swap Ref/Main Files"; en este punto vale la pena experimentar con esta opción y comparar los resultados obtenidos. - -> Si estás utilizando una versión más reciente del programa, el botón anterior puede llamarse "Swap with Target Files". Adicionalmente, cualesquiera sean los datos que vayas a utilizar como corpus de referencia, asegúrate de que estos se carguen correctamente en AntConc (esto es, haz clic en el botón "Load" cada vez que cargues o intercambies un corpus). - -- Dirígete a la pestaña "Keyword list" y una vez allí, presiona el botón "Start" (sin escribir nada en la casilla de búsqueda). Si intercambiaste el corpus de referencia con los archivos objeto del análisis, el programa anunciará la necesidad de crear una nueva lista de palabras antes de generar la lista de palabras clave. Esta se compondrá de aquellos términos que resulten mucho más "inusuales" —de aparición menos probable en terminos estadísticos— en el corpus que se está viendo *vs.* el de referencia. 
- -> *Keyness* (calidad de la palabra clave): corresponde a la frecuencia de aparición de una palabra en el texto cuando se la compara con su frecuencia en un corpus de referencia, "de tal suerte que la probabilidad estadística, calculada mediante un procedimiento determinado, es menor o igual que el valor *p* especificado por el usuario" (información tomada de [este sitio](http://www.lexically.net/downloads/version6/HTML/index.html?keyness_definition.htm)). Para profundizar sobre los detalles estadísticos de este tópico, sugerimos revisar la sección sobre el mismo en la página 7 del [archivo *Readme* de AntConc](http://www.laurenceanthony.net/software/antconc/releases/AntConc335/help.pdf). - -¿Cuáles son nuestras palabras clave? - -{% include figure.html filename="spielberg-vs-movie-reviews.png" caption="Spielberg *vs.* reseñas cinematográficas." %} - -## Discusión: hacer comparaciones significativas - -Es importante tener en cuenta que la forma en que se organicen los archivos de texto para la investigación tendrá efectos en el tipo de interrogantes que puedan surgir de los mismos, así como en los resultados que se obtengan del análisis. A este respecto, recuerda que la comparación realizada aquí entre reseñas negativas y positivas es extremadamente simple; si se quisiere, podrían efectuarse comparaciones adicionales con otros subconjuntos de reseñas, lo cual daría pie a la formulación de interrogantes muy distintos. - -Así entonces, los archivos que se dispongan en el corpus determinarán los resultados obtenidos. Reiteramos que los temas de representatividad y muestreo son muy relevantes en este sentido: no siempre es necesario o ideal utilizar todo un conjunto de datos, incluso si se dispone de él. En este punto, realmente cabe preguntarse por la manera como estos métodos de análisis textual ayudan a generar preguntas de investigación. - -Si se piensa, por ejemplo, en el funcionamiento de las reseñas cinematográficas en tanto género discursivo, puede dirigirse la atención hacia oposiciones como las siguientes: - -- Reseñas cinematográficas *vs.* reseñas musicales -- Reseñas cinematográficas *vs.* reseñas de libros -- Reseñas cinematográficas *vs.* noticias deportivas -- Reseñas cinematográficas *vs.* noticias en general - -Cada una de estas comparaciones aportará información distinta y puede derivar en preguntas de investigación diferentes, tales como: - -- ¿En qué difieren las reseñas cinematográficas de otros tipos de reseñas de productos mediáticos? - -- ¿En qué se diferencian las reseñas cinematográficas de otros tipos de escritos susceptibles de publicarse? -- ¿Cómo se comparan las reseñas de películas con otros géneros de escritura, tales como la crónica deportiva? -- ¿Qué tienen en común las reseñas cinematográficas y las musicales? - -Desde luego, puede darse la vuelta a estos cuestionamientos para generar nuevas preguntas: - -- ¿En qué se diferencian las reseñas bibliográficas de las cinematográficas? - -- ¿En qué difieren las reseñas musicales de las cinematográficas? -- ¿Qué tienen en común los artículos que se publican en la prensa escrita? -- ¿En qué se asemejan las reseñas cinematográficas a otros tipos de escritos susceptibles de publicarse? - -En síntesis, vale la pena pensar en: - -- Por qué se quiere comparar dos corpus. -- Qué tipo de consultas da lugar a preguntas de investigación significativas. -- Principios de construcción de corpus: muestreo y capacidad de asegurar que se obtengan datos representativos. 
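Para hacerse una idea de la estadística que hay detrás del *keyness*, este esbozo en R implementa la verosimilitud logarítmica (*log-likelihood*), uno de los procedimientos habituales para este cálculo. Todas las cifras del ejemplo son hipotéticas:

```r
# Esbozo mínimo del estadístico log-likelihood para comparar frecuencias
# a: frecuencia de la palabra en el corpus de estudio (c palabras en total)
# b: frecuencia de la palabra en el corpus de referencia (d palabras en total)
log_likelihood <- function(a, b, c, d) {
  e1 <- c * (a + b) / (c + d)  # frecuencia esperada en el corpus de estudio
  e2 <- d * (a + b) / (c + d)  # frecuencia esperada en el corpus de referencia
  2 * (a * log(a / e1) + b * log(b / e2))
}

# Ejemplo inventado: 120 apariciones en 50000 palabras del corpus de estudio
# frente a 40 apariciones en 200000 palabras del corpus de referencia
log_likelihood(120, 40, 50000, 200000)
```

Cuanto mayor sea el valor obtenido, más "inusual" resulta la palabra en el corpus de estudio respecto del de referencia, que es justamente lo que recoge la lista de palabras clave.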
- -### Recursos adicionales - -*[A Short Bibliography on Corpus Linguistics](http://hfroehlich.wordpress.com/2014/05/11/intro-bibliography-corpus-linguistics/)* - -[Una versión más sencilla de este tutorial, concebida para usuarios con pocos conocimientos de computación](http://hfroehli.ch/workshops/getting-started-with-antconc/) (en inglés). - -*[Guía rápida de análisis de corpus con AntConc](https://rua.ua.es/dspace/bitstream/10045/43959/4/grac.pdf)*, publicada por la Universidad de Alicante (2015). - -## Notas de traducción - -[^1]: Investigador y docente de la Universidad de Waseda (Japón). -[^2]: La interfaz del programa solo está disponible en inglés. -[^3]: Dos materiales en español pueden ser de utilidad si se desea profundizar en esta témática: de un lado, la conferencia *[Aproximación al concepto de representatividad de corpus](https://www.youtube.com/watch?v=bvTigjPhZco)*; y de otro, la obra *[Explotación de los córpora textuales informatizados para la creación de bases de datos terminológicas basadas en el conocimiento](http://elies.rediris.es/elies18/)*. -[^4]: Si se requiere trabajar con corpus en cuyos textos se emplean caracteres especiales (como es el caso de los documentos escritos en lengua española), es imperativo prestar atención a la codificación con la cual se guardaron los archivos que los componen. Por defecto, AntConc está configurado para operar con documentos de texto plano con codificación Unicode (UTF-8). Así entonces, es preciso verificar en el editor de texto que estos se hayan guardado atendiendo a lo anterior, o bien cambiar los parámetros de importación de archivos en el programa según las necesidades (por ejemplo, trabajar con archivos codificados en ANSI). +--- +title: Análisis de corpus con AntConc +authors: +- Heather Froehlich +date: 2015-11-24 +translation_date: 2018-05-04 +editors: +- Fred Gibbs +reviewers: +- Nabeel Siddiqui +- Rob Sieczkiewicz +translator: +- Carlos Manuel Varón Castañeda +translation-editor: +- Antonio Rojas Castro +translation-reviewer: +- Jennifer Isasi +- Antonio Rojas Castro +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/170 +layout: lesson +original: corpus-analysis-with-antconc +difficulty: 1 +activity: analyzing +topics: [distant-reading] +abstract: "El análisis de corpus permite hacer comparaciones a gran escala entre objetos presentes en los textos; es decir, lo que se conoce como lectura distante." +avatar_alt: Grabado de una estantería con libros +doi: 10.46430/phes0032 +--- + +
+En 2022, Laurence Anthony lanzó AntConc 4.0, una actualización importante del programa. Aunque casi todas las funciones siguen siendo las mismas, algunas de las prácticas que se describen en esta lección han cambiado ligeramente. Si estás utilizando una versión de AntConc más reciente que la que se muestra a continuación, puedes consultar la guía de ayuda proporcionada por Laurence Anthony (solo disponible en inglés). Este aviso es especialmente relevante para abrir un corpus utilizando la nueva herramienta Corpus Manager (enero de 2022). +
+ -{% include toc.html %} - -## Introducción - -El análisis de corpus es un tipo de análisis de textos que permite hacer comparaciones a gran escala entre objetos presentes en los mismos —esto es, aquello que se conoce como lectura distante—. Lo anterior hace posible apreciar fenómenos que no necesariamente se hacen visibles cuando leemos. Si, por ejemplo, dispones de una colección de documentos, es posible que desees encontrar patrones de uso gramatical o frases de aparición recurrente en ella. También puede ser que quieras hallar frases cuya probabilidad de aparición sea más alta o más baja en la obra de un autor, o bien en un tipo determinado de textos; clases particulares de estructuras gramaticales; o muchos ejemplos de un concepto particular en una gran cantidad de documentos que se encuentran enmarcados en cierto contexto. En este sentido, el análisis de corpus resulta muy útil para demostrar hipótesis sobre textos, o para triangular resultados obtenidos a través de otras metodologías de análisis textual basadas en herramientas digitales. - -Al finalizar este tutorial, tendrás la capacidad de: - -- Crear o descargar un corpus de textos. -- Realizar una búsqueda de palabras clave en contexto. -- Identificar patrones respecto de una palabra determinada. -- Utilizar criterios de búsqueda más específicos. -- Revisar diferencias estadísticamente significativas entre corpus. -- Efectuar comparaciones multimodales a través de metodologías de análisis propias de la lingüística de corpus. - -Es posible que te hayas acercado a la ejecución de análisis como el que se describe aquí si has realizado alguna de las siguientes tareas: - -- Búsqueda de todas las apariciones de un término específico en un archivo PDF o un documento de Microsoft Word®. -- Uso de [Voyant Tools](https://voyant-tools.org/) para revisar patrones en un texto. -- Lectura y desarrollo de los tutoriales de introducción a Python disponibles en *[The Programming Historian](/es/lecciones/)*. - -En muchos sentidos, [Voyant](https://voyant-tools.org/) es una puerta de entrada a la realización de análisis más sofisticados y replicables, ya que la naturaleza de tipo “hazlo tú mismo” de los *scripts* en Python o R puede no ser atractiva para todos. [AntConc](https://www.laurenceanthony.net/software/antconc/) llena este vacío en tanto se propone como una aplicación informática independiente para el análisis lingüístico de textos, la cual se encuentra disponible de forma gratuita para los sistemas operativos Windows, Mac OS X y Linux (funciona, por tanto, en múltiples plataformas), y es objeto de actualizaciones permanentes por parte de su creador, [Laurence Anthony](https://www.laurenceanthony.net/)[^1]; si bien existen otras aplicaciones para efectuar análisis de concordancias lingüísticas, se resaltan de AntConc las dos cualidades señaladas (para acceder a recursos adicionales sobre esta temática, véase *[An Introductory Bibliography to Corpus Linguistics](https://hfroehli.ch/2014/05/11/intro-bibliography-corpus-linguistics/)*). - -En este tutorial se presentan varias maneras diferentes de acercarse a un corpus de textos. Es importante tener en cuenta que las metodologías de lingüística de corpus no funcionan en todas las situaciones. Por ello, conforme sigas los pasos propuestos, es conveniente que reflexiones sobre la tarea que estés realizando y cómo puede ser de utilidad para responder una pregunta específica en relación con los datos de los que dispongas.
En este sentido, si bien la presente lección está construida bajo la metodología "haz esto y luego esto para lograr *X*", no siempre es necesario seguir en orden estricto los pasos que se muestran aquí: se brinda en este espacio una síntesis general de algunos de los métodos disponibles para realizar análisis de esta naturaleza, en lugar de una receta única para el éxito. + +### Descargas necesarias para el desarrollo de este tutorial + +1. Programa: [AntConc](https://www.laurenceanthony.net/software/antconc/)[^2]. + + Descomprime el archivo del programa (si fuere necesario) e inícialo. Las capturas de pantalla presentadas aquí pueden diferir ligeramente de la versión de AntConc que utilices (y del sistema operativo, desde luego), pero los procedimientos son más o menos los mismos en todas las plataformas y versiones recientes de la aplicación. Este tutorial fue escrito teniendo como referente una versión específica (bastante antigua) de AntConc, en tanto consideramos que resulta más fácil de usar para fines introductorios. Puedes emplear la versión más reciente para desarrollar el tutorial si lo tienes a bien; pero, si deseas seguir los pasos con la misma información que presentamos en las capturas de pantalla de esta lección, es necesario que descargues la versión específica que empleamos aquí ([3.2.4](https://www.laurenceanthony.net/software/antconc/releases/AntConc324/)). + +2. Corpus de prueba: descarga este [archivo zip de reseñas cinematográficas](/assets/corpus-analysis-with-antconc/antconc_corpus_files.zip) (escritas en inglés). + +### Presentación sintética de las temáticas abordadas en la lección + +- Trabajar con archivos de texto plano +- Interfaz de usuario y carga de corpus en AntConc +- Búsqueda de palabras clave en contexto +- Búsqueda avanzada de palabras clave en contexto +- Colocaciones y listas de palabras +- Comparación de corpus +- Discusión: hacer comparaciones significativas +- Recursos adicionales + +### Trabajar con archivos de texto plano + +- AntConc solo funciona con archivos de texto plano de extensión .txt (por ejemplo, "Hamlet.txt"); **no puede leer** archivos de extensiones .doc, .docx o .pdf. Por lo tanto, si dispones de documentos de este tipo, deberás convertirlos en archivos .txt. +- La aplicación tiene la capacidad de trabajar con archivos XML (no te preocupes si los desconoces) guardados con la extensión .txt. + +Visita tu portal de noticias favorito y accede a un artículo (su naturaleza no importa, siempre que se componga mayoritariamente de texto). Luego, selecciona todo el texto (encabezado, pie de página, cuerpo, etc.), haz clic derecho y selecciona “copiar”. Después, abre un editor de texto como Bloc de notas (Windows) o TextEdit (Mac OS X) y pega allí el texto que copiaste. + +Existen otros editores de texto de uso gratuito, tales como [Notepad++](https://notepad-plus-plus.org/) (Windows) o [TextWrangler](https://www.barebones.com/products/textwrangler/) (Mac OS X), que ostentan funciones más avanzadas y son particularmente útiles para hacer una gran cantidad de tareas de limpieza de texto. Con esto último hacemos referencia a eliminar datos paratextuales tales como el texto *boilerplate* (información que incluye elementos como el título de la página, los datos del editor, etc.), el cual aparece de forma reiterada en muchos artículos. 
Si, por el contrario, conservas esta información, los datos se verán comprometidos, por cuanto el programa de análisis de texto tomará en cuenta estos términos en recuentos de palabras, análisis estadísticos y relaciones léxicas. A este respecto podrías considerar, por ejemplo, la posibilidad de eliminar los encabezados y pies de página estándar que aparecen en cada página (véase el tutorial [Limpieza de datos con OpenRefine](/es/lecciones/limpieza-de-datos-con-OpenRefine) para más información sobre cómo automatizar esta tarea). Ahora bien, en corpus de menor tamaño podría ser más conveniente que tú mismo hicieras dicha labor; de esa manera, adquirirás una mejor percepción de tu corpus. + +- Guarda el artículo como un archivo .txt en el escritorio. Cabría la posibilidad de que hicieras labores adicionales de limpieza del texto, tales como la remoción de los datos del autor (elimínalos y guarda el archivo nuevamente). Recuerda en este sentido que toda la información que permanezca en el archivo puede y será tomada en cuenta por el programa de análisis de texto. +- Ve al escritorio y verifica que puedas encontrar el archivo de texto que guardaste. + +Mediante la ejecución repetida de las tareas anteriores se construye un corpus de archivos de texto plano; esta labor suele implicar el abordaje de asuntos relacionados con muestreo, representatividad y organización. Recuerda: es **necesario** que cada archivo de tu corpus sea de texto plano para que AntConc pueda interpretarlo. A este respecto, se acostumbra nombrar los archivos con la extensión .txt para reconocer fácilmente su naturaleza. + +Como lo supondrás, crear un corpus significativo puede resultar bastante tedioso si este se compone archivo por archivo, en especial si pretendes analizar un conjunto extenso de documentos. Por lo tanto, es muy común hacer *web scraping* (esto es, usar un programa sencillo para tomar archivos de la web de forma automatizada) para construir el corpus; si deseas obtener más información acerca de los conceptos y técnicas asociados a dicha labor, consulta las lecciones [Scraping with Beautiful Soup](/lessons/intro-to-beautiful-soup) y [Automatic Downloading with wget](/lessons/automated-downloading-with-wget), disponibles en *The Programming Historian*. Para efectos de este tutorial, en lugar de componer el corpus documento por documento, vamos a utilizar uno ya existente, compuesto por reseñas cinematográficas y tomado del [Natural Language Processing Toolkit](https://www.nltk.org/) (NLTK). Este corpus se compone de 2000 reseñas, organizadas por su carácter —positivo o negativo—; abordaremos aquí un pequeño subconjunto de ellas (200 de cada categoría). + +La construcción de corpus es un campo de estudio en sí mismo. Para más información sobre este tópico, sugerimos consultar "[Representativeness in Corpus Design](https://academic.oup.com/dsh/article-abstract/8/4/243/928942)", Literary and Linguistic Computing, 8 (4): 243-257; y *[Developing Linguistic Corpora: a Guide to Good Practice](https://www.amazon.com/Developing-Linguistic-Corpora-Practice-Guides/dp/1842172050/ref=sr_1_1)*[^3]. + +### Primeros pasos con AntConc: interfaz de usuario y carga de corpus en la aplicación + +Al iniciarse, AntConc se verá como en la siguiente imagen: + +{% include figure.html filename="antconc1.png" caption="Ventana principal de AntConc" %} + +En el costado izquierdo de la pantalla principal hay un cuadro que enlista todos los archivos cargados del corpus, el cual usaremos más adelante. 
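Antes de seguir con la interfaz, una comprobación opcional desde fuera de AntConc: con unas pocas líneas de R puedes verificar que tu carpeta de corpus contiene solo archivos .txt y hacerte una idea de su tamaño. Es un esbozo ilustrativo: el nombre de la carpeta (`corpus/`) es hipotético y debes sustituirlo por el tuyo.

```r
# Esbozo mínimo: inventario de una carpeta de corpus antes de cargarla en AntConc.
# Se asume una carpeta hipotética "corpus/" con archivos de texto plano en UTF-8.
archivos <- list.files("corpus", pattern = "\\.txt$", full.names = TRUE)
cat("Archivos .txt encontrados:", length(archivos), "\n")

for (f in archivos) {
  # Leemos cada archivo como UTF-8 y contamos las palabras separadas por espacios
  texto <- paste(readLines(f, encoding = "UTF-8", warn = FALSE), collapse = " ")
  n_palabras <- length(strsplit(texto, "\\s+")[[1]])
  cat(basename(f), ":", n_palabras, "palabras\n")
}
```

Hecha esta comprobación, sigamos con la interfaz.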
+ -La parte superior de la aplicación consta de 7 pestañas: - -- **_Concordance_ (concordancia):** muestra lo que se conoce como *keyword in context view* (vista de palabras clave en contexto [KWIC, por sus iniciales en inglés]), cuyos resultados se obtienen mediante la barra de búsqueda. -- **_Concordance Plot_ (mapa de concordancia):** presenta una visualización muy sencilla de los resultados de la búsqueda de palabras clave en contexto. Las apariciones del término buscado se representarán como pequeñas líneas negras dentro de un rectángulo que representa la extensión total de cada archivo analizado. -- **_File View_ (vista de archivo):** brinda una vista del archivo completo en la que se resaltan las apariciones del término buscado, con lo cual se obtiene una visión más amplia del contexto en el que este aparece. -- **_Clusters_ (clústeres):** muestra palabras que aparecen juntas muy frecuentemente. -- **_Collocates_ (colocaciones):** mientras que la pestaña anterior muestra palabras que *definitivamente* aparecen juntas en el corpus, esta presenta aquellas que tienen una alta probabilidad de estarlo. -- **_Word List_ (lista de palabras):** muestra todas las palabras del corpus. -- **_Keyword List_ (lista de palabras clave):** presenta los resultados de comparaciones entre dos corpus. - -Dado su carácter introductorio, este tutorial solo brinda una mirada superficial a lo que se puede hacer con AntConc. En consecuencia, solo nos concentraremos aquí en las funciones de las pestañas *Concordance*, *Collocates*, *Keyword List* y *Word List*. - -#### Carga de corpus - -Tal como sucede con cualquier otro programa informático, comenzaremos por ir a “File” – “Open” (“Archivo” – “Abrir”); pero en lugar de abrir solo **un** archivo, haremos lo propio con la carpeta que contiene todos los documentos que constituyen el corpus. AntConc permite abrir directorios completos; en consecuencia, si ya tienes conocimiento y te sientes cómodo trabajando de esta manera, puedes abrir la carpeta “All reviews” ("Todas las reseñas") y pasar directamente a la sección de análisis de este tutorial[^4]. - -{% include figure.html filename="open-file-21.png" caption="Apertura de una carpeta." %} - -- Recuerda que guardamos los archivos en el escritorio; dirígete entonces a esa ubicación en el menú desplegable. - -{% include figure.html filename="files-on-desktop-open.png" caption="Apertura de una carpeta localizada en el escritorio." %} - -- Una vez en el escritorio, elige la carpeta “movie reviews from nltk” ("reseñas cinematográficas del nltk"): - -{% include figure.html filename="browse-for-directory-inside-folder.png" caption="Localización de la carpeta *movie reviews from nltk*" %} - -- Ahora, selecciona la carpeta “Negative reviews” ("Reseñas negativas") y haz clic en “OK”. Hecho esto, deberían cargarse 200 archivos de texto en la columna izquierda del programa —confírmalo mediante la casilla “Total No.”—. - -{% include figure.html filename="open-negative-reviews.png" caption="Carga de la carpeta *Negative Reviews*." %} - -- Repite el mismo proceso para cargar la carpeta "Positive Reviews" ("Reseñas positivas"). Con esto, deberías tener 400 textos en la columna "Corpus Files". - -{% include figure.html filename="positive-reviews.png" caption="Carga de la carpeta *Positive Reviews*." %} - -{% include figure.html filename="all-reviews-loaded.png" caption="Conjunto completo de reseñas cargadas en el programa."
%} - -## Búsqueda de palabras clave en contexto - -### Comenzar con una búsqueda básica - -Una de las labores en las cuales se destacan las herramientas de análisis de corpus como AntConc radica en encontrar patrones en el uso de la lengua que nos resulta difícil identificar como lectores. Nos es complicado rastrear palabras pequeñas y en apariencia poco importantes, tales como 'yo', 'él', 'ella', 'un' y 'es' porque son muy comunes, pero los computadores son muy buenos para realizar esta labor. Estos términos, que en lingüística reciben el nombre de palabras funcionales —se conocen como palabras vacías (*stopwords*) en el ámbito de las humanidades digitales—, suelen constituir indicadores estilísticos muy claros en materia de autoría y género en los textos. En consecuencia, tales palabras pueden ser términos de búsqueda bastante potentes por sí solos, o bien combinados con términos que se relacionen en mayor medida con el contenido (*content-driven terms*), lo cual ayuda al investigador a identificar patrones que tal vez no haya detectado previamente. - -En la pestaña *Concordance*, escribe la palabra 'the' en el cuadro de búsqueda ubicado en la parte inferior y haz clic en “Start”. Acto seguido, el programa mostrará cada una de las apariciones de dicho término en el corpus de reseñas cinematográficas, así como el contexto en el que estas se presentan. Esto recibe el nombre de "visor de palabras clave en contexto" (*keywords in context viewer*). - -{% include figure.html filename="the-thinking.png" caption="*The* es una palabra común en la lengua inglesa." %} - -La palabra buscada aparece 14.618 veces en el corpus según la casilla *Concordance Hits*, que se encuentra en la parte inferior de la pestaña. - -Como se indicó anteriormente, la lista KWIC resulta una buena forma de comenzar a buscar patrones. Aunque la cantidad de información suministrada con la búsqueda es aún muy grande, ¿qué tipo de palabras aparecen cerca de 'the'? - -Ahora, prueba a hacer una búsqueda del término 'a'; tanto este último como 'the' son artículos en la lengua inglesa, pero 'the' es definido y 'a' indefinido; y los resultados arrojados por la búsqueda ilustrarán esa diferencia. - -Llegados a este punto, ya debes estar familiarizado con las líneas de texto que componen la vista KWIC. Ahora, realiza una nueva búsqueda, esta vez de la palabra 'shot': los resultados mostrarán las apariciones del término tanto en la función sintáctica de sustantivo (por ejemplo, “line up the shot”) como en la de verbo conjugado (por ejemplo, "this scene was shot carefully"). - -¿Qué ves? Entendemos que esta puede ser una forma de identificar patrones difícil de interpretar. Intenta presionar el botón amarillo “Sort” (clasificar): ¿qué sucede al hacerlo? - -{% include figure.html filename="sorting-shot-1l1r.png" caption="Palabras que aparecen junto a *shot*." %} - -Puedes ajustar la forma en que AntConc ordena la información encontrada si cambias los parámetros que en la imagen anterior aparecen encerrados en el círculo de color rojo: L corresponde a izquierda (*left*) y R a derecha (*right*); lo anterior puede extenderse hasta 5 posiciones en cualquier dirección. Los valores por defecto de la aplicación son 1 izquierda (1L), 2 derecha (2R), 3 derecha (3R); pero puedes alterarlos, por ejemplo, a 3 izquierda (3L), 2 izquierda (2L), 1 derecha (1R) (en aras de obtener frases o trigramas que finalicen con el término buscado) si haces clic en las flechas hacia arriba y abajo que se encuentran junto a los parámetros.
Si no deseas realizar este tipo de clasificación, puedes omitirla (dejar los valores predeterminados 1L, 2R y 3R) o dejar todos los parámetros con el valor 0. Cabe la posibilidad de generar clasificaciones menos lineales, como 4L, 3R, 5R, que arrojarían como resultado mucha más información del contexto. El programa puede tardar un poco en mostrar este tipo de clasificaciones, por lo que sugerimos tener paciencia al efectuarlas. Si no estás seguro de cuáles serán los resultados arrojados por la búsqueda, haz clic en "Sort" para ver qué ocurre y efectúa los ajustes a los que haya lugar según tus necesidades. - -### Operadores de búsqueda - -#### Operador * (comodín) - -El operador * (que sirve para buscar 0 o más caracteres) puede ayudar a encontrar las formas de sustantivos en singular y plural, por ejemplo. - -**Tarea:** busca _qualit*_ y ordena los resultados. ¿Qué tiende a preceder y seguir a las palabras 'quality' y 'qualities'? Una pista: son vocablos diferentes con contextos de uso distintos; identifica patrones de uso mediante la búsqueda KWIC. - -Para obtener una lista completa de los operadores comodín disponibles y su función, revisa "Global Settings" – "Wildcard Settings". - -{% include figure.html filename="wildcard-settings.png" caption="Configuración de operadores de búsqueda." %} - -Para conocer la diferencia entre los operadores * y ?, busca _th*n_ y luego _th?n_. Estas dos búsquedas, que a simple vista parecieran muy similares, arrojan resultados distintos. - -El operador *?* es más específico que *, así: - -*wom?n* – 'women' y 'woman'. - -*m?n* – 'man', 'men' y 'min'. - -Una búsqueda de _m*n_, en cambio, no es útil porque se obtendrán resultados que incluirán 'mean', 'melon', etc. - -**Tarea:** compara los resultados de las búsquedas de *wom?n* y *m?n*. - -- Ordena los resultados de cada búsqueda de manera que arrojen datos significativos (por ejemplo, configurar los parámetros de la búsqueda en 0, 1L y 2L). - -- Haz clic en "File" – "Save Output to Text File" y guarda el archivo (no olvides agregar la extensión .txt al nombre del mismo). - -> Sugerencia: durante la fase de exploración de tu investigación, generarás muchos documentos como este para efectos de consulta. Es conveniente, por tanto, nombrar los archivos de tal manera que se describa lo que estos contienen (por ejemplo, “wom?n-results.txt” en lugar de “antconc-results.txt”). - -{% include figure.html filename="save-output-as-text-file.png" caption="Opción *Save output as text file*." %} - -{% include figure.html filename="save-as.png" caption="Cuadro de diálogo *Save As*." %} - -Con lo anterior, puedes abrir el archivo de texto plano generado por el programa en un editor de texto; es posible que debas ampliar la ventana de la aplicación para que este sea legible. - -{% include figure.html filename="results.png" caption="Archivo de resultados de búsqueda KWIC exportado por AntConc, tal como se muestra en un editor de texto." %} - -Realiza el proceso anterior con los resultados de las dos búsquedas y compara los archivos de texto generados. ¿Qué fenómenos puedes ver? - -#### Operador | ("o") - -**Tarea:** busca _she\|he_. - -Ahora, busca las dos palabras anteriores por separado: ¿cuántas veces aparece 'she' en comparación con 'he'? - -La palabra 'she' (ella) aparece en mucha menor cantidad que 'he' (él). ¿Por qué? ¡Esa es una pregunta de investigación!
Una buena manera de ampliar este cuestionamiento podría radicar en ordenar la búsqueda anterior para identificar patrones de uso de las palabras en cuestión, y revisar si las mismas están seguidas de algún verbo en particular. - -**Tarea:** a modo de práctica, busca una palabra que te interese, ordena los resultados de formas diferentes, usa los operadores comodín y exporta los datos obtenidos como archivos de texto plano. He aquí un interrogante orientador: ¿qué tipo de patrones puedes observar? ¿Puedes explicarlos? - -### Colocaciones y listas de palabras -Después de haber analizado las líneas de resultados de la vista KWIC en busca de patrones, ¿no te gustaría que hubiera una forma de que el computador te brindara una lista de palabras que aparecen más frecuentemente con la palabra clave buscada? - -Buenas noticias: existe una manera de obtener esta información en AntConc; está disponible en la pestaña *Collocates* (colocaciones). Al hacer clic en la misma, aparecerá un mensaje por medio del cual la aplicación dirá que necesita crear una lista de palabras. Haz clic en "OK" y el programa lo hará automáticamente. - -> Nota: solo recibirás este aviso cuando no hayas creado una lista de palabras. - -{% include figure.html filename="wordlistwarning.png" caption="Mensaje de advertencia para indicar la necesidad de generar una lista de palabras." %} - -Ahora, intenta generar la lista de colocaciones para el término 'she'. - -Los resultados sin clasificar parecerán comenzar con palabras funcionales (palabras con las que se construyen frases) y luego pasarán a palabras de contenido (términos que dan sentido al texto): las primeras son [las más frecuentes en inglés](https://www.wordfrequency.info/free.asp), en tanto funcionan mayormente como elementos para construir frases. Versiones más recientes de AntConc suelen incluir el término buscado como primer resultado, posiblemente porque está presente en el texto y se quiere hallar palabras que puedan aparecer junto a él. - -Algunas personas podrían tener la intención de prescindir de esta clase de palabras mediante el uso de una lista de palabras funcionales (esta es una labor común cuando se hace modelado de tópicos). Desde nuestra óptica, no promovemos esta práctica porque los computadores se destacan, justamente, en la identificación de palabras con alta frecuencia de aparición; tal como se expresó anteriormente, tendemos a pasarlas por alto. Los computadores —y en especial las aplicaciones como AntConc— pueden mostrar dónde aparecen o no estas palabras, y esa información puede ser de interés, especialmente en colecciones de texto de gran envergadura (como se vio con las búsquedas de 'a', 'she' y 'he'). - -No obstante, en el caso de la lengua inglesa, la frecuencia de aparición de la letra 's' en el corpus también puede ser bastante alta, en tanto representa el posesivo *ʼs* (la aplicación no toma en cuenta el apóstrofo), pero AntConc la toma como otra palabra. Asimismo, la forma *ʼt* puede aparecer junto al verbo 'do' por cuanto conforman la contracción *donʼt*; la alta frecuencia de su aparición conjunta los convierte en colocaciones altamente probables. - -**Tarea:** genera la lista de colocaciones para las búsquedas de *m?n* y *wom?n*. Ahora, ordénalas de acuerdo con su frecuencia de aparición respecto del parámetro 1L.
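Por cierto, la lógica de la pestaña *Collocates* puede imitarse a pequeña escala por código. El siguiente esbozo en R (con una frase inventada y sin la medida estadística de asociación que aplica AntConc) solo ilustra la idea de contar las palabras que aparecen en una ventana alrededor del término buscado:

```r
# Esbozo ilustrativo de colocaciones: contar vecinos en una ventana de +/- 3 palabras.
# El texto es un ejemplo inventado; AntConc añade además medidas estadísticas.
texto <- "the old man saw the young woman and the old woman saw the man"
palabras <- strsplit(tolower(texto), "\\s+")[[1]]
objetivo <- "man"
ventana <- 3

posiciones <- which(palabras == objetivo)
vecinos <- unlist(lapply(posiciones, function(i) {
  rango <- max(1, i - ventana):min(length(palabras), i + ventana)
  palabras[setdiff(rango, i)]  # excluimos la posición del propio término
}))
sort(table(vecinos), decreasing = TRUE)  # posibles "colocados", por frecuencia
```

Volvamos ahora a AntConc y a los resultados de la tarea anterior.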
+ -Los resultados muestran lo que, en teoría, hace que un hombre (*man*) o una mujer (*woman*) sea “digno de mostrarse en el cine”: -- Las mujeres deben ser "bellas" (*beautiful*), "sofisticadas" (*sophisticated*) o estar "embarazadas" (*pregnant*). -- Los hombres tienen que estar, en cierto modo, fuera de lo común: deben ser "santos" (*holy*), "negros" (*black*) o "viejos" (*old*). - -Lo anterior no alude directamente a las películas, sino a la forma como se escribe sobre ellas en las reseñas, y puede llevar a cuestionamientos más sutiles, tales como "¿de qué manera se describen los roles de las mujeres en las comedias románticas en las reseñas escritas por hombres frente a las escritas por mujeres?" - -### Comparación de corpus - -Uno de los tipos de análisis más potentes radica en comparar el corpus propio con uno de referencia más extenso. - -Para este ejercicio, hemos tomado reseñas de filmes en los que Steven Spielberg ha estado involucrado (como director o productor). Podemos compararlas con un corpus de referencia de películas de toda una gama de directores. - -Asegúrate de pensar cuidadosamente sobre las características que podría tener un corpus de referencia para tu propia investigación (por ejemplo, un estudio del lenguaje de Agatha Christie en sus últimos años funcionaría muy bien como un corpus de análisis para compararlo con un corpus de referencia de todas sus novelas). Recuerda que, como lo expresamos anteriormente, la construcción del corpus es un subcampo en sí mismo. - -- Dirígete a "Settings" – "Tool preferences" – "Keyword List". -- Asegúrate de que la casilla de verificación "Use raw files" esté seleccionada en el menú "Reference Corpus". -- Haz clic en el botón "Add Directory" y selecciona la carpeta que contiene los archivos del corpus de referencia. -- Verifica que dispongas de la lista completa de archivos en el listado que se mostrará. - -{% include figure.html filename="adding-a-reference-corpus.png" caption="Carga de un corpus de referencia." %} - -- Haz clic en el botón "Load" y espera que el programa cargue los archivos; una vez la casilla de verificación "Loaded" esté marcada, haz clic en "Apply". - -Existe la posibilidad de intercambiar los roles del corpus de referencia y los archivos principales (es decir, dar al primero la función de los segundos y viceversa) por medio del botón "Swap Ref/Main Files"; en este punto vale la pena experimentar con esta opción y comparar los resultados obtenidos. - -> Si estás utilizando una versión más reciente del programa, el botón anterior puede llamarse "Swap with Target Files". Adicionalmente, cualesquiera sean los datos que vayas a utilizar como corpus de referencia, asegúrate de que estos se carguen correctamente en AntConc (esto es, haz clic en el botón "Load" cada vez que cargues o intercambies un corpus). - -- Dirígete a la pestaña "Keyword list" y una vez allí, presiona el botón "Start" (sin escribir nada en la casilla de búsqueda). Si intercambiaste el corpus de referencia con los archivos objeto del análisis, el programa anunciará la necesidad de crear una nueva lista de palabras antes de generar la lista de palabras clave. Esta se compondrá de aquellos términos que resulten mucho más "inusuales" —de aparición menos probable en términos estadísticos— en el corpus que se está viendo *vs.* el de referencia.
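Para intuir qué significa aquí "inusual", puede servir un cálculo simplificado. Una medida habitual de *keyness* en lingüística de corpus es la razón de log-verosimilitud (*log-likelihood*); el siguiente esbozo en R la calcula para una sola palabra con frecuencias inventadas, solo a modo de ilustración:

```r
# Esbozo: log-verosimilitud (log-likelihood) como medida de "keyness" de una palabra.
# Todas las cifras son inventadas; sirven únicamente para ilustrar el cálculo.
a  <- 120     # frecuencia de la palabra en el corpus de estudio
n1 <- 50000   # tamaño (en palabras) del corpus de estudio
b  <- 40      # frecuencia de la palabra en el corpus de referencia
n2 <- 150000  # tamaño (en palabras) del corpus de referencia

e1 <- n1 * (a + b) / (n1 + n2)  # frecuencia esperada en el corpus de estudio
e2 <- n2 * (a + b) / (n1 + n2)  # frecuencia esperada en el corpus de referencia
ll <- 2 * (a * log(a / e1) + b * log(b / e2))
ll  # cuanto mayor el valor, más "clave" resulta la palabra en el corpus de estudio
```

Con estas frecuencias de ejemplo, `ll` ronda 176: la palabra aparece mucho más de lo esperable en el corpus de estudio. La definición formal, a continuación.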
+ +> *Keyness* (calidad de la palabra clave): corresponde a la frecuencia de aparición de una palabra en el texto cuando se la compara con su frecuencia en un corpus de referencia, "de tal suerte que la probabilidad estadística, calculada mediante un procedimiento determinado, es menor o igual que el valor *p* especificado por el usuario" (información tomada de [este sitio](https://www.lexically.net/downloads/version6/HTML/index.html?keyness_definition.htm)). Para profundizar sobre los detalles estadísticos de este tópico, sugerimos revisar la sección sobre el mismo en la página 7 del [archivo *Readme* de AntConc](https://www.laurenceanthony.net/software/antconc/releases/AntConc335/help.pdf). + +¿Cuáles son nuestras palabras clave? + +{% include figure.html filename="spielberg-vs-movie-reviews.png" caption="Spielberg *vs.* reseñas cinematográficas." %} + +## Discusión: hacer comparaciones significativas + +Es importante tener en cuenta que la forma en que se organicen los archivos de texto para la investigación tendrá efectos en el tipo de interrogantes que puedan surgir de los mismos, así como en los resultados que se obtengan del análisis. A este respecto, recuerda que la comparación realizada aquí entre reseñas negativas y positivas es extremadamente simple; si se quisiere, podrían efectuarse comparaciones adicionales con otros subconjuntos de reseñas, lo cual daría pie a la formulación de interrogantes muy distintos. + +Así entonces, los archivos que se dispongan en el corpus determinarán los resultados obtenidos. Reiteramos que los temas de representatividad y muestreo son muy relevantes en este sentido: no siempre es necesario o ideal utilizar todo un conjunto de datos, incluso si se dispone de él. En este punto, realmente cabe preguntarse por la manera como estos métodos de análisis textual ayudan a generar preguntas de investigación. + +Si se piensa, por ejemplo, en el funcionamiento de las reseñas cinematográficas en tanto género discursivo, puede dirigirse la atención hacia oposiciones como las siguientes: + +- Reseñas cinematográficas *vs.* reseñas musicales +- Reseñas cinematográficas *vs.* reseñas de libros +- Reseñas cinematográficas *vs.* noticias deportivas +- Reseñas cinematográficas *vs.* noticias en general + +Cada una de estas comparaciones aportará información distinta y puede derivar en preguntas de investigación diferentes, tales como: + +- ¿En qué difieren las reseñas cinematográficas de otros tipos de reseñas de productos mediáticos? + +- ¿En qué se diferencian las reseñas cinematográficas de otros tipos de escritos susceptibles de publicarse? +- ¿Cómo se comparan las reseñas de películas con otros géneros de escritura, tales como la crónica deportiva? +- ¿Qué tienen en común las reseñas cinematográficas y las musicales? + +Desde luego, puede darse la vuelta a estos cuestionamientos para generar nuevas preguntas: + +- ¿En qué se diferencian las reseñas bibliográficas de las cinematográficas? + +- ¿En qué difieren las reseñas musicales de las cinematográficas? +- ¿Qué tienen en común los artículos que se publican en la prensa escrita? +- ¿En qué se asemejan las reseñas cinematográficas a otros tipos de escritos susceptibles de publicarse? + +En síntesis, vale la pena pensar en: + +- Por qué se quiere comparar dos corpus. +- Qué tipo de consultas da lugar a preguntas de investigación significativas. +- Principios de construcción de corpus: muestreo y capacidad de asegurar que se obtengan datos representativos. 
+ -### Recursos adicionales - -*[A Short Bibliography on Corpus Linguistics](https://hfroehlich.wordpress.com/2014/05/11/intro-bibliography-corpus-linguistics/)* - -[Una versión más sencilla de este tutorial, concebida para usuarios con pocos conocimientos de computación](https://hfroehli.ch/workshops/getting-started-with-antconc/) (en inglés). - -*[Guía rápida de análisis de corpus con AntConc](https://rua.ua.es/dspace/bitstream/10045/43959/4/grac.pdf)*, publicada por la Universidad de Alicante (2015). - -## Notas de traducción - -[^1]: Investigador y docente de la Universidad de Waseda (Japón). -[^2]: La interfaz del programa solo está disponible en inglés. -[^3]: Dos materiales en español pueden ser de utilidad si se desea profundizar en esta temática: de un lado, la conferencia *[Aproximación al concepto de representatividad de corpus](https://www.youtube.com/watch?v=bvTigjPhZco)*; y de otro, la obra *[Explotación de los córpora textuales informatizados para la creación de bases de datos terminológicas basadas en el conocimiento](https://elies.rediris.es/elies18/)*. -[^4]: Si se requiere trabajar con corpus en cuyos textos se emplean caracteres especiales (como es el caso de los documentos escritos en lengua española), es imperativo prestar atención a la codificación con la cual se guardaron los archivos que los componen. Por defecto, AntConc está configurado para operar con documentos de texto plano con codificación Unicode (UTF-8). Así entonces, es preciso verificar en el editor de texto que estos se hayan guardado atendiendo a lo anterior, o bien cambiar los parámetros de importación de archivos en el programa según las necesidades (por ejemplo, trabajar con archivos codificados en ANSI). diff --git a/es/lecciones/analisis-de-correspondencia-en-r.md b/es/lecciones/analisis-de-correspondencia-en-r.md index a288c91498..bb8bf76f8d 100644 --- a/es/lecciones/analisis-de-correspondencia-en-r.md +++ b/es/lecciones/analisis-de-correspondencia-en-r.md @@ -133,7 +133,7 @@ harper_df <- read.csv("https://raw.githubusercontent.com/programminghistorian/je ## Los datos -Los datos originales de la versión en inglés de este tutorial se encuentran archivados en [Zenodo](http://doi.org/10.5281/zenodo.889846), en caso de que quieras ver los datos brutos. Se han incluido en formato tabular también. En esta traducción al español trabajaremos sobre una versión traducida de los datos (no es necesario que descargues estos archivos de forma manual; los descargaremos directamente usando R): +Los datos originales de la versión en inglés de este tutorial se encuentran archivados en [Zenodo](https://doi.org/10.5281/zenodo.889846), en caso de que quieras ver los datos brutos. Se han incluido en formato tabular también. En esta traducción al español trabajaremos sobre una versión traducida de los datos (no es necesario que descargues estos archivos de forma manual; los descargaremos directamente usando R): 1) [CPCs de Harper]({{ site.baseurl }}/assets/correspondence-analysis-in-R/es-translation/HarperCP-es.csv) 2) [CPCs de Trudeau]({{ site.baseurl }}/assets/correspondence-analysis-in-R/es-translation/TrudeauCP-es.csv) @@ -473,7 +473,7 @@ Otro puntaje importante es visible en el gráfico CA: el porcentaje de valor exp [^inertia]: En general, en estadística el término _inercia_ hace referencia a la variación o "extensión" de un conjunto de datos. Es análoga a la desviación estándar en la distribución de datos.
-[^pickton]: Ver Laura Kane (April 3, 2017), "Missing and murdered women's inquiry not reaching out to families, say advocates." *CBC News Indigenous*. [http://www.cbc.ca/news/indigenous/mmiw-inquiry-not-reaching-out-to-families-says-advocates-1.4053694](https://perma.cc/MH3Y-9HW2) +[^pickton]: Ver Laura Kane (April 3, 2017), "Missing and murdered women's inquiry not reaching out to families, say advocates." *CBC News Indigenous*. [https://www.cbc.ca/news/indigenous/mmiw-inquiry-not-reaching-out-to-families-says-advocates-1.4053694](https://perma.cc/MH3Y-9HW2) [^pvalue]: En estadística, un valor p, una abreviación para _valor de probabilidad_, es un indicador de qué tan probable es que un determinado resultado haya ocurrido por azar. Un valor p bajo sugiere una baja probabilidad de que el resultado sea producto del azar y, por lo tanto, entrega evidencia de que la hipótesis nula (en este caso, que los MP y los CP son categorías independientes) es poco probable. diff --git a/es/lecciones/analisis-de-sentimientos-r.md b/es/lecciones/analisis-de-sentimientos-r.md index ea74d54d62..ceeb6967c2 100644 --- a/es/lecciones/analisis-de-sentimientos-r.md +++ b/es/lecciones/analisis-de-sentimientos-r.md @@ -49,11 +49,11 @@ El paquete `syuzhet` trabaja con cuatro diccionarios de sentimientos: Bing, Afin Sus términos de uso indican que el vocabulario puede ser utilizado de forma gratuita con propósitos de investigación, por lo que todos los datos están disponibles para su descarga. -Si sabes inglés, puedes interactuar con las diferentes categorías en su página web [NRC Word-Emotion Association Lexicon](http://saifmohammad.com/WebPages/NRC-Emotion-Lexicon.htm). En ella también puedes encontrar trabajos publicados sobre la obtención de los valores para el vocabulario, su organización, ampliación, etc. +Si sabes inglés, puedes interactuar con las diferentes categorías en su página web [NRC Word-Emotion Association Lexicon](https://saifmohammad.com/WebPages/NRC-Emotion-Lexicon.htm). En ella también puedes encontrar trabajos publicados sobre la obtención de los valores para el vocabulario, su organización, ampliación, etc. ## Paquete `syuzhet` -El [paquete de R `syuzhet`](https://cran.r-project.org/web/packages/syuzhet/vignettes/syuzhet-vignette.html) fue desarrollado en 2015 por Matthew Jockers; continuamente introduce cambios y se encarga de mantenerlo (al momento de preparar esta lección se usó la versión de diciembre 2017). Una serie de entradas de blog acompañan el desarrollo del paquete, y pueden consultarse (en inglés) en el blog del profesor desde el [5 de junio de 2014.](http://www.matthewjockers.net/page/2/) +El [paquete de R `syuzhet`](https://cran.r-project.org/web/packages/syuzhet/vignettes/syuzhet-vignette.html) fue desarrollado en 2015 por Matthew Jockers, quien continuamente introduce cambios y se encarga de mantenerlo (al momento de preparar esta lección se usó la versión de diciembre de 2017). Una serie de entradas de blog acompañan el desarrollo del paquete, y pueden consultarse (en inglés) en el blog del profesor desde el [5 de junio de 2014.](https://www.matthewjockers.net/page/2/) Por descontado, el paquete ha sido desarrollado con pruebas en textos escritos o traducidos al inglés y no sin debate sobre su utilidad, por asignar valores a textos literarios que suelen ser, por naturaleza, bastante subjetivos.
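Como ilustración mínima de lo anterior (asumiendo que ya tienes instalado el paquete; las frases del ejemplo son inventadas), así se obtienen las ocho emociones y las dos polaridades del vocabulario NRC:

```r
# Esbozo mínimo con syuzhet y el diccionario NRC.
# Las frases son un ejemplo inventado; el argumento language selecciona
# la traducción del vocabulario NRC (aquí, la española).
library(syuzhet)

frases <- c("Estoy muy feliz con el resultado.",
            "Aquello fue una pérdida terrible y dolorosa.")
get_nrc_sentiment(frases, language = "spanish")
```

El resultado es una tabla con una fila por frase y una columna por cada emoción (alegría, tristeza, miedo, etc.) y por cada polaridad (positiva y negativa).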
diff --git a/es/lecciones/analisis-redes-sociales-teatro-1.md b/es/lecciones/analisis-redes-sociales-teatro-1.md index 08e3edd32f..84ef84f5c3 100644 --- a/es/lecciones/analisis-redes-sociales-teatro-1.md +++ b/es/lecciones/analisis-redes-sociales-teatro-1.md @@ -40,7 +40,7 @@ En esta lección trabajaremos las relaciones entre los personajes de los textos Para poder estudiar las relaciones entre personajes nos serviremos del [Análisis de Redes Sociales](https://perma.cc/UW6A-33KQ) (ARS), un campo de estudio interdisciplinario que toma elementos de la sociología, la psicología, la estadística, las matemáticas y las ciencias computacionales[^3]. Gracias al análisis de redes podemos abstraer y representar cualquier sistema formado por elementos relacionados y estudiarlo aplicando conceptos y medidas de la [teoría de grafos](https://perma.cc/P963-APQC). La informática, la física, la biología o la sociología, son disciplinas que tradicionalmente han identificado en sus campos de investigación sistemas susceptibles de estudiarse a través de redes, y recientemente también lo han hecho las humanidades, especialmente la historia[^4] y los estudios literarios. Del interés de la historia por el análisis de redes dan cuenta las lecciones de _Programming Historian_ [Análisis de redes temporal en R](/es/lecciones/analisis-temporal-red) o [De la hermenéutica a las redes de datos: Extracción de datos y visualización de redes en fuentes históricas](/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas). Por otro lado, los estudios literarios han utilizado el análisis de redes para el estudio de los sistemas de personajes, de las redes de producción literaria, para representar los resultados del análisis estilométricos de autoría, etc[^5]. Por ejemplo, sobre el estudio de personajes tetrales a través del análisis de redes sociales, podemos destacar los trabajos del grupo de investigación HDAUNIR a partir del corpus [BETTE](https://perma.cc/2NR3-V5UU)[^6]; y sobre el estudio de la novela y sus personajes los trabajos de Isasi[^7]. -El análisis de redes sociales es para la crítica literaria una metodología de tipo "distant reading" ([lectura distante](https://web.archive.org/web/20210622210039/http://dictionaryworldliterature.org/index.php/Lectura_distante)) en términos de Moretti[^8], o "macroanlysis" si preferimos el concepto de Matthew L. Jockers[^9]. Es decir, nos permite estudiar grandes cantidades de textos a través de sus formas, relaciones, estructuras y modelos[^10], al cambiar el foco de atención de las características individuales a las tendencias o patrones de repetidas en un corpus [^11]. Más recientemente, Escobar Varela ha investigado las posibilidades de estudiar el teatro a través de datos como parte de lo que denomina "computational theater research"[^12]. Este concepto refiere a los estudios teatrales computacionales en su sentido más amplio; incluye los enfoques escénicos además de los literarios. Desde un enfoque puramente textual, dentro de los "Computational Literary Studies"(CLS), está en proceso de conformación un área especializada en teatro, denominada "Computational Drama Analysis", que integra el análisis de redes sociales, junto a otras metodologías cuantitativas y computacionales, tal como la estilometría, el análisis de sentimientos o el modelado de tópicos[^13]. 
+El análisis de redes sociales es para la crítica literaria una metodología de tipo "distant reading" ([lectura distante](https://web.archive.org/web/20210622210039/https://dictionaryworldliterature.org/index.php/Lectura_distante)) en términos de Moretti[^8], o "macroanalysis" si preferimos el concepto de Matthew L. Jockers[^9]. Es decir, nos permite estudiar grandes cantidades de textos a través de sus formas, relaciones, estructuras y modelos[^10], al cambiar el foco de atención de las características individuales a las tendencias o patrones repetidos en un corpus[^11]. Más recientemente, Escobar Varela ha investigado las posibilidades de estudiar el teatro a través de datos como parte de lo que denomina "computational theater research"[^12]. Este concepto refiere a los estudios teatrales computacionales en su sentido más amplio; incluye los enfoques escénicos además de los literarios. Desde un enfoque puramente textual, dentro de los "Computational Literary Studies" (CLS), está en proceso de conformación un área especializada en teatro, denominada "Computational Drama Analysis", que integra el análisis de redes sociales, junto a otras metodologías cuantitativas y computacionales, tal como la estilometría, el análisis de sentimientos o el modelado de tópicos[^13]. Para llevar a cabo un análisis de redes sociales de personajes teatrales debemos seguir una serie de pasos consecutivos: * Paso 1. Creación del corpus de análisis @@ -471,7 +471,7 @@ Jiménez Fernández, C. M., y Calvo Tello, J. "Grafos de Escenas y Estudios Lite Jockers, M. L. _Macroanalysis: Digital Methods and Literary History_. University of Illinois Press, 2013. -Merino Recalde, D. "El sistema de personajes de las comedias urbanas de Lope de Vega. Propuesta metodológica y posibilidades del análisis de redes sociales para el estudio del teatro del Siglo de Oro" (Trabajo de Fin de Máster, Universidad Nacional de Educación a Distancia, 2022). [http://e-spacio.uned.es/fez/view/bibliuned:master-Filologia-FILTCE-Dmerino](https://perma.cc/4C7R-39V3). +Merino Recalde, D. "El sistema de personajes de las comedias urbanas de Lope de Vega. Propuesta metodológica y posibilidades del análisis de redes sociales para el estudio del teatro del Siglo de Oro" (Trabajo de Fin de Máster, Universidad Nacional de Educación a Distancia, 2022). [https://e-spacio.uned.es/fez/view/bibliuned:master-Filologia-FILTCE-Dmerino](https://perma.cc/4C7R-39V3). Martínez Carro, E. "Una interpretación digital de dos tragedias lorquianas: Yerma y Doña Rosita la soltera." _Caracteres: estudios culturales y críticos de la esfera digital_ 7, no. 2 (2018): 240-267.
diff --git a/es/lecciones/analisis-redes-sociales-teatro-2.md b/es/lecciones/analisis-redes-sociales-teatro-2.md index 5a715f6211..3e4a67afd5 100644 --- a/es/lecciones/analisis-redes-sociales-teatro-2.md +++ b/es/lecciones/analisis-redes-sociales-teatro-2.md @@ -1,288 +1,288 @@ ---- -title: "Análisis de redes sociales de personajes teatrales (parte 2)" -slug: analisis-redes-sociales-teatro-2 -layout: lesson -collection: lessons -date: 2023-11-30 -authors: -- David Merino Recalde -reviewers: -- Sara Arribas Colmenar -- Andrés Lombana -editors: -- Jennifer Isasi -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/547 -previous: analisis-redes-sociales-teatro-1 -series_total: 2 lessons -sequence: 2 -difficulty: 2 -activity: analyzing -topics: [network-analysis, distant-reading, data-visualization] -abstract: En esta lección aprenderás a realizar un Análisis de Redes Sociales con los personajes de un texto teatral. Aprenderás sobre la importación de datos a Gephi, la creación de visualizaciones, la implementación de medidas y algoritmos, y el análisis e interpretación de los resultados. -avatar_alt: Recorte de dibujo a pluma de la escenografía usada en la representación de la comedia 'La fiera, el rayo y la piedra' de Pedro Calderón de la Barca en 1690, en el que se puede ver a varios personajes interactuando en escena. -doi: 10.46430/phes0065 ---- - -{% include toc.html %} - -## Introducción a la segunda parte - -Esta es la segunda parte de la lección _Análisis de redes sociales de personajes teatrales_. En la [primera parte](/es/lecciones/analisis-redes-sociales-teatro-1) conocimos algunas de las aplicaciones del análisis de redes sociales (ARS) a los estudios literarios y aprendimos los conceptos y nociones necesarias para enfrentarnos a esta metodología computacional-cuantitativa. Además, establecimos que para llevar a cabo un análisis de redes sociales de personajes teatrales debemos seguir una serie de pasos consecutivos: - - * Paso 1. Creación del corpus de análisis - * Paso 2. Conseguir los datos - * Toma de decisiones para la extracción de datos - * Extracción y estructuración de datos - * El proceso de vaciado - * Paso 3. Visualización y análisis de grafos con Gephi - * Paso 4. Interpretación de los resultados - -Ya hemos visto los pasos 1 y 2, y en esta segunda parte trataremos los dos últimos pasos. Si has seguido la primera parte de la lección cuentas con todos los archivos necesarios para continuar. Si has saltado directamente a la segunda parte porque lo que te interesa es aprender visualización y análisis de grafos con [Gephi](https://gephi.org/), debes descargar ahora los archivos que utilizaremos aquí. En cualquier caso, recomendamos leer la primera parte, pues es importante comprender el proceso de extracción y recogida de datos para poder analizar correctamente los resultados del análisis. ¡Vamos a ello! - -## Paso 3. 
Visualización y análisis de grafos con Gephi - -Tenemos tres archivos CSV: por un lado, una [lista de nodos](/assets/analisis-redes-sociales-teatro-1/nodos_bizarrias.csv) (`nodos_bizarrias.csv`); por el otro, la [lista de aristas](/assets/analisis-redes-sociales-teatro-1/aristas-coaparicion_bizarrias.csv) de un grafo no dirigido (`aristas-coaparicion_bizarrias.csv`) y la [matriz de adyacencia](/assets/analisis-redes-sociales-teatro-1/aristas-interaccion_bizarrias.csv) de uno dirigido (`aristas-interaccion_bizarrias.csv`), según el criterio de la coaparición de personajes en escena y el de interacciones lingüísticas directas entre personajes, respectivamente. El siguiente paso es generar visualizaciones, los grafos propiamente dichos, y analizarlos aplicando lo que se conoce como 'medidas' o 'métricas' de ARS. - -### Instalación de Gephi y primeros pasos - -El programa que vamos a utilizar para llevar a cabo todo esto se llama [Gephi](https://gephi.org/), pero existen muchos otros para los que también te servirán los archivos CSV que hemos preparado[^1]. Gephi es un software libre de código abierto especializado en análisis de redes, muy conocido y utilizado en Humanidades Digitales, bastante intuitivo, y que es sostenido y actualizado por sus desarrolladores[^2]. Además, disponemos de numerosos [plugins](https://gephi.org/plugins/#/) (complementos de software que añaden funcionalidades al programa), [guías de uso](https://perma.cc/4RFA-TZB9), videotutoriales en español[^3] y una comunidad activa en Twitter/X y Github a la que consultar nuestras dudas. - -Lo primero que debemos hacer es instalar el programa. En su sitio web, [https://gephi.org/](https://gephi.org/), haz clic en _Download FREE_. Está disponible para Windows, Mac OS y Linux. Es posible que la web reconozca tu sistema operativo y te ofrezca lo que necesitas; si no, selecciona tu sistema operativo en el apartado **All Downloads**. Si necesitas ayuda con la instalación, puedes visitar [https://gephi.org/users/install/](https://perma.cc/YF6E-994N) (está solo disponible en inglés, pero puedes consultar los primeros minutos de este [videotutorial en español](https://www.youtube.com/watch?v=sX5XYec4tWo)). - -Una vez que finalices la instalación, ejecuta Gephi. Se abrirá una ventana de bienvenida con distintas opciones: crear un nuevo proyecto, abrir un archivo de grafo ya existente, una columna con proyectos y archivos recientes (si los hubiese) y varios proyectos de ejemplo. Haz clic en _Nuevo proyecto_: - -{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-01.png" alt="Captura de pantalla de la ventana de bienvenida al programa Gephi, con las opciones de crear un nuevo proyecto, abrir recientes o proyectos de ejemplo" caption="Figura 1. Ventana de bienvenida de Gephi" %} - -Ahora estás en la pantalla principal del programa. Gephi funciona mediante proyectos (fíjate que te indicará en la barra superior que estás en el **Proyecto 1**), y dentro de cada proyecto puedes crear distintos espacios de trabajo. Ahora estás en el **Espacio de trabajo 1**. Cada espacio de trabajo funciona como la pestaña de un navegador web y contiene a su vez los tres apartados de Gephi: **Vista general**, **Laboratorio de datos** y **Previsualización**. - -{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-02.png" alt="Captura de pantalla de la pantalla principal del programa Gephi, la llamada vista general" caption="Figura 2. Pantalla principal de Gephi, la Vista general" %} - -
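Antes de importar nada en Gephi, una comprobación opcional: puedes inspeccionar los CSV desde R para confirmar que tienen las columnas esperadas. Es solo un esbozo ilustrativo; se asume que los archivos descargados están en tu directorio de trabajo y que las columnas siguen la estructura descrita en la primera parte de la lección.

```r
# Esbozo mínimo: inspeccionar los CSV del análisis de redes antes de llevarlos a Gephi.
# Se asume que ambos archivos están en el directorio de trabajo actual.
nodos   <- read.csv("nodos_bizarrias.csv", fileEncoding = "UTF-8")
aristas <- read.csv("aristas-coaparicion_bizarrias.csv", fileEncoding = "UTF-8")

str(nodos)    # columnas esperadas: el id de cada personaje y sus atributos (género, función)
head(aristas) # columnas esperadas: Source, Target, Type, Weight y Label
```

Si todo se ve bien, continuemos con Gephi.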
-Si te aparece el programa en inglés, te recomiendo cambiar el idioma, pues esta lección se ha preparado con Gephi en español. Puedes hacerlo fácilmente en Tools > Language > Español. Te indicará que el programa debe cerrarse y que deberás reiniciarlo manualmente, es decir, volver a abrirlo. No es necesario que guardes nada si aún no has importado ningún dato. -
- -En la pestaña **Vista general**, se crean las visualizaciones y se aplican los filtros y medidas para analizar los grafos. En **Laboratorio de datos** se trabaja con los datos que generan los grafos, pudiéndose importar o introducir directamente, modificar y exportar. En el apartado de **Previsualización** se realizan los últimos ajustes para generar y exportar las visualizaciones (grafos) en formato de imagen `.svg`, `.pdf` o `.png`. - -Comencemos a trabajar: -1. En la barra de opciones superior, haz clic en **Espacio de trabajo** > **Nuevo** para crear un nuevo espacio de trabajo. -2. Renombra los dos espacios creados. Dentro de cada espacio, haz clic en **Espacio de trabajo** > _Renombrar_. Denomina al primero 'Coaparición en escena', y al segundo, 'Interacción lingüística'. -3. Guarda el proyecto en **Archivo** > _Guardar como_, y denomínalo `bizarrias.gephi`. - -### El laboratorio de datos: importación de aristas y nodos - -Ahora vamos a importar nuestros datos. Lo haremos en paralelo con los dos grafos, pues te ayudará a no perderte. Primero las aristas del grafo de coaparición de personajes en escena: -1\. En el espacio de trabajo 'Coaparición en escena', dirígete al **Laboratorio de datos** y haz clic en _Importar hoja de cálculo_. -2\. Busca y selecciona el archivo `aristas-coaparicion_bizarrias.csv` y haz clic en _Abrir_. -3\. Se abrirá una primera ventana de **Opciones generales de CSV**. Seguramente Gephi ha detectado que se trata de una tabla de aristas, que el separador es la coma y que el formato de codificación de caracteres es UTF-8. Si no, selecciona estas opciones en los desplegables y haz clic en _Siguiente_. - -{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-03.png" alt="Captura de pantalla de la ventana de importación de hojas de cálculo con las opciones generales de importación desde archivos CSV para la lista de aristas" caption="Figura 3. Ventana de importación de hojas de cálculo con las opciones generales para la lista de aristas" %} - -4\. En la siguiente ventana, **Parámetros de importación**, deja seleccionadas todas las casillas, pues queremos importar nuestras cinco columnas. Gephi reconoce el tipo de datos: `double` (números) para el peso y `string` (cadena de caracteres) para las etiquetas. Haz clic en _Terminar_. -5\. Ahora te aparecerá la última ventana del proceso: el **Informe de importación**. Verás que Gephi ha detectado que se trata de un grafo 'no dirigido' con 11 nodos y 42 aristas, y que no encuentra ningún problema en el archivo. Muy importante: cambia la selección de **Nuevo espacio de trabajo** a **Añadir al espacio de trabajo existente**. Queremos que nos importe los datos en el espacio en el que estamos trabajando, **Coaparición en escena**. Cuando lo hagas, haz clic en _Aceptar_. - -{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-04.png" alt="Captura de pantalla del informe de importación de una lista de aristas, con opciones finales como seleccionar el tipo de grafo o en qué espacio de trabajo se quiere realizar la importación" caption="Figura 4. Ventana con el informe de importación de la lista de aristas" %} - -Verás que ha aparecido una tabla con los `id` de los personajes en la pestaña **Nodos** y una tabla con las relaciones en la pestaña **Aristas**. Gephi ha extraído esta información de nuestra lista de aristas, asignando además un `id` a cada arista. - -Ahora vamos a importar las aristas del grafo de interacciones lingüísticas directas, siguiendo los mismos pasos: -1. 
Dentro del espacio de trabajo **Interacción lingüística** dirígete al **Laboratorio de datos** y haz clic en _Importar hoja de cálculo_. -2. Busca y selecciona el archivo `aristas-interaccion_bizarrias.csv` y haz clic en _Abrir_. -3. Se abrirá una primera ventana de **Opciones generales de CSV**. Seguramente Gephi ha detectado que se trata de una matriz, que el separador es la coma y que el formato de codificación de caracteres es UTF-8. Si no, selecciona estas opciones en los desplegables y haz clic en _Siguiente_. -4. En la siguiente ventana, **Parámetros de importación**, simplemente haz clic en _Terminar_. Ahora no hay columnas entre las que poder elegir. -5. Por último te aparecerá la ventana **Informe de importación**. Verás que Gephi ha detectado que se trata de un grafo 'dirigido' con 11 nodos y 51 aristas, y que no encuentra ningún problema en el archivo. Muy importante: cambia la selección de **Nuevo espacio de trabajo** a **Añadir al espacio de trabajo existente**. Como antes, queremos que nos importe los datos en el espacio en el que estamos trabajando, **Interacción lingüística**. Cuando lo hagas, haz clic en _Aceptar_. - -Gephi ha importado nuestra matriz y la ha transformado en una lista de aristas con un nodo de origen, otro de destino, un tipo de relación, un peso y un `id`. Además, ha creado 11 nodos utilizando como etiqueta el `id` numérico que les asignamos. - -En la nueva lista de aristas importada, que puedes ver en la pestaña **Aristas** del **Laboratorio de datos**, verás que nos faltan los atributos (‘Label’, etiqueta) que sí pudimos importar en el grafo de coaparición en escena, pues venían ya en nuestro archivo CSV. Nos faltan las relaciones entre los personajes: amor correspondido, amistad, servidumbre, etc. Para poder visualizarlas en este grafo tendremos que introducirlas manualmente en la columna correspondiente (’Label’, etiqueta). Puedes coger esta información de la lista de aristas del grafo no dirigido, teniendo en cuenta que ahora las relaciones están duplicadas y también tendrás, por tanto, que duplicar las etiquetas. Es decir, etiqueta como `amor correspondido` la relación de Belisa (nodo 1) a Don Juan (nodo 6) y también un `amor correspondido` de Don Juan (nodo 6) a Belisa (nodo 1). Y una relación de `amistad` de Belisa (nodo 1) a Celia (nodo 3) y otra relación de `amistad` de Celia (nodo 3) a Belisa (nodo 1). Cuando termines, tu lista de aristas dirigidas debería verse así: - -{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-05.png" alt="Captura de pantalla del laboratorio de datos en la pestaña de aristas, ya con todos los datos introducidos" caption="Figura 5. Pestaña de aristas después de introducir manualmente las etiquetas de las relaciones" %} - -Con las aristas preparadas, ahora vamos a importar los datos referentes a los nodos de los dos grafos. Los pasos ahora son exactamente los mismos para los dos grafos, así que hazlo primero en un espacio de trabajo y luego en el otro: - -1. Dentro del **Laboratorio de datos** de cada espacio de trabajo vuelve a hacer clic en _Importar hoja de cálculo_. -2. Ahora busca y selecciona el archivo [`nodos_bizarrias.csv`](/assets/analisis-redes-sociales-teatro-1/nodos_bizarrias.csv) y haz clic en _Abrir_. -3. En esta ocasión Gephi habrá detectado que se trata de una 'tabla de nodos', que nuevamente el separador es la coma y que la codificación de caracteres es UTF-8. Si no, selecciona estas opciones en los desplegables y haz clic en _Siguiente_. -4. 
-4. En la ventana **Parámetros de importación**, mantén seleccionadas todas las casillas; queremos que importe las cuatro columnas. Ahora ha detectado que tanto la columna `género` como `función` son cadenas de caracteres. Haz clic en _Terminar_.
-5. En la última ventana, **Informe de importación**, cerciórate de que ha identificado 11 nodos y que no hay problemas en la importación. En el desplegable referente al tipo de grafo, selecciona **No dirigido** o **Dirigido** en función del grafo al que estés importando los nodos. Importante: cambia una vez más la opción de **Nuevo espacio de trabajo** a **Añadir al espacio de trabajo existente**. Después, haz clic en _Aceptar_.
-
-{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-06.png" alt="Captura de pantalla de la ventana con el informe de importación de la lista de nodos" caption="Figura 6. Ventana con el informe de importación de la lista de nodos del grafo de coaparición de personajes en escena" %}
-
-Gephi ha importado la lista de nodos y ha combinado la nueva información con los nodos que creó antes a partir de la lista de aristas o la matriz de adyacencia. Este es el motivo por el que era importante sustituir los nombres de los personajes por su `id` antes de exportar las hojas de cálculo a CSV. Así, Gephi ha podido identificar quién es quién y fusionar los datos de ambos archivos.
-
-¡Enhorabuena! Hemos terminado la importación de los datos de los dos grafos, ahora podemos pasar a trabajar en la pestaña **Vista general**.
-
-### La vista general
-La **Vista general** es donde modificaremos la visualización de nuestros grafos (que se ve en el centro del programa) y donde aplicaremos las medidas y métricas de análisis. A la izquierda tienes las opciones de visualización (los paneles **Apariencia** y **Distribución**), y a la derecha están el panel con información sobre el grafo (**Contexto**) y los paneles **Filtros** y **Estadísticas** para consultar y analizar el grafo:
-
-{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-07.png" alt="Captura de pantalla de la vista general del espacio de trabajo con una primera visualización del grafo, aún sin cambiar parámetros de visualización" caption="Figura 7. Vista general de nuestro espacio de trabajo" %}
-
-Las opciones de visualización y análisis son muy numerosas y no las cubriremos todas en esta lección, así que para explorar e introducirnos en Gephi vamos a crear una visualización sencilla y aplicar solo algunas medidas básicas. A partir de ahora todos los pasos que des en un espacio de trabajo puedes replicarlos en el otro. Así, repetir los mismos pasos dos veces te servirá además para aprender a usar el programa. Después, te animo a continuar probando todas las demás opciones y configuraciones por tu cuenta.
-
-#### Modificar la apariencia y distribución del grafo
-
-En el centro de la **Vista general**, en el panel llamado **Grafo**, debería haber aparecido una red con nodos y aristas en negro. Seguramente, el grafo de la captura de arriba (es el de coaparición en escena) no es exactamente igual al que te ha aparecido a ti. Es normal, se ha generado con una distribución de nodos aleatoria. Comencemos a dar forma y color a nuestra red de personajes:
-
-1. Para desenmarañar la red empezaremos por aplicar un 'algoritmo de distribución'. En el panel de abajo a la izquierda, **Distribución**, elige el algoritmo `ForceAtlas 2` y modifica estos parámetros: escalado 2500 y activar _Evitar el solapamiento_. Lo demás puedes dejarlo como está por defecto. Haz clic en _Ejecutar_ y cuando el grafo se estabilice y deje de moverse, haz clic en _Parar_. ¿Qué ha ocurrido? Los nodos han comenzado a repelerse (alejarse) entre ellos a la vez que las aristas que los conectan los han intentado atraer. Así, se ha generado un movimiento que ha terminado convergiendo en una posición balanceada para cada nodo, en la que aquellos personajes más conectados entre sí han quedado más cerca y los menos conectados más alejados. El objetivo de este algoritmo de distribución no es otro que colocar los nodos de forma que nos ayude a entender e interpretar mejor el grafo[^4]. Además de `ForceAtlas 2` existen otros algoritmos, como puedes comprobar en el desplegable, pero este nos ofrece buenos resultados y es uno de los más extendidos.
-2. Ahora haz clic en el icono 'T' negro que se encuentra en la cinta de opciones inferior, a la derecha de la cámara fotográfica, en la parte inferior del panel del Grafo. Has activado las etiquetas (label) de los nodos, es decir, los nombres de los personajes. Puedes modificar el tamaño, tipografía y color en el resto de opciones de la cinta.
-3. Vamos a modificar ahora el color y el tamaño de los nodos y aristas. Para ello, ve al panel **Apariencia** (arriba a la izquierda) y sigue estas indicaciones:
-a. En **Nodos-Color** (icono de la paleta de pintura), selecciona **Partición** y escoge el atributo `Función`. Gephi asigna un color distinto a cada valor del atributo; puedes modificar la paleta de colores o dejar los colores por defecto y hacer clic en _Aplicar_. Los nodos del grafo se han coloreado y también lo han hecho las aristas. Ve a la cinta de opciones inferior y deselecciona la opción **Las aristas tienen el color del nodo de origen**; su icono es una línea con un arcoíris. Ahora las aristas serán todas de un mismo color gris.
-b. En **Nodos-Tamaño** (icono de los círculos), selecciona **Ranking** y escoge el atributo `Grado` (Gephi calcula automáticamente el grado de los nodos). Cambia el tamaño mínimo a 10 y el máximo a 40 y haz clic en _Aplicar_. Ahora los nodos tienen un tamaño relativo a su grado, es decir, a la cantidad de nodos con los que están relacionados. A mayor número de personajes con los que comparte escena un personaje -> mayor grado del nodo que representa el personaje -> mayor diámetro del nodo en la visualización.
-c. En **Aristas-Color** (icono de la paleta de pintura), selecciona **Ranking** y escoge el atributo `Peso`. Te aparecerá un gradiente de color. Puedes cambiar la paleta de colores o dejarlo en verde y hacer clic en _Aplicar_. Ahora el color de las aristas está más o menos intenso en función de su peso, es decir, del número de escenas que comparten los dos personajes o de sus interacciones lingüísticas. Si las ves muy finas, puedes cambiar el tamaño de las aristas en la cinta de opciones inferior; están por defecto más o menos gruesas también según el peso.
-
-Seguramente te ha quedado algo muy similar a esto en el caso del grafo de coaparición de personajes en escena:
-
-{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-08.png" alt="Captura de pantalla de la vista general del espacio de trabajo con la visualización del grafo una vez aplicados los parámetros de visualización escogidos" caption="Figura 8. Visualización del grafo de coaparición de personajes en escena, resultado de aplicar los parámetros indicados" %}
-
-¡Enhorabuena! Ahora puedes ver cuáles son los personajes más relacionados (`grado`) por el tamaño de los nodos, la `función` de estos personajes por el color de los nodos y la cantidad de veces que dos personajes coinciden en escena o interactúan entre ellos (`peso`) por el grosor y la intensidad de color de sus aristas. Si comparas la captura con tu vista del grafo de coaparición en escena puede que tu grafo tenga otra disposición. En realidad tus nodos y los míos están colocados en el mismo sitio y a la misma distancia, solo que están rotados en otro sentido. En el panel de **Distribución** puedes utilizar la opción **Rotar** (en el desplegable) y buscar una disposición que te guste más. No cambiará la distribución que creó el algoritmo `ForceAtlas 2`. Otras opciones que puedes explorar son **Contracción** y **Expansión**, o **Ajuste de etiquetas** si alguna está superpuesta.
-
-Una vez repitas los pasos también en el espacio de trabajo del grafo de interacciones lingüísticas y hayas modificado su apariencia, verás que en este caso las aristas tienen flechas que nos indican la dirección de las relaciones, pues se trata de un grafo dirigido:
-
-{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-09.png" alt="Captura de pantalla de la vista general del espacio de trabajo con la visualización del grafo una vez aplicados los parámetros de visualización escogidos" caption="Figura 9. Visualización del grafo de interacciones lingüísticas entre personajes, resultado de aplicar los parámetros indicados" %}
-
-También puedes activar las etiquetas de las aristas, haciendo clic en la 'T' blanca en la cinta de opciones de debajo del grafo. El color de las etiquetas y su tamaño deberás modificarlos en **Apariencia**, en la pestaña **Aristas-A subrayada** (color) y en la pestaña **Aristas-tT** (tamaño):
-
-{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-10.png" alt="Captura de pantalla de la vista general del espacio de trabajo con la visualización del grafo según los parámetros escogidos y con las etiquetas de las aristas visibles" caption="Figura 10. Visualización del grafo de coaparición de personajes en escena con las etiquetas de las aristas activadas" %}
-
-#### El contexto y los filtros
-
-Nos quedan por explorar los paneles de configuración de la derecha. El de **Contexto** nos da información sobre el grafo en pantalla. Por ejemplo, en el de interacciones lingüísticas nos dice que se trata de un 'grafo dirigido' con 11 nodos y 51 aristas.
-
-Vamos a probar los filtros, por ejemplo, filtrando cualquiera de los grafos según el género de los personajes:
-1. En el panel **Filtros**, despliega las carpetas **Atributos** y **Partición** (dentro de la primera).
-2. Selecciona el atributo `género (Nodo)` y arrástralo al panel de **Consultas**.
-3. Haz clic en _Mujer (45,45 %)_ y en _Filtrar_.
-
-Verás algo similar a esto, un grafo solo con los personajes clasificados por ti como **Mujer**:
-
-{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-11.png" alt="Captura de pantalla de la vista general del espacio de trabajo con el resultado de filtrar el grafo según el atributo 'mujer'" caption="Figura 11. Grafo resultante de filtrar por el atributo 'Mujer'" %}
-
-Puedes hacer lo mismo con los personajes **Hombre** o utilizar otro atributo para el filtrado, como la función de los personajes. Con cada filtro que apliques verás que la información del **Contexto** cambia. Para volver atrás, elimina el filtro con el botón derecho _Suprimir_ sobre el filtro o haciendo clic en _Restaurar_.
-
-#### Medidas, métricas y algoritmos de análisis
-
-Ahora vamos a aplicar algunas medidas en el panel **Estadísticas**. Te dejaré explicaciones de cada una. Gephi ha simplificado al máximo el análisis de los grafos, pues es tan fácil como hacer clic en _Ejecutar_ en la medida o algoritmo que queramos implementar. Algunas de estas medidas abrirán al ejecutarlas una ventana emergente, con un pequeño informe que podemos descargar o con opciones de configuración. Otras simplemente añadirán columnas en nuestra tabla de nodos del **Laboratorio de datos**. Estos nuevos datos, generados gracias a la aplicación de medidas, nos dan más información sobre nuestro grafo, nos permiten modificar la visualización a partir de ellos (son como nuevos atributos) y, exportándolos, podremos procesarlos en otra herramienta o programa. En esta lección no nos adentraremos ahí, pero quiero que sepas que a partir de aquí las posibilidades se multiplican.
-
-En el apartado **Visión general de la red** lo primero que encontramos es el ['grado medio'](https://perma.cc/M8B7-34LD), es decir, la media de los grados de todos los nodos del grafo. Recordemos que el grado es el número de nodos con los que un nodo está conectado. En el caso de los grafos dirigidos, obtendremos además el 'grado medio de entrada' y el 'grado medio de salida'. Después, el 'grado medio con pesos', que tiene en cuenta el peso de las aristas conectadas a un nodo y no simplemente la cantidad de nodos con los que se conecta. De nuevo, habrá un 'grado medio con pesos de entrada' y un 'grado medio con pesos de salida'. Al ejecutar estas dos estadísticas, se añadirán dos columnas nuevas en la tabla de nodos del **Laboratorio de datos** con los valores de grado y grado con peso de cada nodo:
-
-{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-12.png" alt="Captura de pantalla del laboratorio de datos del grafo de interacciones lingüísticas con columnas resultantes de aplicar las medidas de grado" caption="Figura 12. Laboratorio de datos del grafo de interacciones lingüísticas con las nuevas columnas de grado" %}
-
-El 'diámetro de la red' es una de las medidas de tamaño o distancia. Para entenderlo, primero has de saber que en análisis de redes se entiende por 'camino' una secuencia de nodos conectados por aristas. Esta noción de camino nos permite calcular las métricas de distancia y tamaño de la red. Por otro lado, se entiende por ['distancia'](https://perma.cc/YYA3-ZLG9) o 'longitud' de un camino el número de aristas (no de nodos) que deben cruzarse para ir de un nodo a otro (siempre por el camino más corto). El ['diámetro'](https://perma.cc/2EU8-J4ZR) es, entonces, la distancia entre los nodos más alejados de una red:
-
-{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-13.png" alt="Grafo explicativo del concepto 'diámetro', con las aristas que sirven para medir el diámetro coloreadas" caption="Figura 13. Ejemplo del diámetro de una red" %}
-
-Haz clic en _Ejecutar_ el diámetro:
-1. En la ventana que se ha abierto encontrarás definiciones de las métricas de distancia: distancia media, diámetro y las medidas de centralidad de intermediación, cercanía y excentricidad. Al ejecutar esta función, no solo se calcula el diámetro sino todas esas métricas relacionadas con la distancia.
-2. Gephi te permite normalizar las centralidades (ahora veremos lo que son) en un rango [0,1], lo que facilita después la comparación de grafos de obras distintas. Marca esta opción y haz clic en _Aceptar_.
-
-{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-14.png" alt="Captura de pantalla de la ventana de parámetros que se abre para ejecutar las medidas de distancia de un grafo" caption="Figura 14. Ventana de parámetros de distancia del grafo de coaparición de personajes en escena" %}
-
-Si comparas el diámetro de los dos grafos verás que hay diferencias: en uno es 2 y en el otro 4. Es normal la diferencia: nos habla de que hay personajes que comparten escena pero que no interactúan entre ellos.
-
-Si te diriges al **Laboratorio de datos**, verás que se han añadido varias columnas más en la tabla de nodos, ahora con los resultados de las 'medidas de centralidad'. La 'centralidad' en ARS tiene que ver con el lugar que ocupan los nodos en el conjunto de una red y nos ayuda a entender la 'importancia' de los nodos dentro del sistema que analizamos[^5]. Estas son algunas de las medidas de centralidad, pero hay unas cuantas más:
-- El 'grado' o el 'grado con pesos' pueden ser medidas de centralidad, pues valores más altos indican mayor conectividad. En ese caso, nos referimos a ellas como ['centralidad de grado'](https://perma.cc/2SW2-LZT4) (degree centrality) y 'centralidad de grado con pesos' (weighted degree centrality).
-- La ['centralidad de cercanía'](https://perma.cc/7E9Y-CH68) (closeness centrality) de un nodo se obtiene midiendo la distancia media que guarda dicho nodo con todos los demás del grafo. Dicho de otra forma, nos ayuda a encontrar el nodo más cercano a todos los demás, que no tiene por qué ser el de mayor grado (el más conectado).
-- La ['centralidad de intermediación'](https://perma.cc/5YSB-9KVX) (betweenness centrality) de un nodo se halla calculando la cantidad de veces que dicho nodo se encuentra en el camino más corto entre todos los otros nodos. La importancia de los nodos depende, en este caso, de su labor de intermediación, de puente conector entre nodos separados. Si faltan estos nodos, la estructura de un grafo suele verse muy afectada.
-
-Por ejemplo, en la comedia con la que estamos trabajando, *Las bizarrías de Belisa*, ningún personaje tiene una centralidad de intermediación normalizada demasiado alta. No hay ningún nodo que, eliminándolo, provoque un 'grafo disconexo' en el que ciertos nodos queden desconectados del núcleo principal.
-
-Siguiendo en el panel de **Estadísticas** nos encontramos la **Densidad**. La ['densidad'](https://perma.cc/E5C7-XVX8) mide el nivel de conectividad entre todos los nodos de un grafo. Por ejemplo, un grafo tendría una densidad del 100% cuando todos los nodos están conectados entre sí. Matemáticamente, la densidad se calcula a través de la proporción de aristas que tiene una red frente al total de aristas posibles, expresado el resultado en un rango [0,1]: cerca de 1 se dice que es un grafo 'denso'; cuanto más cerca de 0, se habla de un grafo 'disperso'. Haz clic en _Ejecutar_:
-1. Se abrirá una ventana que nos permite seleccionar si nuestro grafo es dirigido o no dirigido.
-2. Selecciona tu opción y haz clic en _Aceptar_.
-
-Nuevamente, hay diferencia entre la densidad del grafo de coaparición en escena y la del grafo de interacciones lingüísticas por el mismo motivo: hay personajes que comparten escena pero que no intercambian palabra.
-
-Vamos a saltar ahora al apartado **Community Detection**. En ARS se entiende por ['comunidad'](https://perma.cc/CJ23-HB7M) un grupo de nodos que están densamente interconectados entre sí y que a su vez están poco conectados con los nodos de otra comunidad:
-
-{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-15.png" alt="Grafo explicativo del concepto 'comunidad' con los nodos coloreados según la comunidad a la que pertenecen" caption="Figura 15. Ejemplo de grafo con comunidades coloreadas en dos colores distintos" %}
-
-Las distintas comunidades de un grafo se hallan implementando un 'algoritmo de [modularidad](https://perma.cc/PY99-MBVB)' que Gephi incorpora, y que podemos utilizar simplemente haciendo clic en _Ejecutar_.
-1. Se abrirá la ventana de **Parámetros de modularidad**. No es necesario que modifiques nada: utiliza la opción de aleatoriedad y de incorporar los pesos de las aristas, y deja la resolución en 1 (modularidad estándar).
-2. El algoritmo va a numerar las comunidades a partir del 0, pero si quieres que comience a contar en 1, simplemente cambia la opción **Classes start at: 1** y dale a _Aceptar_.
-
-Si implementas el algoritmo de modularidad en el grafo de interacciones lingüísticas directas comprobarás que se detectan tres comunidades de nodos. Puedes ver qué comunidad ha sido asignada a cada nodo en la nueva columna del **Laboratorio de datos**. Para visualizar las comunidades en el grafo, ve al panel **Apariencia** de la **Vista general** y cambia el color de los nodos eligiendo la partición **Modularity Class**, haciendo clic en _Aplicar_ con los colores por defecto o modificándolos. Debería quedarte un grafo similar a este:
-
-{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-16.png" alt="Captura de pantalla de la vista general del espacio de trabajo con la visualización del grafo de interacciones lingüísticas con los nodos coloreados según la comunidad a la que pertenecen: morado, verde o naranja" caption="Figura 16. Grafo de interacciones lingüísticas con los nodos coloreados según la comunidad a la que pertenecen, detectadas gracias al algoritmo de modularidad" %}
-
-Cuando has desplegado el menú de **Partición** en el color de los nodos habrás visto que han aparecido muchas más opciones de las que teníamos al principio, y es que puedes utilizar los resultados de las medidas que has ido implementando para colorear y dar tamaño a los nodos y aristas. Por ejemplo, utilizando la opción **Ranking** puedes poner el diámetro de los nodos en función de su centralidad de intermediación y el color graduado en intensidad según su grado. Esto te permitiría comparar a golpe de vista la diferencia entre ambas medidas para cada nodo. ¿Ves cómo las opciones se multiplican?
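
Como apunte, la detección de comunidades también puede reproducirse fuera de Gephi: su algoritmo de modularidad se basa en el método de Louvain, del que networkx incluye una implementación. Este esbozo asume la matriz de adyacencia de esta lección como archivo de entrada, y ten en cuenta que el reparto exacto de comunidades puede variar ligeramente entre implementaciones y ejecuciones:

```python
# Esbozo: detección de comunidades con el método de Louvain (networkx),
# asumiendo como entrada la matriz de adyacencia usada en esta lección
import networkx as nx
import pandas as pd

matriz = pd.read_csv("aristas-interaccion_bizarrias.csv", index_col=0)
matriz.columns = matriz.index  # asegura que filas y columnas usen los mismos id
G = nx.from_pandas_adjacency(matriz, create_using=nx.DiGraph)

comunidades = nx.community.louvain_communities(G, weight="weight", resolution=1, seed=1)
for i, comunidad in enumerate(comunidades, start=1):  # numeradas desde 1, como con 'Classes start at: 1'
    print(f"Comunidad {i}:", sorted(comunidad))
```
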
-
-### La previsualización: últimos ajustes y exportación de visualizaciones
-
-Para finalizar con el trabajo en Gephi, vamos a exportar alguna visualización en la pestaña de **Previsualización**. Al entrar, verás un gran panel gris vacío: es donde aparecerá el grafo una vez introduzcas los parámetros en el panel de configuración de la izquierda. Haz una prueba: entra a la previsualización del espacio de trabajo **Coaparición en escena**, haz clic en _Refrescar_ y mira cómo se ve tu grafo con los parámetros que vienen por defecto. Estarás viendo el mismo grafo de la **Vista general** pero con algunos ajustes de visualización. Ahora modifica estos parámetros y deja el resto como están por defecto:
-- Nodos:
-    - Ancho de borde: 0.0
-- Etiquetas de nodos:
-    - Mostrar etiqueta: activado
-    - Fuente: Arial 24 Sin Formato
-    - Tamaño proporcional: desactivado
-- Aristas:
-    - Grosor: 20
-    - Reescalar pesos: activado
-    - Color: original (es decir, el gradiente que pusimos en la vista general)
-- Etiquetas de aristas:
-    - Mostrar etiquetas: activado
-    - Fuente: Arial 14 Sin Formato
-    - Color: específico: #000000
-
-Haz clic en _Refrescar_ de nuevo y debería aparecerte un grafo similar a este, quizá con otra rotación:
-
-{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-17.png" alt="Captura de pantalla de la pestaña de previsualización, con la columna de opciones finales de visualización a la izquierda y el grafo resultante a la derecha" caption="Figura 17. Visualización final del grafo de coaparición de personajes en escena" %}
-
-Ahora puedes exportar la visualización haciendo clic en _Exportar SVG/PDF/PNG_ en la parte inferior del panel de la izquierda. Como bien deduces, esos son los tres formatos que permite exportar Gephi. [PNG](https://perma.cc/3CAF-NZTD) es un buen formato de imagen, y podrás insertarlo en un documento de texto, utilizarlo para crear un póster o una presentación de diapositivas. Si seleccionas en el desplegable `Files of type` la opción `Archivos PNG (*.png)` y accedes al menú de **Opciones**, Gephi te permitirá configurar la resolución de la imagen, el margen alrededor del grafo y si quieres fondo transparente o no.
-
-{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-18.png" alt="Captura de pantalla de las ventanas del menú de exportación de visualizaciones" caption="Figura 18. Menú de exportación de visualizaciones" %}
-
-Otra buena opción es exportar en [SVG](https://perma.cc/EBJ4-C2KZ), el formato de gráficos vectoriales escalables que se suele utilizar en diseño gráfico, ya que son manipulables por ejemplo con [CSS](https://perma.cc/6M8D-Q4MS) y [JavaScript](https://perma.cc/2M3K-JRT8). Si quieres utilizar tus visualizaciones en un sitio web, puede que este formato sea el que más te convenga. Además, este formato lo puedes abrir y editar con programas de código abierto como [Inkscape](https://inkscape.org/es/) o [LibreOffice Draw](https://es.libreoffice.org/descubre/draw/), o privativos como [Adobe Illustrator](https://www.adobe.com/es/products/illustrator.html).
-
-Si repites lo mismo con el grafo de interacción lingüística directa, ahora podrás seleccionar si quieres aristas curvas (que marcan la dirección en el sentido de las agujas de un reloj) o rectas con flechas. Por ejemplo, reutiliza los parámetros anteriores y modifica estos:
-- Aristas:
-    - Curvas: desactivado
-- Flechas de aristas:
-    - Tamaño: 3.0
-- Etiquetas de aristas:
-    - Mostrar etiquetas: desactivado
-
-Haz clic en _Refrescar_ y verás algo así (con los nodos coloreados según su comunidad porque antes aplicamos este cambio en la vista general):
-
-{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-19.png" alt="Captura de pantalla de la pestaña de previsualización, con la columna de opciones finales de visualización a la izquierda y el grafo resultante a la derecha" caption="Figura 19. Visualización final del grafo de interacciones lingüísticas entre personajes" %}
-
-## Paso 4. Interpretación de los resultados
-
-Hemos generado visualizaciones y aplicado medidas a los grafos construidos gracias a los datos que primero extrajimos de *Las bizarrías de Belisa*. Las visualizaciones ya nos pueden ayudar en el análisis de una obra, por ejemplo, ilustrando un análisis de los personajes más 'tradicional'. Pero si has llegado hasta aquí, seguramente lo que te interesa es tener en consideración los datos obtenidos de la aplicación de medidas, métricas y algoritmos.
-
-Primero creo que es necesario incidir en que los datos obtenidos de un análisis de redes sociales como el que hemos llevado a cabo deben analizarse cuidadosamente y no utilizarse para confirmar hipótesis sin una valoración crítica. En realidad, todo el proceso que has llevado a cabo, desde la elección del corpus hasta la creación de visualizaciones, debe considerarse parte del proceso crítico de investigación. Piensa, por ejemplo, en la tediosa extracción de datos y todas las decisiones interpretativas que has tomado. ¡Cualquier otra decisión variaría los resultados! Por eso debes insistir en ser consistente con el procedimiento y los criterios de análisis que elijas, y comunicarlos con detalle para contextualizar tus resultados.
-
-Vamos entonces a explorar los datos y grafos obtenidos de nuestro análisis de redes sociales de *Las bizarrías de Belisa*. Mi primera recomendación es que, después de aplicar las medidas y algoritmos que te interesen, vayas al **Laboratorio de datos** y hagas clic en _Exportar tabla_ para exportar la tabla de nodos, pero ahora con las nuevas columnas agregadas con más datos sobre los personajes. Gracias a este CSV podrás procesar los resultados cómodamente con lenguajes de programación como [R](https://perma.cc/7ESJ-S5K4) (enfocado al análisis estadístico) o [Python](https://perma.cc/BT4G-U7FE), o incluso con el mismo programa de hojas de cálculo que utilizaste para recoger tus datos.
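
Por ejemplo, un esbozo mínimo en Python con pandas bastaría para ordenar los personajes por su grado con pesos. Los nombres del archivo exportado y de las columnas (`Label`, `Degree`, `Weighted Degree`) son supuestos: compruébalos en tu propio CSV antes de ejecutarlo:

```python
# Esbozo: ordenar la tabla de nodos exportada desde Gephi por grado con pesos.
# El nombre del archivo y de las columnas son hipotéticos; ajústalos a tu CSV.
import pandas as pd

nodos = pd.read_csv("nodos-interaccion_bizarrias-exportado.csv")
ranking = nodos.sort_values("Weighted Degree", ascending=False)
print(ranking[["Label", "Degree", "Weighted Degree"]])
```
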
-
-Hagamos esto último. Abre un nuevo archivo de hojas de cálculo e importa la tabla de nodos CSV del grafo de interacción lingüística que acabas de exportar de Gephi. Puedes llamar a este nuevo archivo `analisis-datos_Bizarrias`. ¿Qué podemos hacer ahora? Primero analicemos el grado de los personajes, que, recordemos, cuantifica lo conectado que está un nodo con el resto de nodos de la red social. Los nodos, además de 'grado' (a secas), también tienen 'grado con peso'. El primero tiene que ver con el número de personajes con los que habla un nodo (en un sentido y otro) y el segundo tiene en cuenta además la cantidad de interacciones. Fijémonos en las diferencias entre una y otra medida, observando estos gráficos generados en la hoja de cálculo mediante las opciones que ofrece Google Sheets:
-
-{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-20.png" alt="Gráficos de barras verticales con los grados y grados con pesos de los personajes de la comedia analizada, ordenados de mayor a menor grado" caption="Figura 20. Grados y grados con pesos de los personajes de 'Las bizarrías de Belisa' según sus interacciones lingüísticas directas" %}
-
-Don Juan ha resultado ser el personaje que más interactúa, logrando el grado más alto de toda la red social (15) y superando por un punto a Belisa, la indiscutible protagonista femenina que incluso da nombre a la comedia. ¿Por qué? Si vamos a nuestro grafo podremos ver cómo Don Juan interactúa con Octavio y Julio, mientras que Belisa, aunque se enfrenta a ellos vestida de hombre y con espada, no cruza palabra durante dicho enfrentamiento. Sin embargo, si vemos los datos del grafo de coaparición en escena, son Belisa y su criada Finea quienes logran el grado más alto, convirtiéndose en los dos únicos personajes de la comedia que comparten escena al menos una vez con todos los demás personajes (por eso su grado es 10). Pero recordemos: compartir escena no significa necesariamente compartir diálogo, como nos demuestra el grafo dirigido. ¿Y en cuanto al grado con peso? Si volvemos al gráfico de barras, ahora sí Belisa logra la primera posición, y supera con creces a Don Juan. Su grado con peso es 318, es decir, se dirige 157 veces a otros personajes y es receptora de 161 intervenciones. Como vemos, en función de qué nos interese estudiar de un texto teatral, puede interesarnos más un criterio de análisis u otro.
-
-Veamos por último un dato global de los grafos: su densidad. El grafo de coaparición en escena (no dirigido) tiene una densidad de 0,764, mientras que el de interacción lingüística alcanza tan solo 0,464. ¿Qué nos aporta esta información? *Las bizarrías de Belisa* es una comedia bastante densa en cuanto a la coaparición de personajes en escena (cuanto más cerca de 1, mayor densidad). Son pocos personajes, tan solo once, y la configuración de la acción genera que compartan muchas escenas. Lope escribió una comedia urbana del gusto de la época, alejado ya de sus primeras incursiones en el género, en las que el reparto superaba los 20 personajes y las acciones estaban más dispersas. Sin embargo, la densidad del grafo dirigido no llega al medio punto, lo que nos demuestra que el hecho de que los personajes coincidan en escena no significa que necesariamente dialoguen. La diferencia entre la densidad de los dos tipos de grafo en esta comedia podemos explicarla principalmente por la situación particular de Octavio, galán rival de don Juan (por ser pretendiente también de Lucinda, la segunda dama). Aunque sabemos que Octavio visita a Lucinda (le vemos salir de su casa), esta pareja nunca interactúa en el escenario. Es una situación quizá algo atípica, pero que entendemos por el desdoblamiento de galanes rivales: don Juan y el Conde pretenden a Belisa, y don Juan y Octavio pretenden a Lucinda. Dado que la acción amorosa principal es la de Belisa, Lope no dedica demasiados versos al desarrollo de la relación entre Octavio y Lucinda.
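
Estos dos valores de densidad pueden comprobarse a mano aplicando la fórmula de la proporción de aristas a los recuentos que ya conocemos (11 nodos, 42 aristas no dirigidas y 51 dirigidas):

```python
# Comprobación de las densidades que reporta Gephi:
# densidad = aristas existentes / aristas posibles
n = 11
print(42 / (n * (n - 1) / 2))  # grafo no dirigido: 42 de 55 aristas posibles, aprox. 0,764
print(51 / (n * (n - 1)))      # grafo dirigido: 51 de 110 aristas posibles, aprox. 0,464
```
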
-
-No podemos explorar todos los resultados del análisis practicado sobre *Las bizarrías de Belisa*, así que sirva lo dicho para comprender el tipo de conclusiones a las que nos llevan los datos y grafos generados. Por último, cabe apuntar las posibilidades del análisis comparado de redes sociales, es decir, a partir de un corpus de dos o más obras. Por ejemplo, este es un gráfico en el que se compara el grado con pesos normalizado (sobre 1) de los primeros galanes y primeras damas de ocho comedias urbanas de Lope de Vega (en orden cronológico), entre las que se incluye la que hemos utilizado en esta lección:
-
-{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-21.png" alt="Diagrama de dispersión de puntos con líneas de tendencia comparando el grado con pesos normalizado de los primeros galanes y primeras damas de ocho comedias urbanas de Lope de Vega" caption="Figura 21. Gráfico comparativo del grado con pesos normalizado de los primeros galanes y primeras damas de ocho comedias urbanas de Lope de Vega (elaboración propia, Merino Recalde, 2022)" %}
-
-## Recapitulación final
-
-Terminemos esta lección anotando las cuestiones elementales que deberás tener en cuenta cuando realices un análisis de redes sociales de textos teatrales:
-1. Divide el proceso en cuatro partes diferenciadas:
-    a. Creación del corpus
-    b. Extracción y estructuración de datos
-    c. Visualizaciones y análisis
-    d. Interpretación de los resultados (datos y grafos)
-2. Documenta el proceso y la toma de decisiones. Sé consistente en ello. Procura basarte siempre en criterios preestablecidos, ya sean provenientes de otras investigaciones que trabajen con el mismo tipo de obras o diseñados por ti en función de tus objetivos y del corpus de análisis.
-3. Procura guardar tus datos finales en [formatos abiertos](https://perma.cc/M2XM-DYUZ) que garanticen el acceso a los datos a largo plazo, como el CSV (`.csv`). Si únicamente guardas tus datos en formato Excel (`.xlsx`) o en la extensión del propio Gephi (`.gephi`), puede que tu archivo termine corrompiéndose o fallando. Un CSV tiene una vida más larga, es más fácil de preservar y rápidamente puedes importarlo, transformarlo y volver sobre tus datos para reconstruir tus grafos y análisis.
-4. Cuando generes visualizaciones, anota los parámetros que utilizaste (tamaño de los nodos, colores, algoritmo de distribución, etc.). Es importante que acompañes tus resultados de esta información, pues ayuda a entender y contextualizar las representaciones.
-
-Y sobre todo, no tengas miedo de probar y explorar todas las posibilidades que nos ofrece el análisis de redes para estudiar la literatura teatral.
-
-## Notas
-
-[^1]: Existen otros programas y herramientas de análisis de redes que podemos mencionar. Por ejemplo, [Cytoscape](https://cytoscape.org/) es otro programa de código abierto y libre descarga, muy utilizado en bioinformática. También hay aplicaciones web: [Palladio](http://hdlab.stanford.edu/palladio/), desarrollada por el Humanities+Design Research Lab de la Stanford University y pensada para la investigación histórica; o [ONODO](https://onodo.org/), una aplicación muy sencilla que permite crear redes e implementar medidas fácilmente.
-[^2]: Esta lección se ha preparado con la versión 0.9.7 de Gephi. En 2022, y tras cinco años sin actualizaciones, se han publicado cinco versiones nuevas corrigiendo errores (bug fixes) y añadiendo mejoras. Por ejemplo, desde la versión 0.9.3 ya no es necesario instalar Java para que Gephi funcione en Windows y Linux, lo que causaba numerosos problemas en Windows. Durante las revisiones de esta lección se han publicado las versiones 0.10 y 0.10.1, pero sus actualizaciones no impiden el correcto seguimiento de esta lección. Puedes leer más acerca de las actualizaciones de Gephi en [https://gephi.wordpress.com/2022/05/11/transition-to-semantic-versioning/](https://perma.cc/XPF2-ZKJY) y en [https://github.com/gephi/gephi/releases](https://perma.cc/NQL4-77P2).
-[^3]: Por ejemplo, este estupendo videotutorial en 5 partes de Salvador Sánchez, disponible en YouTube: [https://www.youtube.com/playlist?list=PLIvIcfwy1T6IDiW3K10TplK3rvdwMLOb2](https://www.youtube.com/playlist?list=PLIvIcfwy1T6IDiW3K10TplK3rvdwMLOb2). O la *introducción rápida a Gephi* de José Manuel Galán, también en YouTube: [https://www.youtube.com/watch?v=sX5XYec4tWo](https://www.youtube.com/watch?v=sX5XYec4tWo).
-[^4]: Si te interesa conocer más sobre cómo funciona `ForceAtlas 2` y sabes inglés, te recomiendo este artículo de sus desarrolladores: Jacomy, Mathieu, Tommaso Venturini, Sebastien Heymann, y Mathieu Bastian. «ForceAtlas2, a Continuous Graph Layout Algorithm for Handy Network Visualization Designed for the Gephi Software». PLoS ONE 9, n.º 6 (2014): e98679. [https://doi.org/10.1371/journal.pone.0098679](https://doi.org/10.1371/journal.pone.0098679).
-[^5]: 'Importancia' es un concepto algo complejo. Debemos diferenciar la importancia de los nodos según su centralidad (una importancia cuantitativa derivada del ARS) y la importancia que le otorgamos a los personajes (una importancia cualitativa, por ejemplo: protagonista, secundario, terciario, etc.). La correlación entre estos dos tipos de importancia no siempre se da, como demuestran Santa María Fernández et al. en un estudio de 2020. Te recomiendo este artículo para explorar en profundidad las implicaciones de las medidas de centralidad: Santa María Fernández, Teresa, José Calvo Tello, y Concepción María Jiménez Fernández. «¿Existe correlación entre importancia y centralidad? Evaluación de personajes con redes sociales en obras teatrales de la Edad de Plata». Digital Scholarship in the Humanities 36, n.º June (2020): i81-i88. [https://doi.org/10.1093/llc/fqaa015](https://doi.org/10.1093/llc/fqaa015).
+---
+title: "Análisis de redes sociales de personajes teatrales (parte 2)"
+slug: analisis-redes-sociales-teatro-2
+layout: lesson
+collection: lessons
+date: 2023-11-30
+authors:
+- David Merino Recalde
+reviewers:
+- Sara Arribas Colmenar
+- Andrés Lombana
+editors:
+- Jennifer Isasi
+review-ticket: https://github.com/programminghistorian/ph-submissions/issues/547
+previous: analisis-redes-sociales-teatro-1
+series_total: 2 lessons
+sequence: 2
+difficulty: 2
+activity: analyzing
+topics: [network-analysis, distant-reading, data-visualization]
+abstract: En esta lección aprenderás a realizar un Análisis de Redes Sociales con los personajes de un texto teatral. Aprenderás sobre la importación de datos a Gephi, la creación de visualizaciones, la implementación de medidas y algoritmos, y el análisis e interpretación de los resultados.
+avatar_alt: Recorte de dibujo a pluma de la escenografía usada en la representación de la comedia 'La fiera, el rayo y la piedra' de Pedro Calderón de la Barca en 1690, en el que se puede ver a varios personajes interactuando en escena.
+doi: 10.46430/phes0065
+---
+
+{% include toc.html %}
+
+## Introducción a la segunda parte
+
+Esta es la segunda parte de la lección _Análisis de redes sociales de personajes teatrales_. En la [primera parte](/es/lecciones/analisis-redes-sociales-teatro-1) conocimos algunas de las aplicaciones del análisis de redes sociales (ARS) a los estudios literarios y aprendimos los conceptos y nociones necesarias para enfrentarnos a esta metodología computacional-cuantitativa. Además, establecimos que para llevar a cabo un análisis de redes sociales de personajes teatrales debemos seguir una serie de pasos consecutivos:
+
+ * Paso 1. Creación del corpus de análisis
+ * Paso 2. Conseguir los datos
+   * Toma de decisiones para la extracción de datos
+   * Extracción y estructuración de datos
+   * El proceso de vaciado
+ * Paso 3. Visualización y análisis de grafos con Gephi
+ * Paso 4. Interpretación de los resultados
+
+Ya hemos visto los pasos 1 y 2, y en esta segunda parte trataremos los dos últimos. Si has seguido la primera parte de la lección, cuentas con todos los archivos necesarios para continuar. Si has saltado directamente a la segunda parte porque lo que te interesa es aprender visualización y análisis de grafos con [Gephi](https://gephi.org/), debes descargar ahora los archivos que utilizaremos aquí. En cualquier caso, recomendamos leer la primera parte, pues es importante comprender el proceso de extracción y recogida de datos para poder analizar correctamente los resultados del análisis. ¡Vamos a ello!
+
+## Paso 3. Visualización y análisis de grafos con Gephi
+
+Tenemos tres archivos CSV: por un lado, una [lista de nodos](/assets/analisis-redes-sociales-teatro-1/nodos_bizarrias.csv) (`nodos_bizarrias.csv`); por el otro, la [lista de aristas](/assets/analisis-redes-sociales-teatro-1/aristas-coaparicion_bizarrias.csv) de un grafo no dirigido (`aristas-coaparicion_bizarrias.csv`) y la [matriz de adyacencia](/assets/analisis-redes-sociales-teatro-1/aristas-interaccion_bizarrias.csv) de uno dirigido (`aristas-interaccion_bizarrias.csv`), según el criterio de la coaparición de personajes en escena y el de interacciones lingüísticas directas entre personajes, respectivamente. El siguiente paso es generar visualizaciones, los grafos propiamente dichos, y analizarlos aplicando lo que se conoce como 'medidas' o 'métricas' de ARS.
+
+### Instalación de Gephi y primeros pasos
+
+El programa que vamos a utilizar para llevar a cabo todo esto se llama [Gephi](https://gephi.org/), pero existen muchos otros para los que también te servirán los archivos CSV que hemos preparado[^1]. Gephi es un software libre de código abierto especializado en análisis de redes, muy conocido y utilizado en Humanidades Digitales, bastante intuitivo, y que es sostenido y actualizado por sus desarrolladores[^2]. Además, disponemos de numerosos [plugins](https://gephi.org/plugins/#/) (complementos de software que añaden funcionalidades al programa), [guías de uso](https://perma.cc/4RFA-TZB9), videotutoriales en español[^3] y una comunidad activa en Twitter/X y GitHub a la que consultar nuestras dudas.
+
+Lo primero que debemos hacer es instalar el programa. En su sitio web, [https://gephi.org/](https://gephi.org/), haz clic en _Download FREE_. Está disponible para Windows, Mac OS y Linux. Es posible que la web reconozca tu sistema operativo y te ofrezca lo que necesitas; si no, selecciona en el apartado **All Downloads** la versión correspondiente a tu sistema operativo. Si necesitas ayuda con la instalación, puedes visitar [https://gephi.org/users/install/](https://perma.cc/YF6E-994N) (está solo disponible en inglés, pero puedes consultar los primeros minutos de este [videotutorial en español](https://www.youtube.com/watch?v=sX5XYec4tWo)).
+
+Una vez que finalices la instalación, ejecuta Gephi. Se abrirá una ventana de bienvenida con distintas opciones: crear un nuevo proyecto, abrir un archivo de grafo ya existente, una columna con proyectos y archivos recientes (si los hubiese) y varios proyectos de ejemplo. Haz clic en _Nuevo proyecto_:
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-01.png" alt="Captura de pantalla de la ventana de bienvenida al programa Gephi, con las opciones de crear un nuevo proyecto, abrir recientes o proyectos de ejemplo" caption="Figura 1. Ventana de bienvenida de Gephi" %}
+
+Ahora estás en la pantalla principal del programa. Gephi funciona mediante proyectos (fíjate que te indicará en la barra superior que estás en el **Proyecto 1**), y dentro de cada proyecto puedes crear distintos espacios de trabajo. Ahora estás en el **Espacio de trabajo 1**. Cada espacio de trabajo funciona como la pestaña de un navegador web y contiene a su vez los tres apartados de Gephi: **Vista general**, **Laboratorio de datos** y **Previsualización**.
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-02.png" alt="Captura de pantalla de la pantalla principal del programa Gephi, la llamada vista general" caption="Figura 2. Pantalla principal de Gephi, la Vista general" %}
+
+Si te aparece el programa en inglés te recomiendo cambiar el idioma, pues esta lección se ha preparado con Gephi en español. Puedes hacerlo fácilmente en Tools > Language > Español. Te indicará que el programa debe cerrarse y que deberás reiniciarlo manualmente, es decir, volver a abrirlo. No es necesario que guardes nada si aún no has importado ningún dato.
+
+En la pestaña **Vista general**, se crean las visualizaciones y se aplican los filtros y medidas para analizar los grafos. En **Laboratorio de datos** se trabaja con los datos que generan los grafos, pudiéndose importar o introducir directamente, modificar y exportar. En el apartado de **Previsualización** se realizan los últimos ajustes para generar y exportar las visualizaciones (grafos) en formato de imagen `.svg`, `.pdf` o `.png`.
+
+Comencemos a trabajar:
+1. En la barra de opciones superior, haz clic en **Espacio de trabajo** > **Nuevo** para crear un nuevo espacio de trabajo.
+2. Renombra los dos espacios creados. Dentro de cada espacio, haz clic en **Espacio de trabajo** > _Renombrar_. Denomina al primero 'Coaparición en escena', y al segundo, 'Interacción lingüística'.
+3. Guarda el proyecto en **Archivo** > _Guardar como_, y denomínalo `bizarrias.gephi`.
+
+### El laboratorio de datos: importación de aristas y nodos
+
+Ahora vamos a importar nuestros datos. Lo haremos en paralelo con los dos grafos, pues te ayudará a no perderte. Primero las aristas del grafo de coaparición de personajes en escena:
+1\. En el espacio de trabajo 'Coaparición en escena', dirígete al **Laboratorio de datos** y haz clic en _Importar hoja de cálculo_.
+2\. Busca y selecciona el archivo `aristas-coaparicion_bizarrias.csv` y haz clic en _Abrir_.
+3\. Se abrirá una primera ventana de **Opciones generales de CSV**. Seguramente Gephi ha detectado que se trata de una tabla de aristas, que el separador es la coma y que el formato de codificación de caracteres es UTF-8. Si no, selecciona estas opciones en los desplegables y haz clic en _Siguiente_.
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-03.png" alt="Captura de pantalla de la ventana de importación de hojas de cálculo con las opciones generales de importación desde archivos CSV para la lista de aristas" caption="Figura 3. Ventana de importación de hojas de cálculo con las opciones generales para la lista de aristas" %}
+
+4\. En la siguiente ventana, **Parámetros de importación**, deja seleccionadas todas las casillas, pues queremos importar nuestras cinco columnas. Gephi reconoce el tipo de datos: `double` (números) para el peso y `string` (cadena de caracteres) para las etiquetas. Haz clic en _Terminar_.
+5\. Ahora te aparecerá la última ventana del proceso: el **Informe de importación**. Verás que Gephi ha detectado que se trata de un grafo 'no dirigido' con 11 nodos y 42 aristas, y que no encuentra ningún problema en el archivo. Muy importante: cambia la selección de **Nuevo espacio de trabajo** a **Añadir al espacio de trabajo existente**. Queremos que nos importe los datos en el espacio en el que estamos trabajando, **Coaparición en escena**. Cuando lo hagas, haz clic en _Aceptar_.
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-04.png" alt="Captura de pantalla del informe de importación de una lista de aristas, con opciones finales como seleccionar el tipo de grafo o en qué espacio de trabajo se quiere realizar la importación" caption="Figura 4. Ventana con el informe de importación de la lista de aristas" %}
+
+Verás que ha aparecido una tabla con los `id` de los personajes en la pestaña **Nodos** y una tabla con las relaciones en la pestaña **Aristas**. Gephi ha extraído esta información de nuestra lista de aristas, asignando además un `id` a cada arista.
+
+Ahora vamos a importar las aristas del grafo de interacciones lingüísticas directas, siguiendo los mismos pasos:
+1. Dentro del espacio de trabajo **Interacción lingüística**, dirígete al **Laboratorio de datos** y haz clic en _Importar hoja de cálculo_.
+2. Busca y selecciona el archivo `aristas-interaccion_bizarrias.csv` y haz clic en _Abrir_.
+3. Se abrirá una primera ventana de **Opciones generales de CSV**. Seguramente Gephi ha detectado que se trata de una matriz, que el separador es la coma y que el formato de codificación de caracteres es UTF-8. Si no, selecciona estas opciones en los desplegables y haz clic en _Siguiente_.
+4. En la siguiente ventana, **Parámetros de importación**, simplemente haz clic en _Terminar_. Ahora no hay columnas entre las que poder elegir.
+5. Por último te aparecerá la ventana **Informe de importación**. Verás que Gephi ha detectado que se trata de un grafo 'dirigido' con 11 nodos y 51 aristas, y que no encuentra ningún problema en el archivo. Muy importante: cambia la selección de **Nuevo espacio de trabajo** a **Añadir al espacio de trabajo existente**. Como antes, queremos que nos importe los datos en el espacio en el que estamos trabajando, **Interacción lingüística**. Cuando lo hagas, haz clic en _Aceptar_.
+
+Gephi ha importado nuestra matriz y la ha transformado en una lista de aristas con un nodo de origen, otro de destino, un tipo de relación, un peso y un `id`. Además, ha creado 11 nodos utilizando como etiqueta el `id` numérico que les asignamos.
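
Para que te hagas una idea de la estructura resultante, la tabla de aristas que construye Gephi tiene una forma como esta. Es un fragmento hipotético: los `id` de los personajes son los que asignamos en la primera parte de la lección, pero los valores de `Id` y `Weight` son inventados a modo de ejemplo, y la columna `Label` está aún vacía:

```csv
Source,Target,Type,Id,Weight,Label
1,6,Directed,0,20,
6,1,Directed,1,18,
1,3,Directed,2,12,
3,1,Directed,3,9,
```
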
+
+En la nueva lista de aristas importada, que puedes ver en la pestaña **Aristas** del **Laboratorio de datos**, verás que nos faltan los atributos ('Label', etiqueta) que sí pudimos importar en el grafo de coaparición en escena, pues venían ya en nuestro archivo CSV. Nos faltan las relaciones entre los personajes: amor correspondido, amistad, servidumbre, etc. Para poder visualizarlas en este grafo tendremos que introducirlas manualmente en la columna correspondiente ('Label', etiqueta). Puedes coger esta información de la lista de aristas del grafo no dirigido, teniendo en cuenta que ahora las relaciones están duplicadas y también tendrás, por tanto, que duplicar las etiquetas. Es decir, etiqueta como `amor correspondido` la relación de Belisa (nodo 1) a Don Juan (nodo 6) y también un `amor correspondido` de Don Juan (nodo 6) a Belisa (nodo 1). Y una relación de `amistad` de Belisa (nodo 1) a Celia (nodo 3) y otra relación de `amistad` de Celia (nodo 3) a Belisa (nodo 1). Cuando termines, tu lista de aristas dirigidas debería verse así:
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-05.png" alt="Captura de pantalla del laboratorio de datos en la pestaña de aristas, ya con todos los datos introducidos" caption="Figura 5. Pestaña de aristas después de introducir manualmente las etiquetas de las relaciones" %}
+
+Con las aristas preparadas, ahora vamos a importar los datos referentes a los nodos de los dos grafos. Los pasos ahora son exactamente los mismos para los dos grafos, así que hazlo primero en un espacio de trabajo y luego en el otro:
+
+1. Dentro del **Laboratorio de datos** de cada espacio de trabajo vuelve a hacer clic en _Importar hoja de cálculo_.
+2. Ahora busca y selecciona el archivo [`nodos_bizarrias.csv`](/assets/analisis-redes-sociales-teatro-1/nodos_bizarrias.csv) y haz clic en _Abrir_.
+3. En esta ocasión Gephi habrá detectado que se trata de una 'tabla de nodos', que nuevamente el separador es la coma y que la codificación de caracteres es UTF-8. Si no, selecciona estas opciones en los desplegables y haz clic en _Siguiente_.
+4. En la ventana **Parámetros de importación**, mantén seleccionadas todas las casillas; queremos que importe las cuatro columnas. Ahora ha detectado que tanto la columna `género` como `función` son cadenas de caracteres. Haz clic en _Terminar_.
+5. En la última ventana, **Informe de importación**, cerciórate de que ha identificado 11 nodos y que no hay problemas en la importación. En el desplegable referente al tipo de grafo, selecciona **No dirigido** o **Dirigido** en función del grafo al que estés importando los nodos. Importante: cambia una vez más la opción de **Nuevo espacio de trabajo** a **Añadir al espacio de trabajo existente**. Después, haz clic en _Aceptar_.
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-06.png" alt="Captura de pantalla de la ventana con el informe de importación de la lista de nodos" caption="Figura 6. Ventana con el informe de importación de la lista de nodos del grafo de coaparición de personajes en escena" %}
+
+Gephi ha importado la lista de nodos y ha combinado la nueva información con los nodos que creó antes a partir de la lista de aristas o la matriz de adyacencia. Este es el motivo por el que era importante sustituir los nombres de los personajes por su `id` antes de exportar las hojas de cálculo a CSV. Así, Gephi ha podido identificar quién es quién y fusionar los datos de ambos archivos.
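
Esa misma fusión por `id` puede reproducirse fuera de Gephi con unas pocas líneas de Python. Es solo un esbozo orientativo: asume que las cabeceras de tus CSV son del tipo `Source`/`Target`/`Weight` en las aristas y que el `id` es la primera columna en los nodos, así que compruébalas antes en tus archivos:

```python
# Esbozo: cruzar la lista de aristas y la lista de nodos por 'id' con networkx.
# Nombres de archivo y de columnas asumidos; ajústalos a tus propios CSV.
import networkx as nx
import pandas as pd

aristas = pd.read_csv("aristas-coaparicion_bizarrias.csv")
nodos = pd.read_csv("nodos_bizarrias.csv", index_col=0)  # el id como índice

G = nx.from_pandas_edgelist(aristas, source="Source", target="Target", edge_attr=True)
# Cada nodo recibe sus atributos (etiqueta, género, función) según su id,
# igual que hace Gephi al combinar ambas tablas
nx.set_node_attributes(G, nodos.to_dict(orient="index"))
print(G.nodes(data=True))
```
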
+
+¡Enhorabuena! Hemos terminado la importación de los datos de los dos grafos, ahora podemos pasar a trabajar en la pestaña **Vista general**.
+
+### La vista general
+La **Vista general** es donde modificaremos la visualización de nuestros grafos (que se ve en el centro del programa) y donde aplicaremos las medidas y métricas de análisis. A la izquierda tienes las opciones de visualización (los paneles **Apariencia** y **Distribución**), y a la derecha están el panel con información sobre el grafo (**Contexto**) y los paneles **Filtros** y **Estadísticas** para consultar y analizar el grafo:
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-07.png" alt="Captura de pantalla de la vista general del espacio de trabajo con una primera visualización del grafo, aún sin cambiar parámetros de visualización" caption="Figura 7. Vista general de nuestro espacio de trabajo" %}
+
+Las opciones de visualización y análisis son muy numerosas y no las cubriremos todas en esta lección, así que para explorar e introducirnos en Gephi vamos a crear una visualización sencilla y aplicar solo algunas medidas básicas. A partir de ahora todos los pasos que des en un espacio de trabajo puedes replicarlos en el otro. Así, repetir los mismos pasos dos veces te servirá además para aprender a usar el programa. Después, te animo a continuar probando todas las demás opciones y configuraciones por tu cuenta.
+
+#### Modificar la apariencia y distribución del grafo
+
+En el centro de la **Vista general**, en el panel llamado **Grafo**, debería haber aparecido una red con nodos y aristas en negro. Seguramente, el grafo de la captura de arriba (es el de coaparición en escena) no es exactamente igual al que te ha aparecido a ti. Es normal, se ha generado con una distribución de nodos aleatoria. Comencemos a dar forma y color a nuestra red de personajes:
+
+1. Para desenmarañar la red empezaremos por aplicar un 'algoritmo de distribución'. En el panel de abajo a la izquierda, **Distribución**, elige el algoritmo `ForceAtlas 2` y modifica estos parámetros: escalado 2500 y activar _Evitar el solapamiento_. Lo demás puedes dejarlo como está por defecto. Haz clic en _Ejecutar_ y cuando el grafo se estabilice y deje de moverse, haz clic en _Parar_. ¿Qué ha ocurrido? Los nodos han comenzado a repelerse (alejarse) entre ellos a la vez que las aristas que los conectan los han intentado atraer. Así, se ha generado un movimiento que ha terminado convergiendo en una posición balanceada para cada nodo, en la que aquellos personajes más conectados entre sí han quedado más cerca y los menos conectados más alejados. El objetivo de este algoritmo de distribución no es otro que colocar los nodos de forma que nos ayude a entender e interpretar mejor el grafo[^4]. Además de `ForceAtlas 2` existen otros algoritmos, como puedes comprobar en el desplegable, pero este nos ofrece buenos resultados y es uno de los más extendidos.
+2. Ahora haz clic en el icono 'T' negro que se encuentra en la cinta de opciones inferior, a la derecha de la cámara fotográfica, en la parte inferior del panel del Grafo. Has activado las etiquetas (label) de los nodos, es decir, los nombres de los personajes. Puedes modificar el tamaño, tipografía y color en el resto de opciones de la cinta.
+3. Vamos a modificar ahora el color y el tamaño de los nodos y aristas. Para ello, ve al panel **Apariencia** (arriba a la izquierda) y sigue estas indicaciones:
+a. En **Nodos-Color** (icono de la paleta de pintura), selecciona **Partición** y escoge el atributo `Función`. Gephi asigna un color distinto a cada valor del atributo; puedes modificar la paleta de colores o dejar los colores por defecto y hacer clic en _Aplicar_. Los nodos del grafo se han coloreado y también lo han hecho las aristas. Ve a la cinta de opciones inferior y deselecciona la opción **Las aristas tienen el color del nodo de origen**; su icono es una línea con un arcoíris. Ahora las aristas serán todas de un mismo color gris.
+b. En **Nodos-Tamaño** (icono de los círculos), selecciona **Ranking** y escoge el atributo `Grado` (Gephi calcula automáticamente el grado de los nodos). Cambia el tamaño mínimo a 10 y el máximo a 40 y haz clic en _Aplicar_. Ahora los nodos tienen un tamaño relativo a su grado, es decir, a la cantidad de nodos con los que están relacionados. A mayor número de personajes con los que comparte escena un personaje -> mayor grado del nodo que representa el personaje -> mayor diámetro del nodo en la visualización.
+c. En **Aristas-Color** (icono de la paleta de pintura), selecciona **Ranking** y escoge el atributo `Peso`. Te aparecerá un gradiente de color. Puedes cambiar la paleta de colores o dejarlo en verde y hacer clic en _Aplicar_. Ahora el color de las aristas está más o menos intenso en función de su peso, es decir, del número de escenas que comparten los dos personajes o de sus interacciones lingüísticas. Si las ves muy finas, puedes cambiar el tamaño de las aristas en la cinta de opciones inferior; están por defecto más o menos gruesas también según el peso.
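
Si quieres experimentar con una visualización parecida fuera de Gephi, este esbozo en Python usa `spring_layout` (Fruchterman-Reingold), otro algoritmo de distribución por fuerzas con un efecto comparable al de `ForceAtlas 2`, que es propio de Gephi. Los nombres de archivo y de columnas son los de esta lección; ajústalos si difieren en tus CSV:

```python
# Esbozo: visualización por fuerzas con networkx y matplotlib, con tamaño de
# nodo según el grado y grosor de arista según el peso, como en Gephi
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd

aristas = pd.read_csv("aristas-coaparicion_bizarrias.csv")
G = nx.from_pandas_edgelist(aristas, source="Source", target="Target", edge_attr="Weight")

pos = nx.spring_layout(G, seed=42)                  # distribución de fuerzas
grados = dict(G.degree())                           # tamaño según el grado
pesos = [G[u][v]["Weight"] for u, v in G.edges()]   # grosor según el peso
nx.draw_networkx(G, pos,
                 node_size=[100 * grados[n] for n in G],
                 width=[0.5 * w for w in pesos])
plt.axis("off")
plt.show()
```
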
+
+Seguramente te ha quedado algo muy similar a esto en el caso del grafo de coaparición de personajes en escena:
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-08.png" alt="Captura de pantalla de la vista general del espacio de trabajo con la visualización del grafo una vez aplicados los parámetros de visualización escogidos" caption="Figura 8. Visualización del grafo de coaparición de personajes en escena, resultado de aplicar los parámetros indicados" %}
+
+¡Enhorabuena! Ahora puedes ver cuáles son los personajes más relacionados (`grado`) por el tamaño de los nodos, la `función` de estos personajes por el color de los nodos y la cantidad de veces que dos personajes coinciden en escena o interactúan entre ellos (`peso`) por el grosor y la intensidad de color de sus aristas. Si comparas la captura con tu vista del grafo de coaparición en escena puede que tu grafo tenga otra disposición. En realidad tus nodos y los míos están colocados en el mismo sitio y a la misma distancia, solo que están rotados en otro sentido. En el panel de **Distribución** puedes utilizar la opción **Rotar** (en el desplegable) y buscar una disposición que te guste más. No cambiará la distribución que creó el algoritmo `ForceAtlas 2`. Otras opciones que puedes explorar son **Contracción** y **Expansión**, o **Ajuste de etiquetas** si alguna está superpuesta.
+
+Una vez repitas los pasos también en el espacio de trabajo del grafo de interacciones lingüísticas y hayas modificado su apariencia, verás que en este caso las aristas tienen flechas que nos indican la dirección de las relaciones, pues se trata de un grafo dirigido:
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-09.png" alt="Captura de pantalla de la vista general del espacio de trabajo con la visualización del grafo una vez aplicados los parámetros de visualización escogidos" caption="Figura 9. Visualización del grafo de interacciones lingüísticas entre personajes, resultado de aplicar los parámetros indicados" %}
+
+También puedes activar las etiquetas de las aristas, haciendo clic en la 'T' blanca en la cinta de opciones de debajo del grafo. El color de las etiquetas y su tamaño deberás modificarlos en **Apariencia**, en la pestaña **Aristas-A subrayada** (color) y en la pestaña **Aristas-tT** (tamaño):
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-10.png" alt="Captura de pantalla de la vista general del espacio de trabajo con la visualización del grafo según los parámetros escogidos y con las etiquetas de las aristas visibles" caption="Figura 10. Visualización del grafo de coaparición de personajes en escena con las etiquetas de las aristas activadas" %}
+
+#### El contexto y los filtros
+
+Nos quedan por explorar los paneles de configuración de la derecha. El de **Contexto** nos da información sobre el grafo en pantalla. Por ejemplo, en el de interacciones lingüísticas nos dice que se trata de un 'grafo dirigido' con 11 nodos y 51 aristas.
+
+Vamos a probar los filtros, por ejemplo, filtrando cualquiera de los grafos según el género de los personajes:
+1. En el panel **Filtros**, despliega las carpetas **Atributos** y **Partición** (dentro de la primera).
+2. Selecciona el atributo `género (Nodo)` y arrástralo al panel de **Consultas**.
+3. Haz clic en _Mujer (45,45 %)_ y en _Filtrar_.
+
+Verás algo similar a esto, un grafo solo con los personajes clasificados por ti como **Mujer**:
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-11.png" alt="Captura de pantalla de la vista general del espacio de trabajo con el resultado de filtrar el grafo según el atributo 'mujer'" caption="Figura 11. Grafo resultante de filtrar por el atributo 'Mujer'" %}
+
+
+#### Medidas, métricas y algoritmos de análisis
+
+Ahora vamos a aplicar algunas medidas en el panel **Estadísticas**. Te dejaré explicaciones de cada una. Gephi ha simplificado al máximo el análisis de los grafos, pues es tan fácil como hacer clic en _Ejecutar_ en la medida o algoritmo que queramos implementar. Algunas de estas medidas, al ejecutarlas, abrirán una ventana emergente con un pequeño informe que podemos descargar o con opciones de configuración. Otras, simplemente añadirán columnas en nuestra tabla de nodos del **Laboratorio de datos**. Estos nuevos datos, generados gracias a la aplicación de medidas, nos dan más información sobre nuestro grafo, nos permiten modificar la visualización con base en ellos (son como nuevos atributos) y, exportándolos, podremos procesarlos en otra herramienta o programa. En esta lección no nos adentraremos ahí, pero quiero que sepas que a partir de aquí las posibilidades se multiplican.
+
+En el apartado **Visión general de la red** lo primero que encontramos es el ['grado medio'](https://perma.cc/M8B7-34LD), es decir, la media de los grados de todos los nodos del grafo. Recordemos que el grado es el número de nodos con los que un nodo está conectado. En el caso de los grafos dirigidos, obtendremos además el 'grado medio de entrada' y el 'grado medio de salida'. Después, el 'grado medio con pesos', que tiene en cuenta el peso de las aristas conectadas a un nodo y no simplemente la cantidad de nodos con los que se conecta. De nuevo, habrá un 'grado medio con pesos de entrada' y un 'grado medio con pesos de salida'. Al ejecutar estas dos estadísticas, se añadirán dos columnas nuevas en la tabla de nodos del **Laboratorio de datos** con los valores de grado y grado con peso de cada nodo:
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-12.png" alt="Captura de pantalla del laboratorio de datos del grafo de interacciones lingüísticas con columnas resultantes de aplicar las medidas de grado" caption="Figura 12. Laboratorio de datos del grafo de interacciones lingüísticas con las nuevas columnas de grado" %}
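+
+Estos mismos valores pueden reproducirse fuera de Gephi. El siguiente boceto en Python utiliza NetworkX y asume que exportaste tus aristas a un CSV con las columnas `Source`, `Target` y `Weight` (la convención habitual de Gephi); el nombre del archivo es una suposición:
+
+```python
+import csv
+import networkx as nx
+
+# Grafo no dirigido de coaparición en escena, cargado desde la lista de aristas
+G = nx.Graph()
+with open("bizarrias-aristas.csv", newline="", encoding="utf-8") as f:
+    for fila in csv.DictReader(f):
+        G.add_edge(fila["Source"], fila["Target"], weight=float(fila["Weight"]))
+
+# Grado (número de vecinos) y grado con pesos (suma de los pesos de sus aristas)
+for nodo in G.nodes():
+    print(nodo, G.degree(nodo), G.degree(nodo, weight="weight"))
+```
+
+Para el grafo dirigido de interacciones lingüísticas bastaría con crear el grafo como `nx.DiGraph()` y usar además los métodos `in_degree` y `out_degree`.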
+
+El 'diámetro de la red' es una de las medidas de tamaño o distancia. Para entenderlo, primero has de saber que en análisis de redes se entiende por 'camino' una secuencia de nodos conectados por aristas. Esta noción de camino nos permite calcular las métricas de distancia y tamaño de la red. Por otro lado, se entiende por ['distancia'](https://perma.cc/YYA3-ZLG9) o 'longitud' de un camino el número de aristas (no de nodos) que deben cruzarse para ir de un nodo a otro (siempre por el camino más corto). El ['diámetro'](https://perma.cc/2EU8-J4ZR) es, entonces, la distancia entre los nodos más alejados de una red:
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-13.png" alt="Grafo explicativo del concepto 'diámetro', con las aristas que sirven para medir el diámetro coloreadas" caption="Figura 13. Ejemplo del diámetro de una red" %}
+
+Haz clic en _Ejecutar_ junto al diámetro:
+1. En la ventana que se ha abierto encontrarás definiciones de las métricas de distancia: distancia media, diámetro y las medidas de centralidad de intermediación, cercanía y excentricidad. Al ejecutar esta función, no solo se calcula el diámetro sino todas esas métricas relacionadas con la distancia.
+2. Gephi te permite normalizar las centralidades (ahora veremos lo que son) en un rango [0,1], lo que facilita después la comparación de grafos de obras distintas. Marca esta opción y haz clic en _Aceptar_.
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-14.png" alt="Captura de pantalla de la ventana de parámetros que se abre para ejecutar las medidas de distancia de un grafo" caption="Figura 14. Ventana de parámetros de distancia del grafo de coaparición de personajes en escena" %}
+
+Si comparas el diámetro de los dos grafos verás que hay diferencias: en uno es 2 y en el otro 4. La diferencia es normal: nos indica que hay personajes que comparten escena pero que no interactúan entre ellos.
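+
+Con el grafo cargado en NetworkX (ver el boceto anterior), estas métricas de distancia se obtienen en un par de líneas. De nuevo, es solo un esquema bajo los mismos supuestos (grafo `G` no dirigido y conexo):
+
+```python
+import networkx as nx
+
+# Reutilizamos el grafo G del boceto anterior; en un grafo dirigido,
+# nx.diameter exige que la red sea fuertemente conexa
+print("Diámetro:", nx.diameter(G))
+print("Distancia media:", nx.average_shortest_path_length(G))
+
+# La excentricidad de un nodo es su distancia al nodo más alejado;
+# el diámetro de la red es la excentricidad máxima
+print("Excentricidades:", nx.eccentricity(G))
+```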
+
+Si te diriges al **Laboratorio de datos**, verás que se han añadido varias columnas más en la tabla de nodos, ahora con los resultados de las 'medidas de centralidad'. La 'centralidad' en ARS tiene que ver con el lugar que ocupan los nodos en el conjunto de una red y nos ayuda a entender la 'importancia' de los nodos dentro del sistema que analizamos[^5]. Estas son algunas de las medidas de centralidad, pero hay unas cuantas más:
+- El 'grado' o el 'grado con pesos' pueden ser medidas de centralidad, pues valores más altos indican mayor conectividad. En ese caso, nos referimos a ellas como ['centralidad de grado'](https://perma.cc/2SW2-LZT4) (degree centrality) y 'centralidad de grado con pesos' (weighted degree centrality).
+- La ['centralidad de cercanía'](https://perma.cc/7E9Y-CH68) (closeness centrality) de un nodo se obtiene midiendo la distancia media que guarda dicho nodo con todos los demás del grafo. Dicho de otra forma, nos ayuda a encontrar el nodo más cercano a todos los demás, que no tiene por qué ser el de mayor grado (el más conectado).
+- La ['centralidad de intermediación'](https://perma.cc/5YSB-9KVX) (betweenness centrality) de un nodo se halla calculando la cantidad de veces que dicho nodo se encuentra en el camino más corto entre todos los otros nodos. La importancia de los nodos depende, en este caso, de su labor de intermediación, de puente conector entre nodos separados. Si faltan estos nodos, la estructura de un grafo suele verse muy afectada.
+
+Por ejemplo, en la comedia con la que estamos trabajando, *Las bizarrías de Belisa*, ningún personaje tiene una centralidad de intermediación normalizada demasiado alta. No hay ningún nodo que, al eliminarlo, provoque un 'grafo disconexo' en el que ciertos nodos queden desconectados del núcleo principal.
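+
+Estas centralidades también pueden calcularse con NetworkX, que las devuelve normalizadas en [0,1] (mismo grafo `G` y mismos supuestos del boceto anterior):
+
+```python
+import networkx as nx
+
+# Centralidad de cercanía e intermediación, normalizadas en [0,1]
+cercania = nx.closeness_centrality(G)
+intermediacion = nx.betweenness_centrality(G, normalized=True)
+
+# Los cinco personajes con mayor centralidad de intermediación
+top = sorted(intermediacion.items(), key=lambda par: par[1], reverse=True)
+print(top[:5])
+```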
+
+Siguiendo en el panel de **Estadísticas** nos encontramos la **Densidad**. La ['densidad'](https://perma.cc/E5C7-XVX8) mide el nivel de conectividad entre todos los nodos de un grafo. Por ejemplo, un grafo tendría una densidad del 100% cuando todos los nodos están conectados entre sí. Matemáticamente, la densidad se calcula como la proporción entre las aristas que tiene una red y el total de aristas posibles, y el resultado se expresa en un rango [0,1]: cuanto más cerca de 1, se dice que es un grafo 'denso'; cuanto más cerca de 0, se habla de un grafo 'disperso'. Haz clic en _Ejecutar_:
+1. Se abrirá una ventana que nos permite seleccionar si nuestro grafo es dirigido o no dirigido.
+2. Selecciona tu opción y haz clic en _Aceptar_.
+
+Nuevamente, hay diferencia entre la densidad del grafo de coaparición en escena y la del grafo de interacciones lingüísticas por el mismo motivo: hay personajes que comparten escena pero que no intercambian palabra.
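+
+Puedes comprobar el cálculo a mano: en un grafo dirigido la densidad es m/(n·(n−1)) y en uno no dirigido 2m/(n·(n−1)), siendo n el número de nodos y m el de aristas. Con los 11 nodos y 51 aristas que indicaba el panel **Contexto** para el grafo dirigido de interacciones (un boceto mínimo en Python):
+
+```python
+import networkx as nx
+
+n, m = 11, 51  # nodos y aristas del grafo dirigido (ver panel Contexto)
+print(round(m / (n * (n - 1)), 3))  # 0.464, el mismo valor que devuelve Gephi
+
+# Para un grafo ya cargado, NetworkX aplica la fórmula adecuada por ti:
+print(round(nx.density(G), 3))
+```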
+
+Vamos a saltar ahora al apartado **Community Detection**. En ARS se entiende por ['comunidad'](https://perma.cc/CJ23-HB7M) un grupo de nodos que están densamente interconectados entre sí y que a su vez están poco conectados con los nodos de otra comunidad:
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-15.png" alt="Grafo explicativo del concepto 'comunidad' con los nodos coloreados según la comunidad a la que pertenecen" caption="Figura 15. Ejemplo de grafo con comunidades coloreadas en dos colores distintos" %}
+
+Las distintas comunidades de un grafo se hallan implementando un 'algoritmo de [modularidad](https://perma.cc/PY99-MBVB)' que Gephi incorpora y que podemos utilizar simplemente haciendo clic en _Ejecutar_.
+1. Se abrirá una ventana de **Parámetros de Modularidad**. No es necesario que modifiques nada: utiliza la opción de aleatoriedad y de incorporar los pesos de las aristas, y deja la resolución en 1 (modularidad estándar).
+2. El algoritmo va a numerar las comunidades a partir del 0, pero si quieres que comience a contar en 1, simplemente cambia la opción **Classes start at: 1** y dale a _Aceptar_.
+
+Si implementas el algoritmo de modularidad en el grafo de interacciones lingüísticas directas comprobarás que se detectan tres comunidades de nodos. Puedes ver qué comunidad ha sido asignada a cada nodo en la nueva columna del **Laboratorio de datos**. Para visualizar las comunidades en el grafo, ve al panel **Apariencia** de la **Vista general** y cambia el color de los nodos eligiendo la partición **Modularity Class**, haciendo clic en _Aplicar_ con los colores por defecto o modificándolos. Debería quedarte un grafo similar a este:
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-16.png" alt="Captura de pantalla de la vista general del espacio de trabajo con la visualización del grafo de interacciones lingüísticas con los nodos coloreados según la comunidad a la que pertenecen: morado, verde o naranja" caption="Figura 16. Grafo de interacciones lingüísticas con los nodos coloreados según la comunidad a la que pertenecen, detectadas gracias al algoritmo de modularidad" %}
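+
+Esta detección también puede reproducirse fuera de Gephi: su algoritmo de modularidad se basa en el método de Louvain (Blondel et al., 2008), disponible igualmente en NetworkX. Un boceto mínimo sobre el mismo grafo `G` de antes (sin fijar `seed`, el resultado puede variar entre ejecuciones, igual que en Gephi):
+
+```python
+import networkx as nx
+
+# Detección de comunidades con el método de Louvain (requiere NetworkX 3 o superior)
+comunidades = nx.community.louvain_communities(G, weight="weight", resolution=1, seed=42)
+for i, comunidad in enumerate(comunidades):
+    print(f"Comunidad {i}:", sorted(comunidad))
+```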
+
+Cuando has desplegado el menú de **Partición** en el color de los nodos habrás visto que han aparecido muchas más opciones de las que teníamos al principio, y es que puedes utilizar los resultados de las medidas que has ido implementando para colorear y dar tamaño a los nodos y aristas. Por ejemplo, utilizando la opción **Ranking** puedes poner el diámetro de los nodos en función de su centralidad de intermediación y el color graduado en intensidad según su grado. Esto te permitiría a golpe de vista comparar la diferencia entre ambas medidas para cada nodo. ¿Ves cómo las opciones se multiplican?
+
+### La previsualización: últimos ajustes y exportación de visualizaciones
+
+Para finalizar con el trabajo en Gephi, vamos a exportar alguna visualización en la pestaña de **Previsualización**. Al entrar, verás un panel grande gris vacío: es donde aparecerá el grafo una vez introduzcas los parámetros en el panel de configuración de la izquierda. Haz una prueba: entra a la previsualización del espacio de trabajo **Coaparición en escena**, haz clic en _Refrescar_ y mira cómo se ve tu grafo con los parámetros que vienen por defecto. Estarás viendo el mismo grafo de la **Vista general** pero con algunos ajustes de visualización. Ahora modifica estos parámetros y deja el resto como está por defecto:
+- Nodos:
+  - Ancho de borde: 0.0
+- Etiquetas de nodos:
+  - Mostrar etiqueta: activado
+  - Fuente: Arial 24 Sin Formato
+  - Tamaño proporcional: desactivado
+- Aristas:
+  - Grosor: 20
+  - Reescalar pesos: activado
+  - Color: original (es decir, el gradiente que pusimos en la vista general)
+- Etiquetas de aristas:
+  - Mostrar etiquetas: activado
+  - Fuente: Arial 14 Sin Formato
+  - Color: específico: #000000
+
+Haz clic en _Refrescar_ de nuevo y debería aparecerte un grafo similar a este, quizá con otra rotación:
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-17.png" alt="Captura de pantalla de la pestaña de previsualización, con la columna de opciones finales de visualización a la izquierda y el grafo resultante a la derecha" caption="Figura 17. Visualización final del grafo de coaparición de personajes en escena" %}
+
+Ahora puedes exportar la visualización haciendo clic en _Exportar SVG/PDF/PNG_ en la parte inferior del panel de la izquierda. Como bien deduces, esos son los tres formatos que permite exportar Gephi. [PNG](https://perma.cc/3CAF-NZTD) es un buen formato de imagen, y podrás insertarlo en un documento de texto, utilizarlo para crear un póster o una presentación de diapositivas. Si seleccionas en el desplegable `Files of type` la opción `Archivos PNG (*.png)` y accedes al menú de **Opciones**, Gephi te permitirá configurar la resolución de la imagen, el margen alrededor del grafo y si quieres fondo transparente o no.
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-18.png" alt="Captura de pantalla de las ventanas del menú de exportación de visualizaciones" caption="Figura 18. Menú de exportación de visualizaciones" %}
+
+Otra buena opción es exportar en [SVG](https://perma.cc/EBJ4-C2KZ), el formato de gráficos vectoriales escalables que se suele utilizar en diseño gráfico, ya que es manipulable por ejemplo con [CSS](https://perma.cc/6M8D-Q4MS) y [JavaScript](https://perma.cc/2M3K-JRT8). Si quieres utilizar tus visualizaciones en un sitio web, puede que este formato sea el que más te convenga. Además, este formato lo puedes abrir y editar con programas de código abierto como [Inkscape](https://inkscape.org/es/) o [LibreOffice Draw](https://es.libreoffice.org/descubre/draw/) o privativos como [Adobe Illustrator](https://www.adobe.com/es/products/illustrator.html).
+
+Si repites lo mismo con el grafo de interacción lingüística directa, ahora podrás seleccionar si quieres aristas curvas (que marcan la dirección en el sentido de las agujas de un reloj) o rectas con flechas. Por ejemplo, reutiliza los parámetros anteriores y modifica estos:
+- Aristas:
+  - Curvas: desactivado
+- Flechas de aristas:
+  - Tamaño: 3.0
+- Etiquetas de aristas:
+  - Mostrar etiquetas: desactivado
+
+Haz clic en _Refrescar_ y verás algo así (con los nodos coloreados según su comunidad porque antes aplicamos este cambio en la vista general):
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-19.png" alt="Captura de pantalla de la pestaña de previsualización, con la columna de opciones finales de visualización a la izquierda y el grafo resultante a la derecha" caption="Figura 19. Visualización final del grafo de interacciones lingüísticas entre personajes" %}
+
+## Paso 4. Interpretación de los resultados
+
+Hemos generado visualizaciones y aplicado medidas a los grafos construidos gracias a los datos que primero extrajimos de *Las bizarrías de Belisa*. Las visualizaciones ya nos pueden ayudar en el análisis de una obra, por ejemplo, ilustrando un análisis de los personajes más 'tradicional'. Pero si has llegado hasta aquí seguramente lo que te interesa es tener en consideración los datos obtenidos de la aplicación de medidas, métricas y algoritmos.
+
+Primero creo que es necesario incidir en que los datos obtenidos de un análisis de redes sociales como el que hemos llevado a cabo deben analizarse cuidadosamente y no utilizarse para confirmar hipótesis sin una valoración crítica. En realidad, todo el proceso que has llevado a cabo, desde la elección del corpus hasta la creación de visualizaciones, debe considerarse parte del proceso crítico de investigación. Piensa, por ejemplo, en la tediosa extracción de datos y todas las decisiones interpretativas que has tomado. ¡Cualquier otra decisión variaría los resultados! Por eso debes insistir en ser consistente con el procedimiento y criterios de análisis que elijas, y comunicarlos con detalle para contextualizar tus resultados.
+
+Vamos entonces a explorar los datos y grafos obtenidos de nuestro análisis de redes sociales de *Las bizarrías de Belisa*. Mi primera recomendación es que, después de aplicar las medidas y algoritmos que te interesen, vayas al **Laboratorio de datos** y hagas clic en _Exportar tabla_ para exportar la tabla de nodos, pero ahora con las nuevas columnas agregadas con más datos sobre los personajes. Gracias a este CSV podrás procesar los resultados cómodamente con lenguajes de programación como [R](https://perma.cc/7ESJ-S5K4) (enfocado al análisis estadístico) o [Python](https://perma.cc/BT4G-U7FE), o incluso con el mismo programa de hojas de cálculo que utilizaste para recoger tus datos.
+
+Hagamos esto último. Abre un nuevo archivo de hojas de cálculo e importa la tabla de nodos CSV del grafo de interacción lingüística que acabas de exportar de Gephi. Puedes llamar a este nuevo archivo `analisis-datos_Bizarrias`. ¿Qué podemos hacer ahora? Primero analicemos el grado de los personajes que, recordemos, cuantifica lo conectado que está un nodo con el resto de nodos de la red social. Los nodos, además de 'grado' (a secas), también tienen 'grado con peso'. El primero tiene que ver con el número de personajes con los que habla un nodo (en un sentido y otro) y el segundo tiene en cuenta además la cantidad de interacciones. Fijémonos en las diferencias entre una y otra medida, observando estos gráficos generados en la hoja de cálculo mediante las opciones que ofrece Google Sheets:
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-20.png" alt="Gráficos de barras verticales con los grados y grados con pesos de los personajes de la comedia analizada, ordenados de mayor a menor grado" caption="Figura 20. Grados y grados con pesos de los personajes de 'Las bizarrías de Belisa' según sus interacciones lingüísticas directas" %}
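+
+Si prefieres hacer este recuento con Python, la librería pandas permite leer el CSV y dibujar gráficos equivalentes. Es solo un boceto: el nombre del archivo y las columnas `Label`, `Degree` y `Weighted Degree` son los que Gephi suele generar al exportar, pero compruébalos en tu propio CSV:
+
+```python
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Tabla de nodos exportada desde el Laboratorio de datos de Gephi
+nodos = pd.read_csv("analisis-datos_Bizarrias.csv")
+nodos = nodos.sort_values("Degree", ascending=False)
+
+# Dos gráficos de barras comparables a los de la figura 20
+nodos.plot.bar(x="Label", y=["Degree", "Weighted Degree"], subplots=True)
+plt.tight_layout()
+plt.show()
+```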
+
+Don Juan ha resultado ser el personaje que más interactúa, logrando el grado más alto de toda la red social (15) y superando por un punto a Belisa, la indiscutible protagonista femenina que incluso da nombre a la comedia. ¿Por qué? Si vamos a nuestro grafo podremos ver cómo Don Juan interactúa con Octavio y Julio, mientras que Belisa, aunque se enfrenta a ellos vestida de hombre y con espada, no cruza palabra durante dicho enfrentamiento. Sin embargo, si vemos los datos del grafo de coaparición en escena, son Belisa y su criada Finea quienes logran el grado más alto, convirtiéndose en los dos únicos personajes de la comedia que comparten escena al menos una vez con todos los demás personajes (por eso su grado es 10). Pero recordemos: compartir escena no significa necesariamente compartir diálogo, como nos demuestra el grafo dirigido. ¿Y en cuanto al grado con peso? Si volvemos al gráfico de barras, ahora sí Belisa logra la primera posición, y supera con creces a Don Juan. Su grado con peso es 318, es decir, se dirige 157 veces a otros personajes y es receptora de 161 intervenciones. Como vemos, en función de qué queramos estudiar de un texto teatral, puede interesarnos más un criterio de análisis u otro.
+
+Veamos por último un dato global de los grafos: su densidad. El grafo de coaparición en escena (no dirigido) tiene una densidad de 0,764, mientras que el de interacción lingüística alcanza tan solo 0,464. ¿Qué nos aporta esta información? *Las bizarrías de Belisa* es una comedia bastante densa en cuanto a la coaparición de personajes en escena (cuanto más cerca de 1, mayor densidad). Son pocos personajes, tan solo diez, y la configuración de la acción genera que compartan muchas escenas. Lope escribió una comedia urbana del gusto de la época, alejado ya de sus primeras incursiones al género en las que el reparto superaba los 20 personajes y las acciones estaban más dispersas. Sin embargo, la densidad del grafo dirigido no llega al medio punto, lo que demuestra que, aunque los personajes coincidan en escena, no necesariamente dialogan. La diferencia entre la densidad de los dos tipos de grafo en esta comedia podemos explicarla principalmente por la situación particular de Octavio, galán rival de don Juan (por ser pretendiente también de Lucinda, la segunda dama). Aunque sabemos que Octavio visita a Lucinda (le vemos salir de su casa), esta pareja nunca interactúa en el escenario. Es una situación quizá algo atípica pero que entendemos por el desdoblamiento de galanes rivales: don Juan y el Conde pretenden a Belisa, y don Juan y Octavio pretenden a Lucinda. Dado que la acción amorosa principal es la de Belisa, Lope no dedica demasiados versos al desarrollo de la relación entre Octavio y Lucinda.
+
+No podemos explorar todos los resultados del análisis practicado sobre *Las bizarrías de Belisa*, así que sirva lo dicho para comprender el tipo de conclusiones a las que nos llevan los datos y grafos generados. Por último, cabe apuntar las posibilidades del análisis comparado de redes sociales, es decir, a partir de un corpus de dos o más obras. Por ejemplo, este es un gráfico en el que se compara el grado con pesos normalizado (sobre 1) de los primeros galanes y primeras damas de ocho comedias urbanas de Lope de Vega (en orden cronológico), entre las que se incluye la que hemos utilizado en esta lección:
+
+{% include figure.html filename="es-or-analisis-redes-sociales-teatro-2-21.png" alt="Diagrama de dispersión de puntos con líneas de tendencia comparando el grado con pesos normalizado de los primeros galanes y primeras damas de ocho comedias urbanas de Lope de Vega" caption="Figura 21. Gráfico comparativo del grado con pesos normalizado de los primeros galanes y primeras damas de ocho comedias urbanas de Lope de Vega (elaboración propia, Merino Recalde, 2022)" %}
+
+## Recapitulación final
+
+Terminemos esta lección anotando las cuestiones elementales que deberás tener en cuenta cuando realices un análisis de redes sociales de textos teatrales:
+1. Divide el proceso en cuatro partes diferenciadas:
+ a. Creación del corpus
+ b. Extracción y estructuración de datos
+ c. Visualizaciones y análisis
+ d. Interpretación de los resultados (datos y grafos)
+2. Documenta el proceso y la toma de decisiones. Sé consistente en ello. Procura basarte siempre en criterios preestablecidos, ya sean provenientes de otras investigaciones que trabajen con el mismo tipo de obras o diseñados por ti en función de tus objetivos y del corpus de análisis.
+3. Procura guardar tus datos finales en [formatos abiertos](https://perma.cc/M2XM-DYUZ) que garanticen el acceso a los datos a largo plazo, como el CSV (`.csv`). Si únicamente guardas tus datos en formato Excel (`.xlsx`) o en la extensión del propio Gephi (`.gephi`) puede que tu archivo termine corrompiéndose o fallando. Un CSV tiene una vida más larga, es más fácil de preservar y rápidamente puedes importarlo, transformarlo y volver sobre tus datos para reconstruir tus grafos y análisis.
+4. Cuando generes visualizaciones anota los parámetros que utilizaste (tamaño de los nodos, colores, algoritmo de distribución, etc.). Es importante que acompañes tus resultados de esta información, pues ayuda a entender y contextualizar las representaciones.
+
+Y sobre todo, no tengas miedo de probar y explorar todas las posibilidades que nos ofrece el análisis de redes para estudiar la literatura teatral.
+
+## Notas
+
+[^1]: Existen otros programas y herramientas de análisis de redes que podemos mencionar. Por ejemplo, [Cytoscape](https://cytoscape.org/) es otro programa de código abierto y libre descarga, muy utilizado en bioinformática. También hay aplicaciones web: [Palladio](https://hdlab.stanford.edu/palladio/), desarrollada por el Humanities+Design Research Lab de la Stanford University y pensada para la investigación histórica; o [ONODO](https://onodo.org/), una aplicación muy sencilla que permite crear redes e implementar medidas fácilmente.
+[^2]: Esta lección se ha preparado con la versión 0.9.7 de Gephi. En 2022, y tras cinco años sin actualizaciones, se han publicado 5 versiones nuevas corrigiendo errores (bug fixes) y añadiendo mejoras. Por ejemplo, desde la versión 0.9.3 ya no es necesario instalar Java para que Gephi funcione en Windows y Linux, lo que causaba numerosos problemas en Windows. Durante las revisiones de esta lección se han publicado las versiones 0.10 y 0.10.1, pero sus actualizaciones no impiden el correcto seguimiento de esta lección. Puedes leer más acerca de las actualizaciones de Gephi en [https://gephi.wordpress.com/2022/05/11/transition-to-semantic-versioning/](https://perma.cc/XPF2-ZKJY) y en [https://github.com/gephi/gephi/releases](https://perma.cc/NQL4-77P2).
+[^3]: Por ejemplo, este estupendo videotutorial en 5 partes de Salvador Sánchez, disponible en YouTube: [https://www.youtube.com/playlist?list=PLIvIcfwy1T6IDiW3K10TplK3rvdwMLOb2](https://www.youtube.com/playlist?list=PLIvIcfwy1T6IDiW3K10TplK3rvdwMLOb2). O la *introducción rápida a Gephi* de José Manuel Galán, también en YouTube: [https://www.youtube.com/watch?v=sX5XYec4tWo](https://www.youtube.com/watch?v=sX5XYec4tWo).
+[^4]: Si te interesa conocer más sobre cómo funciona `ForceAtlas 2` y sabes inglés, te recomiendo este artículo de sus desarrolladores: Jacomy, Mathieu, Tommaso Venturini, Sebastien Heymann, y Mathieu Bastian. «ForceAtlas2, a Continuous Graph Layout Algorithm for Handy Network Visualization Designed for the Gephi Software». PLoS ONE 9, n.º 6 (2014): e98679. [https://doi.org/10.1371/journal.pone.0098679](https://doi.org/10.1371/journal.pone.0098679). +[^5]: 'Importancia' es un concepto algo complejo. Debemos diferenciar la importancia de los nodos según su centralidad (una importancia cuantitativa derivada del ARS) y la importancia que le otorgamos a los personajes (una importancia cualitativa, por ejemplo: protagonista, secundario, terciario, etc.). La correlación entre estos dos tipos de importancia no siempre se da, como demuestran Santa María Fernández et al. en un estudio de 2020. Te recomiendo este artículo para explorar en profundidad las implicaciones de las medidas de centralidad: Santa María Fernández, Teresa, José Calvo Tello, y Concepción María Jiménez Fernández. «¿Existe correlación entre importancia y centralidad? Evaluación de personajes con redes sociales en obras teatrales de la Edad de Plata». Digital Scholarship in the Humanities 36, n.º June (2020): i81-i88. [https://doi.org/10.1093/llc/fqaa015](https://doi.org/10.1093/llc/fqaa015). diff --git a/es/lecciones/analisis-temporal-red.md b/es/lecciones/analisis-temporal-red.md index 67c5769915..e6f88b29d6 100644 --- a/es/lecciones/analisis-temporal-red.md +++ b/es/lecciones/analisis-temporal-red.md @@ -388,7 +388,7 @@ Vamos a dar un paso atrás y reflexionar sobre lo que hemos aprendido. En este m Si hay algo que espero que hayas aprendido con este tutorial es la idea de que agregar datos temporales a los nodos y a los vínculos transforma una herramienta general de las ciencias sociales en un método útil para la argumentación histórica. La comparación de estructuras de red y las métricas para comparar intervalos de tiempo les da significación histórica que puede ser difícil o imposible de discernir en los análisis de redes sociales estáticos tradicionales. -Este tutorial ha presentado solo algunas de las muchas herramientas y técnicas que se pueden usar para el análisis de redes temporal. Un área especialmente interesante de este campo es la simulación dinámica que modela la transmisión de algo como, por ejemplo, una enfermedad o una idea entre individuos dentro de una red temporal. Si eso te suena interesante, echa un vistazo al paquete [EpiModel](http://www.epimodel.org) (en inglés) u otras herramientas creadas por los epidemiólogos para modelar la difusión dentro de redes dinámicas. +Este tutorial ha presentado solo algunas de las muchas herramientas y técnicas que se pueden usar para el análisis de redes temporal. Un área especialmente interesante de este campo es la simulación dinámica que modela la transmisión de algo como, por ejemplo, una enfermedad o una idea entre individuos dentro de una red temporal. Si eso te suena interesante, echa un vistazo al paquete [EpiModel](https://www.epimodel.org) (en inglés) u otras herramientas creadas por los epidemiólogos para modelar la difusión dentro de redes dinámicas. Dependiendo de los datos históricos con los que estés trabajando, el análisis de redes temporal te puede ofrecer ideas importantes sobre cómo las propiedades de los nodos, sus vínculos y la red en su conjunto cambian a lo largo del tiempo. 
Tanto si decides o no dar el salto al análisis de redes temporal, es útil recordar que las redes de todo tipo son fenómenos históricos que emergen, se desarrollan, se transforman hasta quedar irreconocibles y desaparecen con el transcurso del tiempo. @@ -399,7 +399,7 @@ Si has hecho este tutorial pero todavía te sientes más cómodo/a usando una in * [Convertir una red con fechas en una red dinámica](https://seinecle.github.io/gephi-tutorials/generated-html/converting-a-network-with-dates-into-dynamic.html) (en inglés) de Clément Levallois. * Ken Cherven hace un buen recorrido por el Análisis de Redes Dinámico con Gephi en su libro *Mastering Gephi Network Visualization* (2015). -Si tienes más ganas de realizar análisis de redes temporal con R, [este tutorial](https://web.archive.org/web/20180423112846/http://statnet.csde.washington.edu/workshops/SUNBELT/current/ndtv/ndtv_workshop.html) (en inglés) de Skye Bender-deMoll explica funciones adicionales y propiedades de los paquetes que hemos usado. Me sirvió como guía para aprender sobre el análisis de redes temporal, inspirándome a escribir este tutorial. +Si tienes más ganas de realizar análisis de redes temporal con R, [este tutorial](https://web.archive.org/web/20180423112846/https://statnet.csde.washington.edu/workshops/SUNBELT/current/ndtv/ndtv_workshop.html) (en inglés) de Skye Bender-deMoll explica funciones adicionales y propiedades de los paquetes que hemos usado. Me sirvió como guía para aprender sobre el análisis de redes temporal, inspirándome a escribir este tutorial. También puedes adentrarte en la documentación de los paquetes [networkDynamic](https://cran.r-project.org/web/packages/networkDynamic/index.html), [TSNA](https://cran.r-project.org/web/packages/tsna/index.html) y [NDTV](https://cran.r-project.org/web/packages/ndtv/index.html). diff --git a/es/lecciones/analisis-voyant-tools.md b/es/lecciones/analisis-voyant-tools.md index f680a94fce..e180f680c9 100644 --- a/es/lecciones/analisis-voyant-tools.md +++ b/es/lecciones/analisis-voyant-tools.md @@ -30,9 +30,9 @@ En este tutorial se aprenderá cómo organizar un conjunto de textos para la inv ## Análisis de corpus -El análisis de corpus es un tipo de [análisis de contenido](http://vocabularios.caicyt.gov.ar/portalthes/42/term/26) que permite hacer comparaciones a gran escala sobre un conjunto de textos o corpus. +El análisis de corpus es un tipo de [análisis de contenido](https://vocabularios.caicyt.gov.ar/portalthes/42/term/26) que permite hacer comparaciones a gran escala sobre un conjunto de textos o corpus. -Desde el inicio de la informática, tanto lingüistas computacionales como especialistas de la [recuperación de la información](http://vocabularios.caicyt.gov.ar/portalthes/42/term/178) han creado y utilizado software para apreciar patrones que no son evidentes en una lectura tradicional o bien para corroborar hipótesis que intuían al leer ciertos textos pero que requerían de trabajos laboriosos, costosos y mecánicos. Por ejemplo, para obtener los patrones de uso y decaimiento de ciertos términos en una época dada era necesario contratar a personas que revisaran manualmente un texto y anotaran cuántas veces aparecía el término buscado. Muy pronto, al observar las capacidades de "contar" que tenían las computadoras, estos especialistas no tardaron en escribir programas que facilitaran la tarea de crear listas de frecuencias o tablas de concordancia (es decir, tablas con los contextos izquierdos y derechos de un término).
El programa que aprenderás a usar en este tutorial, se inscribe en este contexto. +Desde el inicio de la informática, tanto lingüistas computacionales como especialistas de la [recuperación de la información](https://vocabularios.caicyt.gov.ar/portalthes/42/term/178) han creado y utilizado software para apreciar patrones que no son evidentes en una lectura tradicional o bien para corroborar hipótesis que intuían al leer ciertos textos pero que requerían de trabajos laboriosos, costosos y mecánicos. Por ejemplo, para obtener los patrones de uso y decaimiento de ciertos términos en una época dada era necesario contratar a personas que revisaran manualmente un texto y anotaran cuántas veces aparecía el término buscado. Muy pronto, al observar las capacidades de "contar" que tenían las computadoras, estos especialistas no tardaron en escribir programas que facilitaran la tarea de crear listas de frecuencias o tablas de concordancia (es decir, tablas con los contextos izquierdos y derechos de un término). El programa que aprenderás a usar en este tutorial se inscribe en este contexto. ## Qué aprenderás en este tutorial @@ -63,7 +63,7 @@ Lo primero que debes hacer es buscar la información que te interesa. Para este ### 2. Copiar en editor de texto plano Una vez localizada la información, el segundo paso es copiar el texto que te interesa desde la primera palabra dicha hasta la última y guardarlo en un editor de texto sin formato. Por ejemplo: -* en Windows podría guardarse en [Bloc de Notas](https://web.archive.org/web/20091013225307/http://windows.microsoft.com/en-us/windows-vista/Notepad-frequently-asked-questions) +* en Windows podría guardarse en [Bloc de Notas](https://web.archive.org/web/20091013225307/https://windows.microsoft.com/en-us/windows-vista/Notepad-frequently-asked-questions) * en Mac, en [TextEdit](https://support.apple.com/es-mx/guide/textedit/welcome/mac); * y en Linux, en [Gedit](https://wiki.gnome.org/Apps/Gedit). @@ -407,9 +407,9 @@ Hockey, Susan. 2004 “The History of Humanities Computing”. _A Companion to D Peña, Gilberto Anguiano, y Catalina Naumis Peña. 2015. «Extracción de candidatos a términos de un corpus de la lengua general». _Investigación Bibliotecológica: Archivonomía, Bibliotecología e Información_ 29 (67): 19-45. [https://doi.org/10.1016/j.ibbai.2016.02.035](https://doi.org/10.1016/j.ibbai.2016.02.035). -Sinclair, Stéfan and Geoffrey Rockwell, 2016. _Voyant Tools_. Web. [http://voyant-tools.org/](http://voyant-tools.org/). +Sinclair, Stéfan and Geoffrey Rockwell, 2016. _Voyant Tools_. Web. [https://voyant-tools.org/](https://voyant-tools.org/). -Terras, Melissa, 2013. "For Ada Lovelace Day – Father Busa’s Female Punch Card Operatives". _Melissa Terras' Blog_. Web. [http://melissaterras.blogspot.com/2013/10/for-ada-lovelace-day-father-busas.html](http://melissaterras.blogspot.com/2013/10/for-ada-lovelace-day-father-busas.html). +Terras, Melissa, 2013. "For Ada Lovelace Day – Father Busa’s Female Punch Card Operatives". _Melissa Terras' Blog_. Web. [https://melissaterras.blogspot.com/2013/10/for-ada-lovelace-day-father-busas.html](https://melissaterras.blogspot.com/2013/10/for-ada-lovelace-day-father-busas.html).
    Este tutorial fue escrito gracias al apoyo de la Academia Británica y preparado durante el Taller de escritura de The Programming Historian en la Universidad de los Andes en Bogotá, Colombia, del 31 de julio al 3 de agosto de 2018. diff --git a/es/lecciones/construir-repositorio-de-fuentes.md b/es/lecciones/construir-repositorio-de-fuentes.md index f15eb79079..de69eb87d4 100644 --- a/es/lecciones/construir-repositorio-de-fuentes.md +++ b/es/lecciones/construir-repositorio-de-fuentes.md @@ -54,7 +54,7 @@ El sentido básico de este esquema puede resumirse con la siguiente imagen: {% include figure.html filename="img_1.1.jpg" caption="Síntesis de la tecnología de servidor" %} -Para realizar la instalación local y configurar Omeka en tu ordenador, te recomendamos seguir las instrucciones del paso 2A de la lección [*Installing Omeka*](/en/lessons/installing-omeka#step-2a-for-vps-hosting). También es recomendable consultar el manual elaborado por Manuel Alcaraz (bibliotecónomo con grado en ingeniería informática), en el cual cubre los aspectos necesarios para gestionar una instalación en Omeka desde la interface de administración del sitio. +Para realizar la instalación local y configurar Omeka en tu ordenador, te recomendamos seguir las instrucciones del paso 2A de la lección [*Installing Omeka*](/en/lessons/installing-omeka#step-2a-for-vps-hosting). También es recomendable consultar el manual elaborado por Manuel Alcaraz (bibliotecónomo con grado en ingeniería informática), en el cual se cubren los aspectos necesarios para gestionar una instalación en Omeka desde la interfaz de administración del sitio. En este tutorial te ayudaremos a instalar una máquina virtual, es decir, una serie de programas que permiten ejecutar aplicaciones que requieren tecnología de servidor (como Omeka) sin necesidad de tener contratado un servidor Web. También te mostraremos una forma común de gestionar la base de datos a través del aplicativo phpMyAdmin, una herramienta ampliamente difundida para administrar MySQL. @@ -79,8 +79,8 @@ Tras la instalación, la primera acción que debemos realizar será activar los {% include figure.html filename="img_1.1-modact.jpg" caption="Módulos XAMPP activados" %} -Para probar que todo funciona correctamente, ingresa desde tu navegador a la dirección o . Si la instalación es correcta te mostrará la pantalla de inicio: - +Para probar que todo funciona correctamente, ingresa desde tu navegador a la dirección `` o ``. Si la instalación es correcta te mostrará la pantalla de inicio: + {% include figure.html filename="img_1.1-xampp-dashboard.jpg" caption="Pantalla de inicio (dashboard) de XAMPP" %} Deberás tener en el menú de inicio de Windows un menú de XAMPP con tres opciones desplegables. Las más útiles para nuestro trabajo serán "XAMPP Control Panel", que abre el panel de control para activar o desactivar los módulos, y "XAMPP htdocs folder", un enlace al directorio donde se guardarán los archivos de Omeka para realizar la instalación, por lo general es `C:\xampp\htdocs` para Windows. En Linux este directorio se encuentra en la ruta `/opt/lampp/htdocs`. @@ -95,7 +95,7 @@ Para la instalación de Omeka es necesario crear una base de datos que albergar Para crear la base de datos es posible utilizar los métodos explicados en el paso 2 de [*Installing Omeka*](/en/lessons/installing-omeka#step-2-install-your-server-and-database). También podemos utilizar *phpMyAdmin* para crear la base de datos de la instalación e incluso para editarla después.
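Si prefieres no depender de la interfaz gráfica, la misma base de datos puede crearse desde Python con el conector oficial de MySQL. Es solo un boceto orientativo: el nombre `omeka` y las credenciales son suposiciones que deberás ajustar a tu instalación de XAMPP.

```python
import mysql.connector  # pip install mysql-connector-python

# Conexión al MySQL/MariaDB local de XAMPP (por defecto, root sin contraseña)
conexion = mysql.connector.connect(host="localhost", user="root", password="")
cursor = conexion.cursor()

# La misma base de datos que crearíamos en phpMyAdmin, con el cotejamiento
# utf8_spanish_ci que se recomienda más abajo
cursor.execute("CREATE DATABASE omeka CHARACTER SET utf8 COLLATE utf8_spanish_ci")
conexion.close()
```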
-El primer paso consiste en ingresar al entorno de *phpMyAdmin* a través de la dirección XAMPP te dejará ingresar sin contraseña, pero otros servicios (como Bitnami) te exigirán permisos de usuario para ingresar.[^bitnami_ingreso] La página de inicio te mostrará una página con la configuración general del servidor de la base de datos, el servidor web y de la aplicación. Esta pantalla será importante al momento de requerir la versión de MySQL ("Servidor de base de datos >> Versión del servidor"), la versión de PHP ("Servidor web >> Versión de PHP"), o incluso el nombre de usuario del servidor (por lo general "root@localhost"). Esta pantalla es útil no sólo en instalaciones locales, servirá también para comprobar que algún servicio de alojamiento web corresponda con la tecnología necesaria para ejecutar ciertas aplicaciones. +El primer paso consiste en ingresar al entorno de *phpMyAdmin* a través de la dirección XAMPP te dejará ingresar sin contraseña, pero otros servicios (como Bitnami) te exigirán permisos de usuario para ingresar.[^bitnami_ingreso] La página de inicio te mostrará una página con la configuración general del servidor de la base de datos, el servidor web y de la aplicación. Esta pantalla será importante al momento de requerir la versión de MySQL ("Servidor de base de datos >> Versión del servidor"), la versión de PHP ("Servidor web >> Versión de PHP"), o incluso el nombre de usuario del servidor (por lo general "root@localhost"). Esta pantalla es útil no sólo en instalaciones locales, servirá también para comprobar que algún servicio de alojamiento web corresponda con la tecnología necesaria para ejecutar ciertas aplicaciones. En *phpMyAdmin* seleccionaremos la pestaña "Bases de datos" donde veremos un pequeño formulario para crear la base de datos, sólo tenemos que ingresar el *nombre de la base de datos* e indicar el *cotejamiento*. Seleccionaremos el cotejamiento `utf8_spanish_ci` ya que representará una mayor precisión al momento de ordenar los elementos (*items*) en Omeka.[^collate] Esto es particularmente relevante en las instalaciones en Linux que suelen seleccionar de manera predeterminada un cotejamiento `latin1_`. @@ -134,7 +134,7 @@ Es opcional, aunque muy recomendable, que en tanto el repositorio se encuentre e # Un vistazo al "esqueleto" de Omeka -Si vamos a [phpMyAdmin](http://localhost/phpmyadmin) veremos que la base de datos vacía está ahora llena con 19 tablas interdependientes. La estructura de la base de datos (*database schema*) puede describirse de manera sintética agrupando las tablas en cinco grupos de información: datos para los elementos y colecciones, etiquetas, metatados de los tipos de elementos, información de usuarios, texto para búsqueda, y tablas para procesos del sistema. Un mapa resumido de las interdependencias entre las tablas se puede ver en la siguiente imagen: +Si vamos a [phpMyAdmin](http://localhost/phpmyadmin) veremos que la base de datos vacía está ahora llena con 19 tablas interdependientes. La estructura de la base de datos (*database schema*) puede describirse de manera sintética agrupando las tablas en cinco grupos de información: datos para los elementos y colecciones, etiquetas, metadatos de los tipos de elementos, información de usuarios, texto para búsqueda, y tablas para procesos del sistema.
Un mapa resumido de las interdependencias entre las tablas se puede ver en la siguiente imagen: {% include figure.html filename="img_2.1-omeka_mysql_schema.png" caption="Esquema de la interdependencia de la base de datos de Omeka" %} @@ -189,7 +189,7 @@ Para muchos la palabra "Metadatos" suena oscura y "metafísica", algo que está Los metadatos son independientes del lenguaje de máquina o de programación, es decir, son categorías completamente personalizables que funcionan de manera independiente de la plataforma. Esta libertad conlleva una gran desventaja y es que si cada usuario creara sus elementos de manera arbitraria no habría manera de intercambiar información entre sistemas. Por esa razón, se creó una estrategia de estandarización de los conjuntos de metadatos de tal manera que facilite la interacción entre plataformas, la actualización del software y, sobre todo, el compartir y encontrar información en grandes repositorios. -Omeka Classic se fundamenta en el estándar *Dublin Core*, específicamente en el esquema básico de 15 descriptores Dublin Core Metadata Element Set Version 1.1: +Omeka Classic se fundamenta en el estándar *Dublin Core*, específicamente en el esquema básico de 15 descriptores Dublin Core Metadata Element Set Version 1.1: Título (title) Autor (creator) @@ -241,7 +241,7 @@ Si estamos construyendo un sitio personal no es necesario (aunque sería lo idea # Plugins o complementos -Un plugin es un pequeño programa que añade una función específica a otro programa, por ejemplo, un CMS tipo Wordpress o Joomla puede incorporar una casilla de comentarios, pero un plugin puede hacer que esta casilla se conecte con las redes sociales y comentar desde su perfil de Facebook o Twitter. En esta lección sólo veremos cómo añadir plugins a nuestra instalación de Omeka[^omeka.net], si desea profundizar en la manera de desarrollar un complemento lo más recomendable es consultar la documentación disponible en la página de Omeka. +Un plugin es un pequeño programa que añade una función específica a otro programa, por ejemplo, un CMS tipo Wordpress o Joomla puede incorporar una casilla de comentarios, pero un plugin puede hacer que esta casilla se conecte con las redes sociales y comentar desde su perfil de Facebook o Twitter. En esta lección sólo veremos cómo añadir plugins a nuestra instalación de Omeka[^omeka.net], si desea profundizar en la manera de desarrollar un complemento lo más recomendable es consultar la documentación disponible en la página de Omeka. Las dos fuentes principales de plugins para Omeka son el repositorio oficial de complementos y Github. Ambos listados son dinámicos, por lo que recomendamos visitar periódicamente estos lugares para conocer novedades y actualizaciones. @@ -278,7 +278,7 @@ Para usar Omeka no es realmente necesario ningún complemento, sin embargo, el c La selección de plugins dependerá en buena medida de los objetivos del repositorio, para nuestro caso se requerirán complementos que permitan: 1. Mostrar los documentos y hacerlos legibles a los usuarios, ya sean imágenes, archivos PDF u otros. Por ejemplo, PDF Embed y Universal Viewer. -2. Gestionar los metadatos de cada documento: procedencia, cobertura, fechas, nombres, etc. Por ejemplo Dublin Core Extended y Hide Elements. +2. Gestionar los metadatos de cada documento: procedencia, cobertura, fechas, nombres, etc. Por ejemplo Dublin Core Extended y Hide Elements. 3. Buscar información: complementos que contribuyan a ampliar las capacidades de las búsquedas de Omeka. 
Por ejemplo Search by Metadata y PDF Text. 4. Interrelacionar elementos y colecciones. Por ejemplo Item Relations, Collection Tree y Geolocation, Reference. 5. Permitir la transcripción de documentos. Scripto es la opción más recomendada, pero también es posible apoyar el proceso de transcripción con el plugin Contribution diff --git a/es/lecciones/contar-frecuencias.md b/es/lecciones/contar-frecuencias.md index c8c802aa82..08811363d0 100644 --- a/es/lecciones/contar-frecuencias.md +++ b/es/lecciones/contar-frecuencias.md @@ -459,8 +459,8 @@ Para seguir a lo largo de las lecciones futuras es importante que tengas los arc - python-es-lecciones5.zip ([zip sync][]) - [lista por comprensión]: http://docs.python.org/tutorial/datastructures.html#list-comprehensions - [informáticos de Glasgow]: http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words - [Regular Expressions]: https://web.archive.org/web/20180416143856/http://www.diveintopython.net/regular_expressions/index.html + [lista por comprensión]: https://docs.python.org/tutorial/datastructures.html#list-comprehensions + [informáticos de Glasgow]: https://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words + [Regular Expressions]: https://web.archive.org/web/20180416143856/https://www.diveintopython.net/regular_expressions/index.html [zip]: /assets/python-es-lecciones4.zip [zip sync]: /assets/python-es-lecciones5.zip diff --git a/es/lecciones/corpus-paralelo-lfaligner.md b/es/lecciones/corpus-paralelo-lfaligner.md index dee75b0f94..0b65014b29 100644 --- a/es/lecciones/corpus-paralelo-lfaligner.md +++ b/es/lecciones/corpus-paralelo-lfaligner.md @@ -27,7 +27,7 @@ doi: 10.46430/phes0044 Un corpus paralelo o *bitexto* consiste en la recopilación de varias versiones de un texto. En este tutorial aprenderás a alinear el texto original con sus traducciones para poder cotejarlos con facilidad. ## Introducción -LF Aligner es un programa gratuito, basado en un [algoritmo de código abierto de alineación de oraciones](https://github.com/danielvarga/hunalign), que pertenece al conjunto de herramientas digitales llamadas ***CATs*** (*Computer Assisted Translation Tools*, por sus siglas en inglés) o herramientas de traducción asistida. Principalmente, se usa para la creación de bitextos que facilitan la búsqueda de términos especializados y sus traducciones. Sitios como [Linguee](https://www.linguee.es/) utilizan este tipo de herramientas para crear enormes corpus paralelos que el usuario puede consultar fácilmente. En ciencias sociales y humanidades podemos aprovechar este programa para crear textos que faciliten las tareas de lectura distante y [análisis estilístico](http://vocabularios.caicyt.gov.ar/portalthes/42/term/134). La aplicación puede importar texto de documentos en múltiples formatos y de memorias de traducción generadas con programas de código libre o privativo. En este tutorial nos centraremos en la importación de texto de fuentes digitales usadas comunmente por los investigadores como páginas web o documentos de texto plano, ya que, además, agilizan el proceso de alineación del corpus. +LF Aligner es un programa gratuito, basado en un [algoritmo de código abierto de alineación de oraciones](https://github.com/danielvarga/hunalign), que pertenece al conjunto de herramientas digitales llamadas ***CATs*** (*Computer Assisted Translation Tools*, por sus siglas en inglés) o herramientas de traducción asistida. Principalmente, se usa para la creación de bitextos que facilitan la búsqueda de términos especializados y sus traducciones. 
Sitios como [Linguee](https://www.linguee.es/) utilizan este tipo de herramientas para crear enormes corpus paralelos que el usuario puede consultar fácilmente. En ciencias sociales y humanidades podemos aprovechar este programa para crear textos que faciliten las tareas de lectura distante y [análisis estilístico](https://vocabularios.caicyt.gov.ar/portalthes/42/term/134). La aplicación puede importar texto de documentos en múltiples formatos y de memorias de traducción generadas con programas de código libre o privativo. En este tutorial nos centraremos en la importación de texto de fuentes digitales usadas comúnmente por los investigadores como páginas web o documentos de texto plano, ya que, además, agilizan el proceso de alineación del corpus. Para este tutorial necesitarás los siguientes materiales y conocimientos: --- @@ -36,9 +36,9 @@ Para este tutorial necesitarás los siguientes materiales y conocimientos: * Un texto de partida -digitalizado- y por lo menos una traducción de este. En este caso, alinearemos distintas traducciones de un documento que desde 1948 guía el quehacer y la convivencia humana en todos los ámbitos de la vida pública y privada, la [Declaración Universal de Derechos Humanos](https://es.wikipedia.org/wiki/Declaraci%C3%B3n_Universal_de_los_Derechos_Humanos): en [español](/assets/corpus-paralelo-lfaligner/DDHH_es.txt), [inglés](/assets/corpus-paralelo-lfaligner/DDHH_en.txt), [francés](/assets/corpus-paralelo-lfaligner/DDHH_fr.txt) y [portugués](/assets/corpus-paralelo-lfaligner/DDHH_pt.txt) * Conocimiento básico de las lenguas de traducción, ya que en algunos casos tendremos que modificar algunos de los segmentos alineados. -Adicionalmente, podemos utilizar este programa para alinear distintas versiones de un texto en una misma lengua, lo que es útil para [análisis relacional](http://vocabularios.caicyt.gov.ar/portalthes/42/term/136), pero hay otras iniciativas que cumplen mejor con esta tarea como [Collatex](https://collatex.net/). +Adicionalmente, podemos utilizar este programa para alinear distintas versiones de un texto en una misma lengua, lo que es útil para [análisis relacional](https://vocabularios.caicyt.gov.ar/portalthes/42/term/136), pero hay otras iniciativas que cumplen mejor con esta tarea como [Collatex](https://collatex.net/). -Es importante ser sistemático con la clasificación de los documentos. El nombre de nuestros archivos txt debe acompañarse con el código que alude a la lengua del texto. Con ello aseguramos que la información con la que trabajamos siga convenciones oficiales que serán útiles a la hora de comunicar los resultados de nuestra investigación Para ello nos basaremos en el código [ISO 639-1](http://utils.mucattu.com/iso_639-1.html) que identifica a cada lengua con dos letras. Así, el español se identifica con *es*, el inglés con *en*, el francés con *fr* y el portugués con *pt*. +Es importante ser sistemático con la clasificación de los documentos. El nombre de nuestros archivos txt debe acompañarse con el código que alude a la lengua del texto. Con ello aseguramos que la información con la que trabajamos siga convenciones oficiales que serán útiles a la hora de comunicar los resultados de nuestra investigación. Para ello nos basaremos en el código [ISO 639-1](https://utils.mucattu.com/iso_639-1.html) que identifica a cada lengua con dos letras. Así, el español se identifica con *es*, el inglés con *en*, el francés con *fr* y el portugués con *pt*.
Si trabajas con lenguas que no estén incluidas en ese código, puedes recurrir al código [ISO 639-3](https://es.wikipedia.org/wiki/ISO_639-3) que utiliza descriptores de 3 letras y abarca la totalidad de las lenguas del mundo. @@ -260,7 +260,7 @@ Si deseamos editar el documento de formas que la herramienta gráfica de LF Alig {% include figure.html filename="lfaligner-21.jpg" caption="Búsqueda simple con el navegador Google Chrome" %} -También puedes guardar, por separado, las versiones recién alineadas en documentos de texto plano (txt) y usar un visualizador sencillo de traducciones paralelas como [AntPConc](http://www.laurenceanthony.net/software/antpconc/). +También puedes guardar, por separado, las versiones recién alineadas en documentos de texto plano (txt) y usar un visualizador sencillo de traducciones paralelas como [AntPConc](https://www.laurenceanthony.net/software/antpconc/). Sobre la base de la imagen anterior, podemos plantear algunas preguntas que podrían ser útiles para nuestra investigación; tanto en la fase preliminar de un proyecto, en la cual no se tiene claridad sobre lo que se quiere observar, como en una fase avanzada, en la que hacemos búsquedas motivadas por preguntas y criterios previamente establecidos. El tutorial sobre [AntConc](/es/lecciones/analisis-de-corpus-con-antconc) alojado en este sitio profundiza más en el concepto de lectura distante. diff --git a/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.md b/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.md index 9dc85d473c..980a25c057 100644 --- a/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.md +++ b/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.md @@ -36,7 +36,7 @@ doi: 10.46430/phes0002 Introducción ------------ -La visualizaciones de redes pueden ayudar a los humanistas a revelar patrones complejos escondidos y estructuras en fuentes textuales. Este tutorial explica cómo extraer datos en red (personas, instituciones, lugares, etcétera.) de fuentes históricas a través del uso de métodos no especializados desarrollados en el marco del análisis de datos qualitativos (Qualitative Data Analysis, QDA) y el análisis de redes sociales (Social Network Analysis, SNA), y cómo visualizar estos datos con [*Palladio*](http://hdlab.stanford.edu/palladio/), una aplicación independiente de plataforma y que es particularmente fácil de usar. +Las visualizaciones de redes pueden ayudar a los humanistas a revelar patrones complejos escondidos y estructuras en fuentes textuales. Este tutorial explica cómo extraer datos en red (personas, instituciones, lugares, etcétera) de fuentes históricas a través del uso de métodos no especializados desarrollados en el marco del análisis de datos cualitativos (Qualitative Data Analysis, QDA) y el análisis de redes sociales (Social Network Analysis, SNA), y cómo visualizar estos datos con [*Palladio*](https://hdlab.stanford.edu/palladio/), una aplicación independiente de plataforma y que es particularmente fácil de usar.
Ahora, imagina que quieres explicar estas relaciones a alguien que no conoce a ninguno de tus amigos, o que quieres incluir las relaciones entre los amigos de tus amigos. En situaciones como esta el lenguaje y nuestra capacidad de comprender estructuras sociales llega a sus límites rápidamente. Las visualizaciones gráficas pueden ser medios para comunicar y explorar efectivamente estas complejas constelaciones. En general tú puedes pensar el análisis de redes sociales (ARS) como un medio para transformar la complejidad de un problema en un objeto de investigación. A menudo, los nodos en una red representan humanos conectados con otros humanos por todos los tipos de relaciones sociales imaginables. Pero casi cualquier cosa puede ser entendida como un nodo: una película, un lugar, un título laboral, un punto en el tiempo, un lugar de reunión. En forma similar el concepto de vínculo (también llamado arista) entre nodos es igualmente flexible: dos teatros pueden estar conectados por una película mostrada en ambos, o por co-propiedad, proximidad geográfica, o haber empezado a funcionar el mismo año. Todo esto depende de tus intereses de investigación y cómo los expresas en forma de nodos y relaciones en una red. -Esta lección no reemplaza ninguno de los muchos manuales genéricos de análisis de redes, como el libro de [John Scott _Social Network Analysis_](https://uk.sagepub.com/en-gb/eur/the-sage-handbook-of-social-network-analysis/book277881). Para una introducción general al campo y sus dificultades para los humanistas recomiendo [ ](https://web.archive.org/web/20240203222438/https://www.scottbot.net/HIAL/index.html@p=6279.html)[*la serie de blog posts de Scott Weingart "Networks Demystified"*](https://web.archive.org/web/20240203222438/https://www.scottbot.net/HIAL/index.html@p=6279.html) así como también[ ](http://hal.archives-ouvertes.fr/docs/00/64/93/16/PDF/lemercier_A_zg.pdf)[*el artículo de Claire Lemercier "Formal network methods in history: why and how?"*](http://hal.archives-ouvertes.fr/docs/00/64/93/16/PDF/lemercier_A_zg.pdf). También podrías querer explorar la bibliografía y calendario de eventos en [_Historical Network Research_](http://historicalnetworkresearch.org/) para darte una idea de cómo los historiadores han usado las redes en sus investigaciones. +Esta lección no reemplaza ninguno de los muchos manuales genéricos de análisis de redes, como el libro de [John Scott _Social Network Analysis_](https://uk.sagepub.com/en-gb/eur/the-sage-handbook-of-social-network-analysis/book277881). Para una introducción general al campo y sus dificultades para los humanistas recomiendo [*la serie de blog posts de Scott Weingart "Networks Demystified"*](https://web.archive.org/web/20240203222438/https://www.scottbot.net/HIAL/index.html@p=6279.html) así como también [*el artículo de Claire Lemercier "Formal network methods in history: why and how?"*](https://hal.archives-ouvertes.fr/docs/00/64/93/16/PDF/lemercier_A_zg.pdf). También podrías querer explorar la bibliografía y calendario de eventos en [_Historical Network Research_](https://historicalnetworkresearch.org/) para darte una idea de cómo los historiadores han usado las redes en sus investigaciones. Este tutorial se enfoca en la extracción de datos de un texto desestructurado y muestra una forma de visualizarlos utilizando Palladio.
Está diseñado a propósito para ser lo más simple y robusto posible. Por el alcance limitado de este tutorial, es suficiente decir que un actor se refiere a las personas, instituciones, etcétera, que son el objeto de estudio y que están conectadas por relaciones. Dentro del contexto del Análisis de Redes Sociales (ARS) (también llamada gráfica o grafo de red), a los actores o puntos centrales en cuestión, les llamamos nodos, y a las conexiones que existen entre ellos, les llamamos lazos o vínculos. En todos los casos es importante recordar que los nodos y los lazos son modelos drásticamente simplificados utilizados para representar la complejidad de eventos pasados y en sí mismos muchas veces no son suficientes para generar conocimiento. Pero es posible que el gráfico resalte algunos aspectos interesantes, desafíe tu hipótesis, y/o te lleve a generar nuevas hipótesis. *Los diagramas de redes se vuelven más significativos cuando son parte de un diálogo con datos y otras fuentes de información*. @@ -56,7 +56,7 @@ En otras palabras, el reto es sistematizar la interpretación textual. Las redes Sobre el caso de estudio -------------------- -El caso de estudio que utilizo para este tutorial es una narrativa en primera persona de Ralph Neumann, un judío que sobrevivió al Holocausto. Puedes encontrar en texto en [*internet*](http://web.archive.org/web/20180422010025/http://www.gdw-berlin.de/fileadmin/bilder/publ/publikationen_in_englischer_sprache/2006_Neuman_eng.pdf). El esquema de codificación que presento abajo es una versión simplificada del que desarrollé durante [*mi proyecto doctoral sobre redes de apoyo encubierto durante la Segunda Guerra Mundial*](http://martenduering.com/research/covert-networks-during-the-holocaust/). Mi investigación estuvo guiada por tres preguntas: ¿En qué medida las relaciones sociales pueden ayudar a explicar por qué personas comunes tomaron los riesgos asociados a ayudar a otros? ¿Cómo dichas relaciones permitieron a la gente prestar ayuda dado que tenían a su disposición recursos muy limitados? ¿Cómo ayudaron las relaciones sociales a los refugiados judíos a sobrevivir clandestinamente? +El caso de estudio que utilizo para este tutorial es una narrativa en primera persona de Ralph Neumann, un judío que sobrevivió al Holocausto. Puedes encontrar el texto en [*internet*](https://web.archive.org/web/20180422010025/https://www.gdw-berlin.de/fileadmin/bilder/publ/publikationen_in_englischer_sprache/2006_Neuman_eng.pdf). El esquema de codificación que presento abajo es una versión simplificada del que desarrollé durante [*mi proyecto doctoral sobre redes de apoyo encubierto durante la Segunda Guerra Mundial*](https://martenduering.com/research/covert-networks-during-the-holocaust/). Mi investigación estuvo guiada por tres preguntas: ¿En qué medida las relaciones sociales pueden ayudar a explicar por qué personas comunes tomaron los riesgos asociados a ayudar a otros? ¿Cómo dichas relaciones permitieron a la gente prestar ayuda dado que tenían a su disposición recursos muy limitados? ¿Cómo ayudaron las relaciones sociales a los refugiados judíos a sobrevivir clandestinamente? En este proyecto las visualizaciones en red me ayudaron a descubrir intermediarios hasta el momento olvidados pero muy importantes, resaltar la importancia general de los refugiados judíos como intermediarios, y navegar los casi 5,000 actos de ayuda que conectaron alrededor de 1,400 personas entre 1942 y 1945.
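Para quien quiera experimentar con estas ideas de nodos y vínculos fuera de Palladio, dejamos aquí un boceto mínimo en Python (no forma parte de la lección original) usando el paquete Networkx, que más adelante se recomienda para usuarios de Python. Los nombres de personas y los atributos de las aristas son puramente ilustrativos:

```python
import networkx as nx  # supone que networkx está instalado (pip install networkx)

# Grafo dirigido: cada arista representa un acto de ayuda de un proveedor a un receptor.
G = nx.DiGraph()
G.add_edge("Persona A", "Ralph Neumann", forma_de_ayuda="alojamiento", inicio=1942, fin=1943)
G.add_edge("Persona B", "Ralph Neumann", forma_de_ayuda="alimentos", inicio=1943, fin=1944)
G.add_edge("Ralph Neumann", "Persona C", forma_de_ayuda="contacto", inicio=1944, fin=1945)

# El grado de salida de un nodo cuenta sus relaciones salientes,
# es decir, los actos de ayuda que esa persona prestó.
for nodo, grado in G.out_degree():
    print(nodo, grado)
```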
@@ -142,7 +142,7 @@ Los siguientes pasos explican cómo visualizar datos en red en Palladio, pero ta Paso a paso: -**1. Palladio.** Entra a [*http://hdlab.stanford.edu/palladio/*](http://hdlab.stanford.edu/palladio/)*.* +**1. Palladio.** Entra a [*https://hdlab.stanford.edu/palladio/*](https://hdlab.stanford.edu/palladio/)*.* **2. Comienza.** En el sitio web haz clic en el botón "Start". @@ -164,7 +164,7 @@ Paso a paso: {% include figure.html caption="Figura 9: Enlazando personas y relaciones." filename="diagramas-de-redes-09.png" %} -**7. Identifica datos temporales.** Palladio tiene una característica especial para visualizar tiempo. La puedes usar si sabes cuándo empieza y cuando termmina cada relación. La muestra de datos contiene dos columnas con los datos necesarios para la categoría de tiempo. Haz clic en "Tiempo en que paso comienza" y selecciona el tipo de datos "Date" (Fecha). Haz lo mismo para "Tiempo en que paso termina" (Figura 10). El equipo de Palladio recomienda que tus datos estén en el formato de YYYY-MM-DD (AAAA-MM-DD), pero mi tiempo en formato más abstracto funciona bien. Si quisieras cargar coordenadas geográficas (no cubiertas en este tutorial pero disponible acá: [*Palladio Simple Map Scenario*](http://hdlab.stanford.edu/doc/scenario-simple-map.pdf)) tendrías que seleccionar el tipo de datos "Coordinates". +**7. Identifica datos temporales.** Palladio tiene una característica especial para visualizar tiempo. La puedes usar si sabes cuándo empieza y cuándo termina cada relación. La muestra de datos contiene dos columnas con los datos necesarios para la categoría de tiempo. Haz clic en "Tiempo en que paso comienza" y selecciona el tipo de datos "Date" (Fecha). Haz lo mismo para "Tiempo en que paso termina" (Figura 10). El equipo de Palladio recomienda que tus datos estén en el formato de YYYY-MM-DD (AAAA-MM-DD), pero mi tiempo en formato más abstracto funciona bien. Si quisieras cargar coordenadas geográficas (no cubiertas en este tutorial pero disponibles acá: [*Palladio Simple Map Scenario*](https://hdlab.stanford.edu/doc/scenario-simple-map.pdf)) tendrías que seleccionar el tipo de datos "Coordinates". {% include figure.html caption="Figura 10: Cambiando el tipo de datos a 'Date' (Fecha)" filename="diagramas-de-redes-10.png"%} @@ -210,7 +210,7 @@ Ten en cuenta que si quisieras ver "Proveedor" y "Receptor" como un tipo de nodo {% include figure.html caption="Figura 18: Visualización de pasos en el tiempo en línea del tiempo." filename="diagramas-de-redes-18.png" %} -**15. Tamaño del nodo.** Palladio te deja cambiar el tamaño de tus nodos con base en los atributos de los actores. Ten en cuenta que esto no tiene sentido para los datos de la muestra dado que los valores numéricos representan categorías. Sin embargo, los tamaños de los nodos puedes ser útiles si fueras a representar las suma de los actos de ayuda de una persona, lo que en este caso correspondería a su [*Grado de salida*](http://en.wikipedia.org/wiki/Directed_graph#Indegree_and_outdegree), el número de relaciones salientes para un nodo. +**15. Tamaño del nodo.** Palladio te deja cambiar el tamaño de tus nodos con base en los atributos de los actores. Ten en cuenta que esto no tiene sentido para los datos de la muestra dado que los valores numéricos representan categorías.
Sin embargo, los tamaños de los nodos pueden ser útiles si fueras a representar la suma de los actos de ayuda de una persona, lo que en este caso correspondería a su [*Grado de salida*](https://en.wikipedia.org/wiki/Directed_graph#Indegree_and_outdegree), el número de relaciones salientes para un nodo. **16. Exporta tu visualización.** Palladio te deja exportar tus redes como archivos .svg, un formato de imagen hecho con vectores. Utiliza tu navegador preferido para abrirlas. @@ -245,15 +245,15 @@ Finalmente, cualquiera de las visualizaciones que puedes crear con el conjunto d Otras herramientas de visualización para tener en cuenta ------------------------------------------------------------ -[*Nodegoat*](http://nodegoat.net/) – similar a Palladio en cuanto que hace fácil la recolección de datos, el mapeo y la visualización en gráficas. Permite confirgurar fácilmente bases de datos relacionales y deja a los usuarios almacenar sus datos en servidores. [*El tutorial está disponible acá*](http://nodegoat.net/cms/UPLOAD/AsmallguidebyYanan11082014.pdf). +[*Nodegoat*](https://nodegoat.net/) – similar a Palladio en cuanto que hace fácil la recolección de datos, el mapeo y la visualización en gráficas. Permite configurar fácilmente bases de datos relacionales y deja a los usuarios almacenar sus datos en servidores. [*El tutorial está disponible acá*](https://nodegoat.net/cms/UPLOAD/AsmallguidebyYanan11082014.pdf). -[*NodeXL*](https://www.smrfoundation.org/nodexl/) – capaz de hacer varias tareas comunes en el análisis de redes sociales, fácil de usar, de código abierto pero requiere Windows y MS Office 2007 o más nuevo para correr.[ ](https://www.youtube.com/watch?v=pwsImFyc0lE)[*Tutorial 1*](https://www.youtube.com/watch?v=pwsImFyc0lE), [*Tutorial 2*](http://www.youtube.com/watch?v=xKhYGRpbwOc). +[*NodeXL*](https://www.smrfoundation.org/nodexl/) – capaz de hacer varias tareas comunes en el análisis de redes sociales, fácil de usar, de código abierto pero requiere Windows y MS Office 2007 o más nuevo para correr. [*Tutorial 1*](https://www.youtube.com/watch?v=pwsImFyc0lE), [*Tutorial 2*](https://www.youtube.com/watch?v=xKhYGRpbwOc). -[*Gephi*](https://gephi.github.io/) – programa de código abierto para cualquier plataforma. Es la más versátil y mejor conocida herramienta de visualización excepto por una curva de aprendizaje muy alta. Los desarrolladores anuncian soporte para lados paralelos en la versión 1.0. Tutoriales: por [*Clement Levallois*](http://www.clementlevallois.net/training.html) y [*Sebastien Heymann*](http://www.youtube.com/watch?v=L6hHv6y5GsQ). +[*Gephi*](https://gephi.github.io/) – programa de código abierto para cualquier plataforma. Es la herramienta de visualización más versátil y mejor conocida, aunque tiene una curva de aprendizaje muy pronunciada. Los desarrolladores anuncian soporte para lados paralelos en la versión 1.0. Tutoriales: por [*Clement Levallois*](https://www.clementlevallois.net/training.html) y [*Sebastien Heymann*](https://www.youtube.com/watch?v=L6hHv6y5GsQ). [*VennMaker*](https://www.vennmaker.com) – es independiente de plataforma y puede probarse de manera gratuita. VennMaker invierte el proceso de recolección de datos: los usuarios comienzan con un lienzo personalizable y dibujan los nodos auto-definidos en él. La herramienta recolecta los datos correspondientes tras bastidores.
-Las herramientas más comunmente utilizadas para análisis más matemáticos son [*UCINET*](https://sites.google.com/site/ucinetsoftware/home) (tiene licencia y turoriales disponibles en su página web) y [*Pajek*](http://pajek.imfm.si/doku.php) (gratuito) por el cual existe un muy buen [*libro de guía*](http://www.cambridge.org/us/academic/subjects/sociology/research-methods-sociology-and-criminology/exploratory-social-network-analysis-pajek-2nd-edition). Ambos fueron desarrollados por Windows pero corren bien en otros sistemas utilizando Wine. +Las herramientas más comúnmente utilizadas para análisis más matemáticos son [*UCINET*](https://sites.google.com/site/ucinetsoftware/home) (tiene licencia y tutoriales disponibles en su página web) y [*Pajek*](https://pajek.imfm.si/doku.php) (gratuito), para el cual existe un muy buen [*libro de guía*](https://www.cambridge.org/us/academic/subjects/sociology/research-methods-sociology-and-criminology/exploratory-social-network-analysis-pajek-2nd-edition). Ambos fueron desarrollados para Windows pero corren bien en otros sistemas utilizando Wine. Para usuarios de Python el muy bien documentado paquete [*Networkx*](https://networkx.github.io/) es un gran punto de partida; existen otros paquetes para otros lenguajes de programación. diff --git a/es/lecciones/crear-y-ver-archivos-html-con-python.md b/es/lecciones/crear-y-ver-archivos-html-con-python.md index c11060195f..cb4f8516a3 100644 --- a/es/lecciones/crear-y-ver-archivos-html-con-python.md +++ b/es/lecciones/crear-y-ver-archivos-html-con-python.md @@ -150,7 +150,7 @@ Para seguir a lo largo de las lecciones futuras es importante que tengas los arc - python-es-lecciones6.zip [zip sync] [archivo zip de las lecciones anteriores]: /assets/python-es-lecciones5.zip - [Zotero]: http://zotero.org - [tutorial de HTML de W3 Schools]: http://www.w3schools.com/html/default.asp - [declaración doctype]: http://www.w3schools.com/tags/tag_doctype.asp + [Zotero]: https://zotero.org + [tutorial de HTML de W3 Schools]: https://www.w3schools.com/html/default.asp + [declaración doctype]: https://www.w3schools.com/tags/tag_doctype.asp [zip sync]: /assets/python-es-lecciones6.zip diff --git a/es/lecciones/datos-de-investigacion-con-unix.md b/es/lecciones/datos-de-investigacion-con-unix.md index c209334ab2..9dd249f667 100644 --- a/es/lecciones/datos-de-investigacion-con-unix.md +++ b/es/lecciones/datos-de-investigacion-con-unix.md @@ -49,7 +49,7 @@ _____ ## *Software* y configuración -Los usuarios de Windows deben instalar Git Bash. Lo pueden hacer descargando el más reciente instalador de la [página web de Git para Windos](http://msysgit.github.io/). Las instrucciones para su instalación están disponibles en [Open Hatch](https://web.archive.org/web/20190114082523/https://openhatch.org/missions/windows-setup/install-git-bash) (en inglés). +Los usuarios de Windows deben instalar Git Bash. Lo pueden hacer descargando el más reciente instalador de la [página web de Git para Windows](https://msysgit.github.io/). Las instrucciones para su instalación están disponibles en [Open Hatch](https://web.archive.org/web/20190114082523/https://openhatch.org/missions/windows-setup/install-git-bash) (en inglés). Los usuarios de OS X y Linux necesitarán utilizar la Terminal, o intérprete de línea de comandos, como se explica en la "[Introducción a la línea de comandos de Bash](/es/lecciones/introduccion-a-bash)."
@@ -57,7 +57,7 @@ Esta lección se escribió utilizando Git Bash 1.9.0 en sistema operativo Window Los archivos utilizados en esta lección están disponibles en "[Figshare](https://doi.org/10.6084/m9.figshare.1172094)". Estos contienen metadatos de artículos académicos catalogados en el rubro 'Historia' en la base de datos ESTAR de la Biblioteca Británica. Los datos son distribuidos bajo una renuncia de derechos de autor CC0. -Descarga los datos requeridos en tu ordenador y descomprime el archivo zip. Si no cuentas con un software adecuado para descomprimir archivos .zip, te recomendamos [7-zip](http://www.7-zip.org/). En Windows, te aconsejamos descomprimir la carpeta en tu disco C: para que los archivos queden en tu directorio `c:\proghist\`. No obstante, cualquier locación trabajará bien, pero entonces es posible que tengas que ajustar tus comandos conforme vayas siguiendo la lección. En OS X o Linux, también te aconsejamos descomprimir en tu directorio de usuario para que aparezcan en `/user/NOMBREDEUSUARIO/proghist/`. En ambos casos, esto significa que cuando abras una nueva ventana de tu terminal, con solamente teclear `cd proghist` te podrás mover al directorio correcto. +Descarga los datos requeridos en tu ordenador y descomprime el archivo zip. Si no cuentas con un software adecuado para descomprimir archivos .zip, te recomendamos [7-zip](https://www.7-zip.org/). En Windows, te aconsejamos descomprimir la carpeta en tu disco C: para que los archivos queden en tu directorio `c:\proghist\`. No obstante, cualquier ubicación funcionará bien, pero entonces es posible que tengas que ajustar tus comandos conforme vayas siguiendo la lección. En OS X o Linux, también te aconsejamos descomprimir en tu directorio de usuario para que aparezcan en `/user/NOMBREDEUSUARIO/proghist/`. En ambos casos, esto significa que cuando abras una nueva ventana de tu terminal, con solamente teclear `cd proghist` te podrás mover al directorio correcto. _____ @@ -73,9 +73,9 @@ Escribe `ls` y oprime Enter. Esto imprime o muestra una lista que incluye dos ar Los archivos en este directorio son: el conjunto de datos `2014-01_JA.csv` que contiene los metadatos de los artículos académicos y un archivo con documentación acerca de `2014-01_JA.csv`, llamado `2014-01_JA.txt`. -El subdirectorio se llama `derived_data`. Contiene cuatro archivos [.tsv](http://en.wikipedia.org/wiki/Tab-separated_values) derivados del archivo `2014-01_JA.csv`. Cada uno de estos incluye los datos en los que aparece una palabra clave como `africa` o `america` en el campo 'Title' de `2014-01_JA.csv`. El directorio `derived_data` también incluye un subdirectorio llamado `results`. +El subdirectorio se llama `derived_data`. Contiene cuatro archivos [.tsv](https://en.wikipedia.org/wiki/Tab-separated_values) derivados del archivo `2014-01_JA.csv`. Cada uno de estos incluye los datos en los que aparece una palabra clave como `africa` o `america` en el campo 'Title' de `2014-01_JA.csv`. El directorio `derived_data` también incluye un subdirectorio llamado `results`. -*Nota: Los archivos [CSV](http://en.wikipedia.org/wiki/Comma-separated_values) son aquellos en los que las unidades de datos, o celdas de una tabla, están separados por comas (valores separados por comas) y los archivos TSV son aquellos en los que están separados por tabuladores.
Ambos se pueden leer en cualquier editor de texto o en programas de hoja de cálculo como Libre Office Calc o Microsoft Excel.* +*Nota: Los archivos [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) son aquellos en los que las unidades de datos, o celdas de una tabla, están separados por comas (valores separados por comas) y los archivos TSV son aquellos en los que están separados por tabuladores. Ambos se pueden leer en cualquier editor de texto o en programas de hoja de cálculo como Libre Office Calc o Microsoft Excel.* Antes de que comiences a trabajar con estos archivos debes moverte al directorio en el que están almacenados. Navega a `c:\proghist\data\derived_data` en Windows o a `~/users/NOMBREDEUSUARIO/proghist/data/derived_data` en OS X. @@ -131,8 +131,8 @@ _____ En esta lección has aprendido a realizar recuentos básicos en archivos, buscar entre tus datos cadenas de caracteres comunes y guardar resultados y datos derivados. Aunque esta lección se restringe a contar y extraer información de datos tabulados, el procedimiento se puede extender fácilmente a archivos de texto plano. Para ello te recomendamos dos guías escritas por William Turkel: -- William Turkel, '[Basic Text Analysis with Command Line Tools in Linux](http://williamjturkel.net/2013/06/15/basic-text-analysis-with-command-line-tools-in-linux/)' (15 de junio, 2013) -- William Turkel, '[Pattern Matching and Permuted Term Indexing with Command Line Tools in Linux](http://williamjturkel.net/2013/06/20/pattern-matching-and-permuted-term-indexing-with-command-line-tools-in-linux/)' (20 de junio, 2013) +- William Turkel, '[Basic Text Analysis with Command Line Tools in Linux](https://williamjturkel.net/2013/06/15/basic-text-analysis-with-command-line-tools-in-linux/)' (15 de junio, 2013) +- William Turkel, '[Pattern Matching and Permuted Term Indexing with Command Line Tools in Linux](https://williamjturkel.net/2013/06/20/pattern-matching-and-permuted-term-indexing-with-command-line-tools-in-linux/)' (20 de junio, 2013) Como sugieren estas recomendaciones, en esta lección solo revisamos superficialmente lo que es capaz de hacer el intérprete de Unix. Sin embargo, esperamos haberte proporcionado una prueba suficiente para impulsar una mayor investigación de su uso. diff --git a/es/lecciones/datos-tabulares-en-r.md b/es/lecciones/datos-tabulares-en-r.md index e148ef4f9f..84e4f6048c 100644 --- a/es/lecciones/datos-tabulares-en-r.md +++ b/es/lecciones/datos-tabulares-en-r.md @@ -550,10 +550,10 @@ Para más información sobre R, visita el [Manual de R](https://cran.r-project. También hay numerosos tutoriales de R online, incluyendo: -* [R: A self-learn tutorial](http://web.archive.org/web/20191015004305/https://www.nceas.ucsb.edu/files/scicomp/Dloads/RProgramming/BestFirstRTutorial.pdf) (en inglés) - este tutorial cubre varias funciones y provee ejercicios para practicar. +* [R: A self-learn tutorial](https://web.archive.org/web/20191015004305/https://www.nceas.ucsb.edu/files/scicomp/Dloads/RProgramming/BestFirstRTutorial.pdf) (en inglés) - este tutorial cubre varias funciones y provee ejercicios para practicar. * [DataCamp Introducción a R](https://www.datacamp.com/community/open-courses/introduccion-a-r) (en español) - este es un curso online gratuito que te ofrece comentarios sobre tu código para ayudarte a identificar errores y aprender a escribir código más eficientemente.
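Retomando por un momento la lección sobre Unix de más arriba: para quien prefiera hacer estos recuentos desde Python, un boceto mínimo (ajeno a ambas lecciones) reproduce el tipo de conteo de palabras clave que allí se hace con el intérprete. Supone que `2014-01_JA.csv` tiene una columna 'Title', como describe la lección, y que el archivo está codificado en UTF-8:

```python
import csv
from collections import Counter

conteo = Counter()
# Supone que 2014-01_JA.csv está en el directorio de trabajo y usa UTF-8.
with open("2014-01_JA.csv", newline="", encoding="utf-8") as archivo:
    for fila in csv.DictReader(archivo):
        titulo = (fila.get("Title") or "").lower()
        # Palabras clave tomadas de los ejemplos de la lección.
        for clave in ("africa", "america"):
            if clave in titulo:
                conteo[clave] += 1

print(conteo)
```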
-Finalmente, un buen recurso para los historiadores digitales es el libro [_Digital History Methods in R_](http://dh-r.lincolnmullen.com) de Lincoln Mullen. +Finalmente, un buen recurso para los historiadores digitales es el libro [_Digital History Methods in R_](https://dh-r.lincolnmullen.com) de Lincoln Mullen. ## Notas diff --git a/es/lecciones/de-html-a-lista-de-palabras-1.md b/es/lecciones/de-html-a-lista-de-palabras-1.md index b8fa34e098..aced098a7f 100644 --- a/es/lecciones/de-html-a-lista-de-palabras-1.md +++ b/es/lecciones/de-html-a-lista-de-palabras-1.md @@ -156,10 +156,10 @@ Para seguir a lo largo de las lecciones futuras es importante que tengas los arc -[transcripción del juicio criminal contra Benjamin Bowsey de 1780]: http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 +[transcripción del juicio criminal contra Benjamin Bowsey de 1780]: https://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 [Descargar páginas web con Python]: /es/lecciones/trabajar-con-paginas-web -[HTML]: http://www.w3schools.com/html/ -[1]: http://www.w3schools.com/html/ +[HTML]: https://www.w3schools.com/html/ +[1]: https://www.w3schools.com/html/ [Manipular cadenas de caracteres en Python]: /es/lecciones/manipular-cadenas-de-caracteres-en-python [Reutilizacion de código y modularidad]: /es/lecciones/reutilizacion-de-codigo-y-modularidad [zip]: /assets/python-es-lecciones2.zip diff --git a/es/lecciones/de-html-a-lista-de-palabras-2.md b/es/lecciones/de-html-a-lista-de-palabras-2.md index abbcbbf76b..a91b5c88fe 100644 --- a/es/lecciones/de-html-a-lista-de-palabras-2.md +++ b/es/lecciones/de-html-a-lista-de-palabras-2.md @@ -258,7 +258,7 @@ Para seguir a lo largo de las lecciones futuras es importante que tengas los arc - python-es-lecciones3.zip ([zip sync][]) [De HTML a lista de palabras (parte 1)]: /es/lecciones/de-html-a-lista-de-palabras-1 - [entero]: http://docs.python.org/2.4/lib/typesnumeric.html - [tipos]: http://docs.python.org/3/library/types.html + [entero]: https://docs.python.org/2.4/lib/typesnumeric.html + [tipos]: https://docs.python.org/3/library/types.html [zip]: /assets/python-es-lecciones2.zip [zip sync]: /assets/python-es-lecciones3.zip diff --git a/es/lecciones/descarga-automatizada-con-wget.md b/es/lecciones/descarga-automatizada-con-wget.md index 50e31a64ec..e69f12ffba 100644 --- a/es/lecciones/descarga-automatizada-con-wget.md +++ b/es/lecciones/descarga-automatizada-con-wget.md @@ -191,7 +191,7 @@ En este punto, las personas usuarias de las tres plataformas deben estar en la m La documentación completa para wget se puede encontrar en la página del [Manual de wget de GNU][]. -Tomemos un ejemplo de conjunto de datos. Digamos que deseas descargar todos los documentos alojados en el sitio web ActiveHistory.ca. Todos están ubicados en: <http://activehistory.ca/papers/>; en el sentido de que están todos contenidos en el directorio `/papers/`. Por ejemplo, el noveno documento publicado en el sitio web es . Piensa en esta estructura de la misma forma que los directorios en tu propia computadora. Si tienes una carpeta con la etiqueta `/Historia/`, es probable que contenga varios archivos dentro de ella. La misma estructura es válida para los sitios web y estamos usando esta lógica para decirle a nuestra computadora qué archivos queremos descargar. +Tomemos un ejemplo de conjunto de datos. Digamos que deseas descargar todos los documentos alojados en el sitio web ActiveHistory.ca. Todos están ubicados en: <https://activehistory.ca/papers/>; en el sentido de que están todos contenidos en el directorio `/papers/`.
Por ejemplo, el noveno documento publicado en el sitio web es . Piensa en esta estructura de la misma forma que los directorios en tu propia computadora. Si tienes una carpeta con la etiqueta `/Historia/`, es probable que contenga varios archivos dentro de ella. La misma estructura es válida para los sitios web y estamos usando esta lógica para decirle a nuestra computadora qué archivos queremos descargar. Si deseas descargarlos todos manualmente deberás escribir un programa personalizado o hacer clic derecho en cada documento para hacerlo. Si los archivos están organizados de una manera que se ajuste a tus necesidades de investigación, wget es el abordaje más rápido. @@ -228,7 +228,7 @@ Saving to: `index.html.1' 2012-05-15 15:50:26 (374 KB/s) - `index.html.1' saved [37668] ``` -Wget descargó la primera página de <http://activehistory.ca/papers/>, que contiene el índice de los documentos, a tu nuevo directorio. Si lo abres, verás el texto principal en la página de inicio de ActiveHistory.ca. Así que de un golpe ya hemos descargado algo rápidamente. +Wget descargó la primera página de <https://activehistory.ca/papers/>, que contiene el índice de los documentos, a tu nuevo directorio. Si lo abres, verás el texto principal en la página de inicio de ActiveHistory.ca. Así que de un golpe ya hemos descargado algo rápidamente. Pero lo que queremos hacer ahora es descargar cada uno de los documentos. Así que necesitamos agregar algunos comandos a wget. @@ -244,7 +244,7 @@ Acabamos de aprender cosas sobre el componente [URL] en el ejemplo anterior, ya -r ``` -La recuperación recursiva es la parte más importante de wget. Lo que esto significa es que el programa comienza siguiendo los enlaces del sitio web y también los descarga. Entonces, por ejemplo, <http://activehistory.ca/papers/> tiene un enlace a , por lo que también se descargará si utilizamos la recuperación recursiva. Sin embargo, también seguirá a cualquier otro enlace: si hubiera un enlace a en algún lugar de esa página, seguiría eso y lo descargaría también. De forma predeterminada, `-r` envía wget a una profundidad de cinco sitios después del primero. Esto es siguiendo los enlaces, hasta un límite de cinco clics después del primer sitio web. En este punto, será bastante indiscriminado. Así que necesitamos más comandos: +La recuperación recursiva es la parte más importante de wget. Lo que esto significa es que el programa comienza siguiendo los enlaces del sitio web y también los descarga. Entonces, por ejemplo, <https://activehistory.ca/papers/> tiene un enlace a , por lo que también se descargará si utilizamos la recuperación recursiva. Sin embargo, también seguirá a cualquier otro enlace: si hubiera un enlace a en algún lugar de esa página, seguiría eso y lo descargaría también. De forma predeterminada, `-r` envía wget a una profundidad de cinco sitios después del primero. Es decir, seguirá los enlaces hasta un límite de cinco clics después del primer sitio web. En este punto, será bastante indiscriminado. Así que necesitamos más comandos: ``` bash -np, --no-parent ``` (El doble guión indica el texto completo de un comando. Todos los comandos también tienen una versión corta, éste podría iniciarse usando -np). -Esto es muy importante. Quiere decir que wget debe seguir los enlaces pero no más allá del último directorio principal. En nuestro caso, eso significa que no irá a ninguna sitio que no sea parte de la jerarquía de http://activehistory.ca/papers/.
Si se tratara de una ruta larga como http://niche-canada.org/projects/events/new-events/not-yet-happened-events/, solo encontraría archivos en la carpeta `/not-yet-happened-events/`. Es un comando crítico para delimitar tu búsqueda. +Esto es muy importante. Quiere decir que wget debe seguir los enlaces pero no más allá del último directorio principal. En nuestro caso, eso significa que no irá a ningún sitio que no sea parte de la jerarquía de https://activehistory.ca/papers/. Si se tratara de una ruta larga como https://niche-canada.org/projects/events/new-events/not-yet-happened-events/, solo encontraría archivos en la carpeta `/not-yet-happened-events/`. Es un comando crítico para delimitar tu búsqueda. Aquí una representación gráfica: @@ -320,12 +320,12 @@ Aquí solo he dado una instantánea de algunas de las funcionalidades de wget. P [^2]: La versión más reciente es wget 1.19, desde el 3 de febrero de 2017. -[Command Line Bootcamp]: http://praxis.scholarslab.org/scratchpad/bash/ +[Command Line Bootcamp]: https://praxis.scholarslab.org/scratchpad/bash/ [descargar XCode a través de este enlace]: https://itunes.apple.com/us/app/xcode/id497799835?mt=12 [sitio web de desarrolladores de Apple]: https://developer.apple.com/xcode/ [Ver descargas]: https://developer.apple.com/downloads/ -[sitio web de GNU]: http://www.gnu.org/software/wget/ -[HTTP]: http://ftp.gnu.org/gnu/wget/ +[sitio web de GNU]: https://www.gnu.org/software/wget/ +[HTTP]: https://ftp.gnu.org/gnu/wget/ [FTP]: ftp://ftp.gnu.org/gnu/wget/ -[ugent website]: http://users.ugent.be/~bpuype/wget/ -[Manual de wget de GNU]: http://www.gnu.org/software/wget/manual/wget.html +[ugent website]: https://users.ugent.be/~bpuype/wget/ +[Manual de wget de GNU]: https://www.gnu.org/software/wget/manual/wget.html diff --git a/es/lecciones/editar-audio-con-audacity.md b/es/lecciones/editar-audio-con-audacity.md index df452247a1..57fc5ee841 100644 --- a/es/lecciones/editar-audio-con-audacity.md +++ b/es/lecciones/editar-audio-con-audacity.md @@ -33,7 +33,7 @@ doi: 10.46430/phes0007 Para aquellos interesados en audio, las habilidades básicas de edición de sonido les serán de mucha ayuda. Ser capaz de manipular los materiales puede ayudarte a dominar tu objeto de estudio: puedes ampliar y extraer momentos específicos para analizar, procesar el audio, y subir los materiales a un servidor para complementar la entrada de un blog en la materia. En un nivel más práctico, estas habilidades te permitirán grabar y comprimir grabaciones, tuyas o de otros, para su distribución. ¿Esa conferencia de un profesor invitado a tu facultad? ¡Grábala y edítala tú mismo! Hacerlo así es una forma sencilla de distribuir recursos entre varias instituciones, y también ayuda a hacer los materiales más accesibles para lectores y escuchas con una amplia variedad de necesidades de aprendizaje. -En esta lección aprenderás a utilizar [Audacity](http://www.audacityteam.org/) para cargar, grabar, editar, mezclar y exportar archivos de audio. Con frecuencia, las plataformas de edición de audio son costosas y ofrecen numerosas funciones que pueden ser abrumadoras para el usuario que no tiene experiencia previa, al contrario, *Audacity* es una alternativa gratuita y de código abierto que ofrece gran funcionalidad y fácil acceso para editar archivos de audio. +En esta lección aprenderás a utilizar [Audacity](https://www.audacityteam.org/) para cargar, grabar, editar, mezclar y exportar archivos de audio.
Con frecuencia, las plataformas de edición de audio son costosas y ofrecen numerosas funciones que pueden ser abrumadoras para el usuario que no tiene experiencia previa; por el contrario, *Audacity* es una alternativa gratuita y de código abierto que ofrece gran funcionalidad y fácil acceso para editar archivos de audio. Para esta lección vamos a trabajar con dos archivos de audio: una grabación de las [Variaciones Goldberg de Bach](/assets/editing-audio-with-audacity/bach-goldberg-variations.mp3), y otra grabación de tu propia voz que se hará en el transcurso de la lección. @@ -46,7 +46,7 @@ Primero, descarga los archivos necesarios. Vas a necesitar el [archivo en .mp3 de las Variaciones Goldberg de Bach](/assets/editing-audio-with-audacity/bach-goldberg-variations.mp3). Para descargarlo, haz clic con el botón derecho [aquí](/assets/editing-audio-with-audacity/bach-goldberg-variations.mp3) y selecciona "guardar como" para guardar el archivo en tu computadora como un MP3. -A continuación, descarga e instala *Audacity*, que está disponible en el [sitio del proyecto]( http://www.audacityteam.org/). *Audacity* puede utilizarse en Mac OSX, Windows o Linux. +A continuación, descarga e instala *Audacity*, que está disponible en el [sitio del proyecto](https://www.audacityteam.org/). *Audacity* puede utilizarse en Mac OSX, Windows o Linux. Descarga el programa y haz doble clic para instalar. @@ -58,7 +58,7 @@ La interfaz cargará y mostrará los archivos cargados: *Audacity* convierte el sonido en un diagrama de onda, una forma frecuentemente utilizada para representar sonido. El eje horizontal representa el tiempo en forma de segundos (o minutos y segundos, dependiendo de la extensión del clip). El inicio del sonido se visualiza del lado izquierdo de la interfaz y *Audacity* coloca marcadores a lo largo de la onda hacia la derecha. Si damos clic en el botón de reproducir *Audacity* se moverá sobre el sonido de izquierda a derecha, entre tanto una línea vertical representará nuestra posición en el clip de audio. -El eje vertical representa la amplitud, que experimentamos como intensidad sonora o volumen. De manera predeterminada, el eje vertical mide el volumen en una regla vertical de -1 a 1: los extremos de -1 y 1 representan la intensidad sonora posible de la grabación sin distorsión, mientras que 0 representa silencio. Así, el silencio comienza como una línea plana desde la cual el sonido será más alto y más profundo a medida que aumente su intensidad. Para mayor información acerca del porqué algunos de los números son negativos, revisa la [**introducción a la acústica**](http://web.archive.org/web/20161119231053/http://www.indiana.edu:80/~emusic/acoustics/amplitude.htm) de Jeffrey Hass (en inglés). +El eje vertical representa la amplitud, que experimentamos como intensidad sonora o volumen. De manera predeterminada, el eje vertical mide el volumen en una regla vertical de -1 a 1: los extremos de -1 y 1 representan la intensidad sonora posible de la grabación sin distorsión, mientras que 0 representa silencio. Así, el silencio comienza como una línea plana desde la cual el sonido será más alto y más profundo a medida que aumente su intensidad. Para mayor información acerca del porqué algunos de los números son negativos, revisa la [**introducción a la acústica**](https://web.archive.org/web/20161119231053/http://www.indiana.edu:80/~emusic/acoustics/amplitude.htm) de Jeffrey Hass (en inglés).
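Si quieres comprobar numéricamente esa escala de -1 a 1, este boceto mínimo en Python (ajeno a la lección) calcula la amplitud pico de una grabación. Supone un archivo WAV de 16 bits, por ejemplo uno exportado desde *Audacity*, con el nombre ilustrativo `grabacion.wav`:

```python
import wave
import array

# Supone un WAV de 16 bits (2 bytes por muestra), por ejemplo exportado desde Audacity.
with wave.open("grabacion.wav", "rb") as w:
    assert w.getsampwidth() == 2, "este boceto solo cubre audio de 16 bits"
    muestras = array.array("h", w.readframes(w.getnframes()))

# Normaliza el pico a la escala de -1 a 1 que Audacity muestra en su eje vertical.
pico = max(abs(m) for m in muestras) / 32768
print(f"Amplitud pico: {pico:.3f}")
```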
La representación de tiempo y amplitud de *Audacity* es tu primer y más fácil punto de referencia para la edición de sonido, y la herramienta facilita la navegación por el mismo. Sigo llamándole a esto una onda, pero aún no se parece mucho a una. Vamos a echar un vistazo más de cerca al seleccionar una parte de la pieza de audio. @@ -96,7 +96,7 @@ Se mostrará algo parecido a esto: Nuestra grabación original de “Bach” se mantiene en la parte superior de la interfaz, mientras que nuestra nueva grabación está por debajo de ella. De forma predeterminada, *Audacity* no sobreescribirá una grabación anterior. Por el contrario, aísla ambos sonidos o pistas, permitiéndonos manipular componentes separados antes de mezclarlos en una grabación final. Podemos hacer cambios a uno sin afectar al otro. Observa cómo, con respecto al tiempo, la nueva pista se grabó de manera predeterminada al principio del proyecto de Audacity. Por ahora, las pistas de “Bach” y la vocal comienzan al mismo tiempo. Existen otras imperfecciones potenciales en tu grabación única, algunas de las cuales podemos corregir. -Finalmente, observa cómo en mi ejemplo existen dos formas de onda para la grabación de Bach, pero solo una para la grabación de mi voz. La grabación de Bach fue hecha en estéreo, lo que significa que había dos canales de entrada, mientras que la grabación de mi voz fue hecha en *monoauraL*. *Audacity* permite grabar en ambos, y cualquiera de las dos funcionará para esta lección, así que no te preocupes si tu grabación aparece en estéreo. Puedes cambiar de mono a estéreo y viceversa desde “Editar”, disponible en la sección “Barra de herramientas” del menú “ver”. Para más información sobre mono contra estéreo, revista esta [*lectura*](http://www.diffen.com/difference/Mono_vs_Stereo/) (en inglés). +Finalmente, observa cómo en mi ejemplo existen dos formas de onda para la grabación de Bach, pero solo una para la grabación de mi voz. La grabación de Bach fue hecha en estéreo, lo que significa que había dos canales de entrada, mientras que la grabación de mi voz fue hecha en *monoaural*. *Audacity* permite grabar en ambos, y cualquiera de las dos funcionará para esta lección, así que no te preocupes si tu grabación aparece en estéreo. Puedes cambiar de mono a estéreo y viceversa desde “Editar”, disponible en la sección “Barra de herramientas” del menú “ver”. Para más información sobre mono contra estéreo, revisa esta [*lectura*](https://www.diffen.com/difference/Mono_vs_Stereo/) (en inglés). Aparte: a menudo puede ser de utilidad convertir la salida de sonido de tu laptop en entrada, para que puedas grabar los sonidos que se reproducen en tu computadora sin preocuparte del ruido externo o volver a grabar audio digital. Para obtener información sobre cómo llevar a cabo este proceso, consulta [*Soundflower*](https://github.com/mattingalls/Soundflower/). @@ -152,7 +152,7 @@ Pero eventualmente vamos a querer cambiar el enfoque de la pista por completo de - Seleccionar “Crossfade Tracks”, del menú Efecto, esto le indicará a Audacity que realice el desvanecimiento de salida de la pista superior mientras hace el desvanecimiento de entrada de la pista inferior; en este caso, el posicionamiento de las pistas es importante. -*Audacity* te ofrecerá opciones para el *crossfade* de la pista, pero por ahora está bien mantener la configuración preestablecida en “Fade type:constant gain”.
Ésta configuración garantiza que ambas pistas se desvanecerán o alinearán (para mayor información, revisa la documentación de *["crossfades” de Audacity](http://manual.audacityteam.org/man/crossfade_clips.html)* +*Audacity* te ofrecerá opciones para el *crossfade* de la pista, pero por ahora está bien mantener la configuración preestablecida en “Fade type:constant gain”. Esta configuración garantiza que ambas pistas se desvanecerán o alinearán (para mayor información, revisa la documentación de *[“crossfades” de Audacity](https://manual.audacityteam.org/man/crossfade_clips.html)*). ![Post-crossfade](/images/editing-audio-with-audacity/editing-audio-with-audacity-13.png) @@ -166,6 +166,6 @@ De forma predeterminada, todo lo que hagas en *Audacity* es guardado en el forma Al hacer esto, mezclarás las múltiples pistas en un solo archivo de audio, y te dará la oportunidad de proporcionar metadatos a tu trabajo. -Existe un rango de diferentes opciones para refinar el proceso de exportación, pero el más importante es “tipo de archivo”. MP3 y Ogg son buenas opciones para el audio destinado a ser mostrado en la web, ya que ambos comprimen los archivos para que sean rápidos de cargar. Para mejores resultados, puedes incluir ambos formatos y sólo mostrar uno como una alternativa cuando alguno no sea compatible con el navegador web del usuario. Para mayor información, *NCH Software* ofrece un [buen desglose técnico para sus diferentes opciones](http://www.nch.com.au/acm/formats.html), mientras que Jonathan Sterne ha hecho un [trabajo fascinante](https://www.dukeupress.edu/mp3/) sobre las implicaciones culturales de tales decisiones de formato. Y la W3Schools ofrece una [buena comparación](https://www.w3schools.com/html/html5_audio.asp) de estos formatos usados en el desarrollo web. +Existe un rango de diferentes opciones para refinar el proceso de exportación, pero el más importante es “tipo de archivo”. MP3 y Ogg son buenas opciones para el audio destinado a ser mostrado en la web, ya que ambos comprimen los archivos para que sean rápidos de cargar. Para mejores resultados, puedes incluir ambos formatos y sólo mostrar uno como una alternativa cuando alguno no sea compatible con el navegador web del usuario. Para mayor información, *NCH Software* ofrece un [buen desglose técnico para sus diferentes opciones](https://www.nch.com.au/acm/formats.html), mientras que Jonathan Sterne ha hecho un [trabajo fascinante](https://www.dukeupress.edu/mp3/) sobre las implicaciones culturales de tales decisiones de formato. Y la W3Schools ofrece una [buena comparación](https://www.w3schools.com/html/html5_audio.asp) de estos formatos usados en el desarrollo web. ¡Felicidades! Has producido exitosamente un pequeño podcast. Puede que no parezca mucho, pero con frecuencia yo uso estas mismas recomendaciones para presentaciones, sitios web y cuestiones académicas. De ninguna manera esta lección pretende agotar los múltiples temas al respecto, pero debe haberte proporcionado algunas herramientas básicas para trabajar con sonido en proyectos de humanidades digitales. diff --git a/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.md b/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.md index 4d64eaf9a8..0b5c46662d 100644 --- a/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.md +++ b/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.md @@ -57,7 +57,7 @@ Aquí es donde brilla Markdown. Markdown es una sitaxis para el marcado semánti Escribir en esta forma libera al autor de la herramienta.
Markdown se puede escribir en cualquier editor de texto y ofrece un rico ecosistema de *software* que puede representar ese texto en documentos con aspecto atractivo. Por esta razón, Markdown está experimentando un periodo de crecimiento, no solamente como un medio para la escritura de documentos académicos sino como una convención para la edición en línea en general. -Los editores de texto para todo prósito más populares incluyen [Atom](https://atoms.io/) (para todas las plataformas) y [Notepad++](http://notepad-plus-plus.org) (para Windows). +Los editores de texto para todo propósito más populares incluyen [Atom](https://atom.io/) (para todas las plataformas) y [Notepad++](https://notepad-plus-plus.org) (para Windows). Es importante entender que Markdown no es más que una convención. Los archivos Markdown se almacenan como texto plano, además de añadir la flexibilidad del formato. Los archivos de texto plano han existido desde los tiempos de las máquinas de escribir electrónicas. La longevidad de este estándar hace, de manera inherente, que sean más sostenibles y más estables que los formatos propietarios. Mientras que los archivos producidos hace diez años en Microsoft Word o en Pages de Apple pueden causar serios problemas cuando se abren con la última versión del programa, aún es posible abrir un archivo de texto plano escrito en alguno de los editores de texto "muertos", del pasado, muchas décadas después: AlphaPlus, Perfect Writer, Text Wizard, Spellbinder, WordStar o SCRIPSIT2.0, el favorito de Isaac Asimov producido por Radio Shack. Escribir en texto plano te garantiza que tus archivos permanecerán legibles diez, quince o veinte años a partir de ahora. En esta lección se describe un flujo de trabajo que libera al investigador de programas de procesamiento de texto propietarios y archivos de formatos frágiles. @@ -186,7 +186,7 @@ Y como veremos en breve, este archivo de texto plano se puede representar como u {% include figure.html filename="Screen-Shot-2014-11-06.png" caption="Captura de pantalla de un PDF interpretado por Pandoc" %} -Si quieres tener una idea de cómo serán interpretado en un fomato HTML este tipo de marcado, prueba este [sitio de prueba en línea](http://daringfireball.net/projects/markdown/dingus) y juega con varios tipos de sintaxis. Recuerda que ciertos elementos del *Pandoc-flavored markdown* (como el bloque de título o las notas al pie) no funcionan en esta versión web ya que solamente acepta lo básico. +Si quieres tener una idea de cómo será interpretado en un formato HTML este tipo de marcado, prueba este [sitio de prueba en línea](https://daringfireball.net/projects/markdown/dingus) y juega con varios tipos de sintaxis. Recuerda que ciertos elementos del *Pandoc-flavored markdown* (como el bloque de título o las notas al pie) no funcionan en esta versión web ya que solamente acepta lo básico. En este punto, deberás ocupar algún tiempo explorando algunas de las características de Markdown como las citas de texto (referidas con el símbolo `>`), los listados que empiezan con `*` o `-`, los saltos de línea literales que empiezan con `|` (útiles para poesía), las tablas y algunas otras funciones señaladas en la página sobre Markdown de Pandoc. @@ -314,7 +314,7 @@ El filtro "citeproc" compila todas tus etiquetas de citas. El resultado debe ser ## Cambiar los estilos de citación -El estilo de citación por defecto en Pandoc es el de Chicago Autor-fecha.
Podemos especificar un estilo diferente utilizando una hoja de estilo escrita en "lenguaje de estilo de citación" (CSL por *citation style language*, otra convención en texto plano utilizada para describir estilos de citas) y que es designado por la extensión de archivo `.csl`. Afortunadamente, el proyecto CSL mantiene un repositorio de estilos de citaciones comunes, algunas incluso ajustadas a ciertas revistas en específico. Visita para encontrar el archivo `.csl` para el estilo Modern Language Association (MLA), descarga el archivo `modern-language-association.csl` y guárdalo en la carpeta de tu proyecto como `mla.csl`. Ahora, necesitamos indicarle a Pandoc que utilice la hoja de estilo de MLA en vez de la de Chicago que tiene por defecto. Haremos esto actualizando el encabezado o bloque YAML: +El estilo de citación por defecto en Pandoc es el de Chicago Autor-fecha. Podemos especificar un estilo diferente utilizando una hoja de estilo escrita en "lenguaje de estilo de citación" (CSL por *citation style language*, otra convención en texto plano utilizada para describir estilos de citas) y que es designado por la extensión de archivo `.csl`. Afortunadamente, el proyecto CSL mantiene un repositorio de estilos de citaciones comunes, algunas incluso ajustadas a ciertas revistas en específico. Visita para encontrar el archivo `.csl` para el estilo Modern Language Association (MLA), descarga el archivo `modern-language-association.csl` y guárdalo en la carpeta de tu proyecto como `mla.csl`. Ahora, necesitamos indicarle a Pandoc que utilice la hoja de estilo de MLA en vez de la de Chicago que tiene por defecto. Haremos esto actualizando el encabezado o bloque YAML: ``` --- @@ -349,19 +349,19 @@ Trata tus archivos de origen como versiones autorizadas de tu texto y los archiv ## Recursos útiles -En caso de meterte en problemas no hay un mejor lugar para empezar a buscar soluciones que el [sitio web de Pandoc](https://pandoc.org/) de John MacFarlane y la [lista de correos](https://groups.google.com/forum/#!forum/pandoc-discuss) afiliada (en inglés). Al menos en dos sitios de tipo "Pregunta y respuesta" puedes encontrar respuestas a preguntas sobre Pandoc: [Stack Overflow](http://stackoverflow.com/questions/tagged/pandoc) y [Digital Humanities Q&A](http://web.archive.org/web/20190203062832/http://digitalhumanities.org/answers/). Puedes hacer preguntas en vivo en Freenode IRC, \#Pandoc channel, frecuentado por un amistoso grupo de asiduos. A medida que aprendas más acerca de Pandoc, puedes explorar una de sus particularidades más poderosa: [filtros](https://github.com/jgm/pandoc/wiki/Pandoc-Filters). +En caso de meterte en problemas no hay un mejor lugar para empezar a buscar soluciones que el [sitio web de Pandoc](https://pandoc.org/) de John MacFarlane y la [lista de correos](https://groups.google.com/forum/#!forum/pandoc-discuss) afiliada (en inglés). Al menos en dos sitios de tipo "Pregunta y respuesta" puedes encontrar respuestas a preguntas sobre Pandoc: [Stack Overflow](https://stackoverflow.com/questions/tagged/pandoc) y [Digital Humanities Q&A](https://web.archive.org/web/20190203062832/https://digitalhumanities.org/answers/). Puedes hacer preguntas en vivo en Freenode IRC, \#Pandoc channel, frecuentado por un amistoso grupo de asiduos. A medida que aprendas más acerca de Pandoc, puedes explorar una de sus particularidades más poderosas: [filtros](https://github.com/jgm/pandoc/wiki/Pandoc-Filters).
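Como ilustración de ese flujo, un boceto mínimo (no forma parte de la lección) de cómo se podría invocar a Pandoc con la hoja de estilo MLA desde Python. Supone que `pandoc` está en el PATH; `mla.csl` es el archivo descargado según la lección, mientras que `main.md` y `proyecto.bib` son nombres ilustrativos. En versiones recientes de Pandoc las citas se procesan con `--citeproc`; en versiones anteriores se usaba `--filter pandoc-citeproc`:

```python
import subprocess

# Boceto: convierte main.md a PDF aplicando la hoja de estilo MLA.
subprocess.run(
    [
        "pandoc", "main.md",
        "--citeproc",                   # procesa las citas (Pandoc >= 2.11)
        "--bibliography=proyecto.bib",  # base de datos bibliográfica (nombre ilustrativo)
        "--csl=mla.csl",                # hoja de estilo descargada en este paso
        "-o", "main.pdf",
    ],
    check=True,
)
```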
-Aunque te sugerimos comenzar con un simple editor de texto plano, hay muchas más alternativas (más de 70, de acuerdo con [esta entrada de blog](http://web.archive.org/web/20140120195538/http://mashable.com/2013/06/24/markdown-tools/) a MS Word para trabajar específicamente con Markdown, disponibles en línea y a menudo sin costo. Para las autónomas nos gustan [Mou](http://mouapp.com/), [Write Monkey](http://writemonkey.com), y [Sublime Text](http://www.sublimetext.com/). Varias plataformas web que han surgido recientemente proporcionan interfaces gráficas adecuadas para desarrollar una escritura colaborativa con seguimiento de cambios en las versiones utilizando Markdown. Éstas incluyen: [prose.io](http://prose.io), [Authorea](http://www.authorea.com), [Draft](http://www.draftin.com), y [StackEdit](https://stackedit.io). +Aunque te sugerimos comenzar con un simple editor de texto plano, hay muchas más alternativas (más de 70, de acuerdo con [esta entrada de blog](https://web.archive.org/web/20140120195538/https://mashable.com/2013/06/24/markdown-tools/)) a MS Word para trabajar específicamente con Markdown, disponibles en línea y a menudo sin costo. Para las aplicaciones autónomas nos gustan [Mou](https://mouapp.com/), [Write Monkey](https://writemonkey.com) y [Sublime Text](https://www.sublimetext.com/). Varias plataformas web que han surgido recientemente proporcionan interfaces gráficas adecuadas para desarrollar una escritura colaborativa con seguimiento de cambios en las versiones utilizando Markdown. Éstas incluyen: [prose.io](https://prose.io), [Authorea](https://www.authorea.com), [Draft](https://www.draftin.com) y [StackEdit](https://stackedit.io). -Pero el ecosistema no está limitado sólo a editores. [Gitit](http://gitit.net/) e [Ikiwiki](https://github.com/dubiousjim/pandoc-iki) soportan escritura en Markdown utilizando Pandoc como compilador. A esta lista se puede agregar una serie de herramientas que generan páginas web estáticas de manera rápida: [Yst](https://github.com/jgm/yst), [Jekyll](http://github.com/fauno/jekyll-pandoc-multiple-formats), [Hakyll](http://jaspervdj.be/hakyll/) y [bash shell script](https://github.com/wcaleb/website) por el historiador Caleb McDaniel. +Pero el ecosistema no está limitado sólo a editores. [Gitit](https://gitit.net/) e [Ikiwiki](https://github.com/dubiousjim/pandoc-iki) soportan escritura en Markdown utilizando Pandoc como compilador. A esta lista se puede agregar una serie de herramientas que generan páginas web estáticas de manera rápida: [Yst](https://github.com/jgm/yst), [Jekyll](https://github.com/fauno/jekyll-pandoc-multiple-formats), [Hakyll](https://jaspervdj.be/hakyll/) y [bash shell script](https://github.com/wcaleb/website) por el historiador Caleb McDaniel. -Finalmente, se están creando plataformas de publicación enteras basadas en el uso de Markdown. La plataforma de mercado [Leanpub](https://leanpub.com) puede ser una alternativa interesante al modelo tradicional de publicación y nosotros mismos estamos experimentando con el diseño de una revista académica en GitHub y [readthedocs.org](http://readthedocs.org) (herramientas que suelen utilizarse para técnicas de documentación). +Finalmente, se están creando plataformas de publicación enteras basadas en el uso de Markdown.
La plataforma de mercado [Leanpub](https://leanpub.com) puede ser una alternativa interesante al modelo tradicional de publicación y nosotros mismos estamos experimentando con el diseño de una revista académica en GitHub y [readthedocs.org](https://readthedocs.org) (herramientas que suelen utilizarse para técnicas de documentación). [^1]: ¡No te preocupes si no entiendes aún esta terminología! [^2]: [GitHub](https://github.com/dhcolumbia/pandoc-workflow). Utiliza la opción "raw" cuando lo veas en GitHub para observar la fuente de Markdown. Los autores queremos agradecer a Alex Gil y sus colegas del Columbia's Digital Humanities Center, y a los participantes de openLab en el Studio de la Biblioteca Butler por probar el código de este tutorial en diversas plataformas. -[^3]: Véase la excelente discusión sobre este tema, por Charlie Stross, en [Why Microsoft Word Must Die](http://www.antipope.org/charlie/blog-static/2013/10/why-microsoft-word-must-die.html). +[^3]: Véase la excelente discusión sobre este tema, por Charlie Stross, en [Why Microsoft Word Must Die](https://www.antipope.org/charlie/blog-static/2013/10/why-microsoft-word-must-die.html). [^4]: Considera que la extensión `.bib` debe estar "vinculada" a Zotero en tu sistema operativo. Esto significa que si haces doble clic en un archivo `.bib`, es probable que Zotero intente abrir el archivo mientras que nosotros queremos abrirlo con un editor de texto. Es posible que en el futuro quieras asociar la extensión `.bib` a tu editor de texto. diff --git a/es/lecciones/exhibicion-con-collection-builder.md b/es/lecciones/exhibicion-con-collection-builder.md index 8cb992a095..5fa42e92e9 100644 --- a/es/lecciones/exhibicion-con-collection-builder.md +++ b/es/lecciones/exhibicion-con-collection-builder.md @@ -64,7 +64,7 @@ El objetivo de la metodología Lib-STATIC es utilizar la tecnología de webs est CollectionBuilder-GH (CB-GH) es una de las alternativas de tipo computación mínima (minimal computing), que se refiere a "la actividad computacional que se realiza bajo restricciones significativas de hardware, software, educación, capacidad de red, energía u otros factores" ([_Minimal Computing: a working group of GO::DH_](https://go-dh.github.io/mincomp/about/)). Precisamente, por su formato, las exhibiciones digitales creadas con CollectionBuilder en GitHub necesitarán de menos hardware o tecnología y menos ancho de banda de Internet. Además, está totalmente adaptada a sistemas celulares. Este sistema es una buena alternativa a sistemas de exhibiciones digitales como [Omeka](https://es.wikipedia.org/wiki/Omeka) y al algo más complejo sistema [Wax](https://minicomp.github.io/wax/), para aquellos que no tengan recursos informáticos avanzados a su alcance, que no dispongan del tiempo para aprender a utilizar algo más complicado y, en resumidas cuentas, que quieran reutilizar de forma rápida las colecciones digitalizadas en sus archivos para dar acceso a su comunidad. -El trabajo para crear CB-GH está financiada por una beca National Leadership Grants for Libraries Planning Grant ofrecida por el Instituto de Servicios de Museos y Bibliotecas ([IMLS](https://www.imls.gov), por sus siglas en inglés). Varias bibliotecas y museos ya han utilizado esta herramienta para la diseminación de sus colecciones u otros propósitos, como son [Colors of Ozu](https://drodz11.github.io/colors-of-ozu/), de Dave Rodriguez o la [Namibia Heritage Week 2020](http://dna.nust.na/heritage_week/), de Namibia University of Science and Technology.
+El trabajo para crear CB-GH está financiado por una beca National Leadership Grants for Libraries Planning Grant ofrecida por el Instituto de Servicios de Museos y Bibliotecas ([IMLS](https://www.imls.gov), por sus siglas en inglés). Varias bibliotecas y museos ya han utilizado esta herramienta para la diseminación de sus colecciones u otros propósitos, como son [Colors of Ozu](https://drodz11.github.io/colors-of-ozu/), de Dave Rodriguez o la [Namibia Heritage Week 2020](https://dna.nust.na/heritage_week/), de Namibia University of Science and Technology. ## 1. Preparar los archivos básicos para la colección @@ -72,7 +72,7 @@ CollectionBuilder-GH está basado en cuatro componentes básicos que generan la ### Colección de objetos -CollectionBuilder-GH está pensado para ser utilizado con colecciones pequeñas. Para poder utilizar la versión gratuita de GitHub, el total de los archivos de la exhibición no puede superar 1GB de peso. Por eso, se recomienda que la carpeta de imágenes no supere los 500MB. Puedes realizar la edición necesaria en imágenes de alta resolución que ya tengas con cualquier software de editado de imágenes, como [GIMP](http://www.gimp.org.es/descargar-gimp.html). Es importante tener en cuenta las siguientes consideraciones: +CollectionBuilder-GH está pensado para ser utilizado con colecciones pequeñas. Para poder utilizar la versión gratuita de GitHub, el total de los archivos de la exhibición no puede superar 1GB de peso. Por eso, se recomienda que la carpeta de imágenes no supere los 500MB. Puedes realizar la edición necesaria en imágenes de alta resolución que ya tengas con cualquier software de edición de imágenes, como [GIMP](https://www.gimp.org.es/descargar-gimp.html). Es importante tener en cuenta las siguientes consideraciones: - Formato de los objetos: GitHub y esta herramienta aceptan los formatos más comunes de imágenes y audio con los que ya estarás familiarizado: jpg, png y mp3. También puedes utilizar enlaces externos a objetos en YouTube o Vimeo, pero estos no aparecerán dentro de la exhibición diff --git a/es/lecciones/generadores-aventura.md b/es/lecciones/generadores-aventura.md index ffc53199f1..d1a8856cdd 100644 @@ -63,7 +63,7 @@ En esta lección nos concentraremos en un tipo particular de literatura electró Existe una larga tradición del uso de procesos mecánicos combinados con elementos aleatorios para la creación en las artes y la literatura. Con mecánicos nos referimos a que están guiados por sistemas de reglas claramente definidos, es decir, si usamos términos computacionales, por algoritmos. Y por aleatorios queremos decir que alguna parte del proceso creativo está definido por una fuente de incertidumbre, como el lanzamiento de una moneda o la selección espontánea de elementos de una lista. Esta combinación creativa entre orden y azar permite un equilibrio entre el control sobre los resultados de una obra creativa y la sorpresa con respecto a la configuración final de la misma obra. -Pensemos, por ejemplo, en el ejercicio [S + 7](https://perma.cc/S6LR-U5AN) propuesto por el poeta surrealista Jean Lescure en los años sesenta: el ejercicio consiste en tomar un texto preexistente, por ejemplo un poema, y reemplazar cada sustantivo por la séptima palabra que se encuentre después de este en un diccionario de sustantivos. En [este enlace](http://www.spoonbill.org/n+7/) encuentras un programa en inglés que genera textos con el ejercicio S + 7.
Aquí podemos ver que hay una parte mecánica, las reglas que definen cómo proceder con el ejercicio, y una parte aleatoria, el resultado impredecible de cómo resultará el nuevo texto causado por el orden fortuito del diccionario usado. +Pensemos, por ejemplo, en el ejercicio [S + 7](https://perma.cc/S6LR-U5AN) propuesto por el poeta surrealista Jean Lescure en los años sesenta: el ejercicio consiste en tomar un texto preexistente, por ejemplo un poema, y reemplazar cada sustantivo por la séptima palabra que se encuentre después de este en un diccionario de sustantivos. En [este enlace](https://www.spoonbill.org/n+7/) encuentras un programa en inglés que genera textos con el ejercicio S + 7. Aquí podemos ver que hay una parte mecánica, las reglas que definen cómo proceder con el ejercicio, y una parte aleatoria, el resultado impredecible de cómo resultará el nuevo texto causado por el orden fortuito del diccionario usado. Este tipo de estrategias creativas, que en principio no requieren de un computador, han sido posteriormente adaptadas por la literatura electrónica, pues sus autoras y autores comúnmente aprovechan los sistemas algorítmicos que permiten los lenguajes de programación y el azar que proveen los generadores de números aleatorios para dar lugar a la conjunción mecánica-aleatoria. Un ejemplo concreto de esta estrategia en el campo computacional es la producción de generadores de texto —y otros medios como imágenes o sonido— por medio de sistemas algorítmicos llamados gramáticas libres de contexto; este es justamente el sistema que usaremos en esta lección. Cabe anotar que existen otros métodos para la generación de textos, como las [cadenas de Márkov](https://perma.cc/Y7FK-FM3X) o los modelos de lenguaje basados en [aprendizaje automático](https://perma.cc/D73Q-MMXM), pero no nos ocuparemos de ellos aquí. @@ -177,7 +177,7 @@ let gramatica = { } ``` -Una vez te familiarices con el proceso, puedes crear nuevas reglas y opciones. Para crear la lista de opciones nos apoyaremos en los datos recogidos por el proyecto [Sobremesa Digital](https://perma.cc/2U6N-94GV) de Clementina Grillo. Este proyecto hizo un recuento de todos los objetos, por capítulos y tipos, que se mencionan en la novela *De sobremesa* escrita por José Asunción Silva, y está disponible en un archivo en formato [JSON](http://clementinagrillo.com/sobremesadigital/flare.json). Básicamente, un archivo en formato JSON es equivalente a un objeto de JavaScript, sus siglas quieren decir, justamente, "JavaScript Object Notation". Estos datos son muy adecuados para el proyecto de esta lección, así que los usaremos como insumo para la siguiente parte. Así, en vez de poner elementos fijos en la rama de objetos, pondremos elementos escogidos al azar tomados de la base de datos de "Sobremesa digital": +Una vez te familiarices con el proceso, puedes crear nuevas reglas y opciones. Para crear la lista de opciones nos apoyaremos en los datos recogidos por el proyecto [Sobremesa Digital](https://perma.cc/2U6N-94GV) de Clementina Grillo. Este proyecto hizo un recuento de todos los objetos, por capítulos y tipos, que se mencionan en la novela *De sobremesa* escrita por José Asunción Silva, y está disponible en un archivo en formato [JSON](https://clementinagrillo.com/sobremesadigital/flare.json). Básicamente, un archivo en formato JSON es equivalente a un objeto de JavaScript, sus siglas quieren decir, justamente, "JavaScript Object Notation". 
Estos datos son muy adecuados para el proyecto de esta lección, así que los usaremos como insumo para la siguiente parte. Así, en vez de poner elementos fijos en la rama de objetos, pondremos elementos escogidos al azar tomados de la base de datos de "Sobremesa digital": ```JavaScript let gramatica = { diff --git a/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.md b/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.md index 7a7685ce29..0eeafde48c 100644 --- a/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.md +++ b/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.md @@ -155,7 +155,7 @@ Visone permite generar y visualizar diferentes tipos de redes. Las redes son est ### Descarga e instala Visone -A diferencia de Recogito, necesitaremos instalar [Visone](http://visone.ethz.ch/html/download.html). La versión de descarga recomendada para todos los sistemas operativos es **visone-2.26.jar**. +A diferencia de Recogito, necesitaremos instalar [Visone](https://visone.ethz.ch/html/download.html). La versión de descarga recomendada para todos los sistemas operativos es **visone-2.26.jar**.
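Una vez completada la descarga, el archivo `.jar` puede lanzarse directamente desde la terminal. Un esbozo orientativo, asumiendo que **visone-2.26.jar** se encuentra en el directorio de trabajo y que Java ya está instalado:

```bash
# Comprobar qué versión de Java está instalada (se necesita Java 8 o posterior)
java -version

# Lanzar Visone desde el directorio donde se guardó el archivo descargado
java -jar visone-2.26.jar
```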
Antes de iniciar la instalación de Visone, debemos asegurarnos de tener instalado en nuestra computadora Java 8 o posterior. Si no tienes Java instalado en tu computadora, puedes descargarlo aquí.

@@ -255,7 +255,7 @@ Hay muchos tutoriales adicionales disponibles para Recogito y Visone. Te recomen

- El sitio oficial de Visone tiene [varios tutoriales en inglés](https://visone.info/wiki/index.php/Tutorials#Basic_tutorials) sobre las diferentes aplicaciones de esta herramienta.

-- El tutorial de Recogito de Gimena del Río y Valeria Vitale, [Recogito-in-a-Box: From Annotation to Digital Edition](http://dx.doi.org/10.3828/mlo.v0i0.299) (en inglés).
+- El tutorial de Recogito de Gimena del Río y Valeria Vitale, [Recogito-in-a-Box: From Annotation to Digital Edition](https://dx.doi.org/10.3828/mlo.v0i0.299) (en inglés).

### Nota

diff --git a/es/lecciones/georreferenciar-qgis.md b/es/lecciones/georreferenciar-qgis.md
index a2877b0db9..a3535f84f2 100644
--- a/es/lecciones/georreferenciar-qgis.md
+++ b/es/lecciones/georreferenciar-qgis.md
@@ -72,7 +72,7 @@ Este es el shapefile (archivo de entidades vectoriales) que contiene la capa de

- Navega al siguiente enlace, acepta la licencia de uso y descarga 'lot\_township\_polygon' (En algunas ocasiones te preguntarán tu nombre y correo electrónico antes de poder descargar el archivo).

-
+

- Después de descargar el archivo llamado 'lot\_township\_polygon', muévelo a una carpeta que puedas encontrar después y descomprime el archivo. (Recuerda mantener los archivos juntos ya que todos son necesarios para abrir esta capa en tu SIG)

@@ -206,16 +206,16 @@ En procesos más avanzados, puedes incluso cubrir esta imagen georreferenciada c

*Este tutorial es parte de [Geospatial Historian][].*

 [Introducción a Google Maps y Google Earth]: /es/lecciones/intro-a-google-maps-y-google-earth
- [rubber-sheeting]: http://en.wikipedia.org/wiki/Rubbersheeting
- [National Topographic System Maps]: http://maps.library.utoronto.ca/datapub/digital/3400s_63_1929/maptile/Halifax/googlemaps.html
- [1]: http://maps.library.utoronto.ca/datapub/PEI/NTS/west/
- [2]: http://maps.library.utoronto.ca/datapub/PEI/NTS/east/
- [Coordinate Reference System]: http://en.wikipedia.org/wiki/Spatial_reference_system
+ [rubber-sheeting]: https://en.wikipedia.org/wiki/Rubbersheeting
+ [National Topographic System Maps]: https://maps.library.utoronto.ca/datapub/digital/3400s_63_1929/maptile/Halifax/googlemaps.html
+ [1]: https://maps.library.utoronto.ca/datapub/PEI/NTS/west/
+ [2]: https://maps.library.utoronto.ca/datapub/PEI/NTS/east/
+ [Coordinate Reference System]: https://en.wikipedia.org/wiki/Spatial_reference_system
 [Installing QGIS 2.0 and adding Layers]: /lessons/qgis-layers
- [can be downloaded here]: http://geospatialhistorian.files.wordpress.com/2013/02/pei_lakemap1863.jpg
- [Island Imagined]: https://web.archive.org/web/20180922004858/http://www.islandimagined.ca:80/fedora/repository/imagined:208687
- [in Atlantic Canada]: http://books.google.ca/books?id=TqCNZYXWXAUC&dq=tilting&source=gbs_navlinks_s
- [world file]: http://en.wikipedia.org/wiki/World_file
- [Tif]: http://en.wikipedia.org/wiki/Tagged_Image_File_Format
+ [can be downloaded here]: https://geospatialhistorian.files.wordpress.com/2013/02/pei_lakemap1863.jpg
+ [Island Imagined]: https://web.archive.org/web/20180922004858/https://www.islandimagined.ca:80/fedora/repository/imagined:208687
+ [in Atlantic Canada]: https://books.google.ca/books?id=TqCNZYXWXAUC&dq=tilting&source=gbs_navlinks_s
+ [world file]:
https://en.wikipedia.org/wiki/World_file + [Tif]: https://en.wikipedia.org/wiki/Tagged_Image_File_Format [Creating New Vector Layers in QGIS]: /lessons/vector-layers-qgis - [Geospatial Historian]: http://geospatialhistorian.wordpress.com/ + [Geospatial Historian]: https://geospatialhistorian.wordpress.com/ diff --git a/es/lecciones/instalacion-linux.md b/es/lecciones/instalacion-linux.md index d70525d793..63f22bd0f3 100644 --- a/es/lecciones/instalacion-linux.md +++ b/es/lecciones/instalacion-linux.md @@ -122,5 +122,5 @@ Ahora que tú y tu computadora están en marcha y funcionando, podemos movernos [opciones de editores de texto]: https://wiki.python.org/moin/PythonEditors/ -[sitio web de Komodo Edit]: http://komodoide.com/komodo-edit/ +[sitio web de Komodo Edit]: https://komodoide.com/komodo-edit/ [Para entender páginas web y HTML]: /es/lecciones/ver-archivos-html diff --git a/es/lecciones/instalar-modulos-python-pip.md b/es/lecciones/instalar-modulos-python-pip.md index a9bbb023e9..017b0d49ae 100644 --- a/es/lecciones/instalar-modulos-python-pip.md +++ b/es/lecciones/instalar-modulos-python-pip.md @@ -110,6 +110,6 @@ sudo pip install requests ¡Listo para trabajar! [pip]: https://pip.pypa.io/en/stable/ -[curl]: http://www.thegeekstuff.com/2012/04/curl-examples/ +[curl]: https://www.thegeekstuff.com/2012/04/curl-examples/ [aquí]: https://bootstrap.pypa.io/get-pip.py -[StackOverflow]: http://stackoverflow.com/questions/4750806/how-to-install-pip-on-windows +[StackOverflow]: https://stackoverflow.com/questions/4750806/how-to-install-pip-on-windows diff --git a/es/lecciones/intro-a-google-maps-y-google-earth.md b/es/lecciones/intro-a-google-maps-y-google-earth.md index 55a50ae3d0..b6fcbfccc3 100644 --- a/es/lecciones/intro-a-google-maps-y-google-earth.md +++ b/es/lecciones/intro-a-google-maps-y-google-earth.md @@ -546,7 +546,7 @@ trabajo!** [geo-es22]: /images/intro-a-google-maps-y-google-earth/geo-es22.png [geo-es23]: /images/intro-a-google-maps-y-google-earth/geo-es23.png [geo-es24]: /images/intro-a-google-maps-y-google-earth/geo-es24.png - [www.davidrumsey.com]: http://www.davidrumsey.com/ + [www.davidrumsey.com]: https://www.davidrumsey.com/ [geo-es25]: /images/intro-a-google-maps-y-google-earth/geo-es25.png [geo-es26]: /images/intro-a-google-maps-y-google-earth/geo-es26.png [Georeferencing in QGIS 2.0]: /lessons/georeferencing-qgis @@ -561,7 +561,7 @@ trabajo!** [geo-es33]: /images/intro-a-google-maps-y-google-earth/geo-es33.png [geo-es34]: /images/intro-a-google-maps-y-google-earth/geo-es34.png [geo-es35]: /images/intro-a-google-maps-y-google-earth/geo-es35.png - [Mobile Mapping and Historical GIS in the Field]: http://niche-canada.org/2011/12/14/mobile-mapping-and-historical-gis-in-the-field/ + [Mobile Mapping and Historical GIS in the Field]: https://niche-canada.org/2011/12/14/mobile-mapping-and-historical-gis-in-the-field/ [geo-es36]: /images/intro-a-google-maps-y-google-earth/geo-es36.png [geo-es37]: /images/intro-a-google-maps-y-google-earth/geo-es37.png [geo-es38]: /images/intro-a-google-maps-y-google-earth/geo-es38.png diff --git a/es/lecciones/introduccion-a-bash.md b/es/lecciones/introduccion-a-bash.md index b28fb882fc..88fe16b081 100644 --- a/es/lecciones/introduccion-a-bash.md +++ b/es/lecciones/introduccion-a-bash.md @@ -42,7 +42,7 @@ Muchas de las lecciones en *The Programming Historian en español* requieren que {% include figure.html filename="en-or-intro-to-bash-01.png" caption="Figura 1. 
GUI de la computadora de Ian Milligan" %} -Las interfaces de línea de comandos ofrecen ventajas para los usuarios de computadoras que necesitan mayor precisión en su trabajo -como los historiadores digitales. Permiten un uso más detallado a la hora de ejecutar algunos programas, ya que puedes agregar parámetros para especificar *exactamente* cómo deseas ejecutar tu programa. Además, se pueden automatizar procesos fácilmente mediante [scripts](http://www.tldp.org/LDP/Bash-Beginners-Guide/html/chap_01.html), que son esencialmente recetas de órdenes escritas en un archivo de texto. +Las interfaces de línea de comandos ofrecen ventajas para los usuarios de computadoras que necesitan mayor precisión en su trabajo -como los historiadores digitales. Permiten un uso más detallado a la hora de ejecutar algunos programas, ya que puedes agregar parámetros para especificar *exactamente* cómo deseas ejecutar tu programa. Además, se pueden automatizar procesos fácilmente mediante [scripts](https://www.tldp.org/LDP/Bash-Beginners-Guide/html/chap_01.html), que son esencialmente recetas de órdenes escritas en un archivo de texto. Hay dos interfaces de línea de comandos principales, o *shells*, que utilizan muchos historiadores digitales. En OS X, así como en muchas de las distribuciones de Linux, el *shell* se conoce como `bash` (*Bourne-again shell*). Para los usuarios de sistemas Windows, la interfaz de línea de comandos está basada en MS-DOS por defecto, y aunque utiliza diferentes comandos y [sintaxis](https://es.wikipedia.org/wiki/Sintaxis), puede realizar tareas similares. Este tutorial proporciona una introducción básica a la terminal `bash`. Los usuarios de Windows pueden seguir instalando algún *shell* popular como [Cygwin](https://www.cygwin.com/) o Git Bash (ver más adelante). @@ -64,7 +64,7 @@ Cuando lo ejecutes verás esto en la ventana: {% include figure.html filename="en-or-intro-to-bash-03.png" caption="Figura 3. Pantalla de Terminal en blanco en nuestra estación de trabajo de OS X" %} -Quizá quieras cambiar la apariencia que por defecto tiene la terminal para no esforzarte de más al mirar continuamente texto negro sobre fondo blanco. En la aplicación por defecto de OS X puedes abrir el menú 'Perfiles' en 'Preferencias', bajo 'Terminal'. Haz clic en la pestaña 'Perfiles' y cámbialo por un nuevo esquema de color. Personalmente preferimos algo con menor contraste entre el fondo y el primer plano, pues lo estarás viendo durante mucho tiempo. 'Novel' es uno muy relajante ya que es la paleta de colores de la popular *suite* [Solarized](http://ethanschoonover.com/solarized). Los usuarios de Windows pueden obtener un efecto similar utilizando la pestaña 'Properties' de Git bash. Para llegar a ella, haz click con el botón derecho en cualquier lugar de la barra superior y seleciona 'Properties'. +Quizá quieras cambiar la apariencia que por defecto tiene la terminal para no esforzarte de más al mirar continuamente texto negro sobre fondo blanco. En la aplicación por defecto de OS X puedes abrir el menú 'Perfiles' en 'Preferencias', bajo 'Terminal'. Haz clic en la pestaña 'Perfiles' y cámbialo por un nuevo esquema de color. Personalmente preferimos algo con menor contraste entre el fondo y el primer plano, pues lo estarás viendo durante mucho tiempo. 'Novel' es uno muy relajante ya que es la paleta de colores de la popular *suite* [Solarized](https://ethanschoonover.com/solarized). Los usuarios de Windows pueden obtener un efecto similar utilizando la pestaña 'Properties' de Git bash. 
Para llegar a ella, haz clic con el botón derecho en cualquier lugar de la barra superior y selecciona 'Properties'.

{% include figure.html filename="en-or-intro-to-bash-04.png" caption="Figura 4. Pantalla de configuración en Terminal de OS X" %}

@@ -182,7 +182,7 @@

## Interactuar con archivos

-Además de navegar por directorios, puedes interactuar con archivos a través de la línea de comandos: puedes leerlos, abrirlos, ejecutarlos e incluso editarlos sin tener que salir de la interfaz. Hay cierto debate sobre por qué alguien querría hacer todo esto; la razón principal es la extrema comodidad de trabajar con la línea de comandos: nunca tienes que tocar el ratón o el *track pad* de la computadora y, aunque tiene una curva de aprendizaje pronunciada, eventualmente puede convertirse en el único entorno de escritura. Además, muchos programas requieren la utilización de la línea de comandos para operar con ellos. Puesto que vas a utilizar programas a través de la línea de comandos, a menudo puede ser más rápido crear pequeñas ediciones sin necesidad de cambiar a un programa separado. Para algunos de estos argumentos véase el texto de Jon Beltran de Heredia, ["Why, oh WHY, do those #?@! nutheads use vi?"](http://www.viemu.com/a-why-vi-vim.html).
+Además de navegar por directorios, puedes interactuar con archivos a través de la línea de comandos: puedes leerlos, abrirlos, ejecutarlos e incluso editarlos sin tener que salir de la interfaz. Hay cierto debate sobre por qué alguien querría hacer todo esto; la razón principal es la extrema comodidad de trabajar con la línea de comandos: nunca tienes que tocar el ratón o el *track pad* de la computadora y, aunque tiene una curva de aprendizaje pronunciada, eventualmente puede convertirse en el único entorno de escritura. Además, muchos programas requieren la utilización de la línea de comandos para operar con ellos. Puesto que vas a utilizar programas a través de la línea de comandos, a menudo puede ser más rápido crear pequeñas ediciones sin necesidad de cambiar a un programa separado. Para algunos de estos argumentos véase el texto de Jon Beltran de Heredia, ["Why, oh WHY, do those #?@! nutheads use vi?"](https://www.viemu.com/a-why-vi-vim.html).

A continuación, presentaremos unas formas básicas de interactuar con archivos.

@@ -194,7 +194,7 @@ Esto crea un directorio llamado (¡adivinaste!) `ProgHist-Textos`. En general, e

Pero ¡espera! Hay un truco para hacer las cosas un poco más rápido. Ve arriba un directorio (`cd ..`, lo cual te llevará de regreso al escritorio). Para navegar al directorio `ProgHist-Textos` puedes escribir `cd ProgHist-Textos`. Alternativamente puedes escribir `cd Prog` y luego pulsar la tecla de tabulador. Te darás cuenta de que la interfaz completa la línea como `cd ProgHist-Textos`. **Si pulsas el tabulador en cualquier momento dentro del *shell* le pedirás que intente completar automáticamente la línea en función de los archivos o subdirectorios que estén en el directorio actual. Sin embargo, la función es sensible a mayúsculas (así, en el ejemplo anterior, `cd prog` no podrá autocompletarse como `cd ProgHist-Textos`). En donde haya dos archivos con los mismos caracteres, autocompletar solamente llenará la línea hasta el primer punto de diferencia. Sugerimos utilizar este método a lo largo de la lección para ver cómo se comporta.**

-Ahora necesitas encontrar un archivo de texto básico para que nos ayude con el ejemplo.
¿Por qué no utilizar un libro que sabes que es largo, como la épica "Guerra y Paz" de Leon Tolstói? El archivo de texto está disponible en [Project Gutenberg](http://www.gutenberg.org/ebooks/2600). Si ya instalaste [wget](/lessons/applied-archival-downloading-with-wget), puedes escribir:
+Ahora necesitas encontrar un archivo de texto básico para que nos ayude con el ejemplo. ¿Por qué no utilizar un libro que sabes que es largo, como la épica "Guerra y Paz" de Leon Tolstói? El archivo de texto está disponible en [Project Gutenberg](https://www.gutenberg.org/ebooks/2600). Si ya instalaste [wget](/lessons/applied-archival-downloading-with-wget), puedes escribir:

`wget http://www.gutenberg.org/files/2600/2600-0.txt`

@@ -272,7 +272,7 @@

Verás aparecer Vim frente a ti, un editor de texto en línea de comandos.

{% include figure.html filename="en-or-intro-to-bash-06.png" caption="Figura 6. Vim" %}

-Si quieres aprender más de Vim, aquí tienes una [buena guía](http://vimdoc.sourceforge.net/htmldoc/quickref.html) disponible.
+Si quieres aprender más de Vim, aquí tienes una [buena guía](https://vimdoc.sourceforge.net/htmldoc/quickref.html) disponible.

El uso de Vim para leer archivos es relativamente simple. Puedes usar las teclas de flechas para navegar alrededor y teóricamente leer *Guerra y Paz* a través de la línea de comandos (lo cual sería todo un logro, por cierto). A continuación hay algunos comandos de navegación básica:

diff --git a/es/lecciones/introduccion-a-ffmpeg.md b/es/lecciones/introduccion-a-ffmpeg.md
index 9d4650c9c0..6d0959856e 100644
--- a/es/lecciones/introduccion-a-ffmpeg.md
+++ b/es/lecciones/introduccion-a-ffmpeg.md
@@ -1,445 +1,445 @@
----
-title: Introducción a la transcodificación, edición y visualización de datos audiovisuales con FFmpeg
-authors:
-- Dave Rodriguez
-editors:
-- Brandon Walsh
-reviewers:
-- Tesla Cariani
-- Josh Romphf
-original: introduction-to-ffmpeg
-date: 2018-12-20
-translator:
-- Dave Rodriguez
-- Sebastian Fiori
-translation_date: 2020-12-11
-translation-editor:
-- Antonio Rojas Castro
-translation-reviewer:
-- Jennifer Isasi
-- José Antonio Motilla
-original: introduction-to-ffmpeg
-review-ticket: https://github.com/programminghistorian/ph-submissions/issues/302
-difficulty: 2
-activity: analyzing
-topics: [data-manipulation, data-visualization]
-abstract: Esta lección introduce las funciones básicas de FFmpeg, una herramienta libre de línea de comandos utilizada para manipular y analizar materiales audiovisuales.
-avatar_alt: Una cámara antigua
-doi: 10.46430/phes0049
-layout: lesson
----
-
-{% include toc.html %}
-
-# Introducción
-Históricamente, las Humanidades Digitales se han enfocado casi exclusivamente en el análisis de fuentes textuales a través de métodos computacionales (Hockey, 2004). Sin embargo, hay un interés creciente en el campo por la utilización de métodos computacionales para el análisis de materiales audiovisuales de patrimonio cultural, como refleja la creación de la [Alianza de Organizaciones de Humanidades Digitales Grupo de Interés Especial: Materiales audiovisuales en Humanidades Digitales](https://avindhsig.wordpress.com/) y [el aumento de las presentaciones relacionadas con temas audiovisuales en la conferencia global de AOHD](https://figshare.com/articles/AV_in_DH_State_of_the_Field/5680114) en los años anteriores.
Investigaciones recientes, tal como [Distant Viewing TV](https://distantviewing.org), indican un cambio en el campo hacia proyectos relacionados con el uso de técnicas computacionales para ampliar el alcance de los materiales que los y las humanistas digitales pueden explorar. Como afirma Erik Champion, "la audiencia de Humanidades Digitales no siempre está enfocada en la literatura o está interesada en las formas tradicionales de alfabetización" y la aplicación de metodologías digitales para estudiar cultura audiovisual es una faceta emergente y emocionante de las humanidades digitales (Champion, 2017, traducido por el autor). Hay muchas herramientas valiosas, gratuitas y de código abierto disponibles para aquellos interesados en trabajar con materiales audiovisuales (por ejemplo, el tutorial de _Programming Historian_ [Editar Audio con Audacity](/es/lecciones/editar-audio-con-audacity)). Este tutorial presentará otra: FFmpeg. - -[FFmpeg](https://www.ffmpeg.org/about.html) es el _framework_ multimedia de código abierto líder para transcodificar, editar, filtrar y reproducir casi cualquier tipo de formato audiovisual digital (sitio web de FFmpeg - "About"). Muchos programas comunes y sitios web usan FFmpeg para leer y escribir archivos audiovisuales, por ejemplo, VLC, Google Chrome, YouTube y [muchos más](https://trac.ffmpeg.org/wiki/Projects). Además de ser una herramienta de programa y de desarrollo web, FFmpeg se puede usar en la interfaz de la línea de comandos para realizar muchas tareas comunes, complejas e importantes, relacionadas con la gestión, modificación y análisis de archivos audiovisuales. Estos tipos de procesos, tales como editar, transcodificar o extraer los metadatos de archivos, generalmente requieren acceso a otro programa (tal como editores de vídeo no lineal, como Adobe Premiere o Final Cut Pro); sin embargo, FFmpeg permite a un usuario operar directamente en archivos audiovisuales sin el uso de interfaces o programa de terceros. Como tal, el conocimiento del _framework_ permite a los usuarios manipular materiales audiovisuales para satisfacer sus necesidades con una solución de código abierto y gratuita, que ofrece gran parte de la funcionalidad de un costoso programa de audio y vídeo. Este tutorial ofrece una introducción a la lectura y escritura de comandos de FFmpeg y una guía paso a paso a partir de un caso práctico para aprender a utilizar el _framework_ en un trabajo específico para los humanistas digitales. Específicamente, se mostrará cómo FFmpeg puede ser utilizado para extraer y analizar datos de color en un video archivístico. - -## Objetivos de aprendizaje -* Instalar FFmpeg en tu computadora o usar una versión "demo" en el navegador web -* Comprender la estructura básica y la sintaxis de los comandos de FFmpeg -* Aprender varios comandos útiles, tales como: - * "Re-wrap" (cambiar el contenedor) y transcodificar (recodificar archivos) - * "Demux" de archivos (separar audio y vídeo) - * Recortar/Editar archivos - * Usar FFplay para reproducir archivos - * Crear vectorscopios para visualizar los datos de color - * Usar FFprobe para generar informes de los datos de color -* Introducir recursos para mayor exploración y experimentación - -## Requisitos previos -Antes de comenzar con este tutorial, es necesario que localices la [Terminal](https://es.wikipedia.org/wiki/Terminal_(macOS)) de tu computadora u otra interfaz de línea de comandos, ya que ahí es donde ingresarás y ejecutarás los comandos de FFmpeg. 
Si necesitas instrucción para acceder y usar la interfaz de línea de comandos, te recomendamos la lección de _Programming Historian_ [Introducción a la línea de comandos en Bash](/es/lecciones/introduccion-a-bash) para usuarios de Mac y Linux o, para usuarios de Windows, [Introducción a la línea de comandos de Windows con PowerShell](/es/lecciones/introduccion-a-powershell). Adicionalmente, será de utilidad tener conocimientos básicos de [códecs](https://es.wikipedia.org/wiki/C%C3%B3dec) y [contenedores](https://es.wikipedia.org/wiki/Formato_contenedor) audiovisuales para entender con mayor detalle el funcionamiento de FFmpeg. Proporcionaremos información adicional y revisaremos con mayor detalle los códecs y contenedores en la sección sobre ejemplos de comandos preliminares de este tutorial.
-
-# Cómo instalar FFmpeg
-La instalación de FFmpeg es posiblemente la parte más difícil de usar esta herramienta. Afortunadamente, existen algunas guías y recursos disponibles para instalar el _framework_ para cada sistema operativo.
-
-
    -Nuevas versiones de FFmpeg son lanzadas aproximadamente cada seis meses. Para mantenerse al tanto de ellas, es recomendable seguir a FFmpeg en Twitter o en su sitio web. Las nuevas versiones de FFmpeg generalmente contienen características tales como filtros nuevos y actualizados, compatibilidades de códecs y corrección de errores. La sintaxis de FFmpeg no cambia con estas actualizaciones y las capacidades antiguas rara vez se eliminan. Puedes aprender más sobre estas actualizaciones consultando los anuncios de actualizaciones anteriores en la sección de News en el sitio web de FFmpeg. -
-
-## Para usuarios de Mac OS
-La opción más simple es usar un administrador de paquetes como [Homebrew](https://brew.sh/) para instalar FFmpeg y asegurar que permanezca en la versión más reciente. Para completar este tipo de instalación, sigue estos pasos:
-* Instala Homebrew de acuerdo con las instrucciones en el enlace de arriba
-* Para una instalación básica, ejecuta `brew install ffmpeg` en tu Terminal
 **Nota**: generalmente se recomienda instalar FFmpeg con opciones adicionales a las incluidas en la instalación básica; esto proporcionará acceso a más herramientas y funciones. [La Guía de Instalación de Apple de Reto Kromer](https://avpres.net/FFmpeg/install_Apple.html) proporciona un buen conjunto de opciones adicionales:

	```bash
	brew install ffmpeg --with-freetype --with-openjpeg --with-x265 --with-rubberband --with-tesseract
	```

	* Para una explicación de estas opciones adicionales, revisa [La Guía FFmpeg de Ashley Blewer](https://training.ashleyblewer.com/presentations/ffmpeg.html#10).
	* Además, puedes ejecutar `brew options ffmpeg` para ver qué características están o han estado disponibles en la versión actual de FFmpeg
	* Para actualizar tu instalación a la versión más reciente, ejecuta:

	```bash
	brew update && brew upgrade ffmpeg
	```

-* Para más opciones de instalación para Mac OS, revisa [La Guía de Compilación de FFmpeg para Mac OS](https://trac.ffmpeg.org/wiki/CompilationGuide/macOS) (la guía solo está disponible en inglés).

-## Para usuarios de Windows
-Los usuarios de Windows pueden usar el administrador de paquetes [Chocolatey](https://chocolatey.org/) para instalar y mantener FFmpeg. [La Guía de Instalación de Windows de Reto Kromer](https://avpres.net/FFmpeg/install_Windows.html) proporciona toda la información necesaria para usar Chocolatey o construir el _framework_ a partir del código fuente (la guía solo está disponible en inglés).

-## Para usuarios de Linux
-[Linuxbrew](https://linuxbrew.sh/) es un programa similar a Homebrew que se puede utilizar para instalar y mantener FFmpeg en Linux. Reto Kromer también proporciona una guía útil, [la Guía de Instalación de Linux](https://avpres.net/FFmpeg/install_Linux.html), que es similar a la instalación en Mac OS. Tu distribución de Linux puede tener su [propio administrador de paquetes](https://www.linode.com/docs/tools-reference/linux-package-management/) que incluye paquetes FFmpeg (la guía solo está disponible en inglés). Dependiendo de tu distribución de Linux (Ubuntu, Fedora, Arch Linux, etc.) estas versiones pueden variar, así que usar Linuxbrew podría ser útil para asegurar que la versión es la misma independientemente del tipo de Linux que utilices.

-## Otros recursos de instalación

-* [Descarga de paquetes](https://www.ffmpeg.org/download.html)
	* FFmpeg permite el acceso a archivos binarios, código fuente y versiones estáticas para Mac, Windows y Linux directamente en su sitio web. Los usuarios pueden construir el _framework_ sin un administrador de paquetes con estos recursos. Es probable que solo los usuarios avanzados quieran usar esta opción.
-* [La Guía de Compilación de FFmpeg](https://trac.ffmpeg.org/wiki/CompilationGuide)
	* La página Wiki de FFmpeg también proporciona un compendio de guías y estrategias para instalar FFmpeg en tu computadora (la guía solo está disponible en inglés).
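Como referencia rápida, la instalación con un administrador de paquetes se reduce en cada sistema a una sola orden. El siguiente esbozo reúne las tres variantes; los nombres exactos de los paquetes pueden variar según tu configuración:

```bash
# macOS, con Homebrew
brew install ffmpeg

# Windows, con Chocolatey (en una consola con permisos de administrador)
choco install ffmpeg

# Linux, con el administrador de paquetes de la distribución (ejemplo para Debian/Ubuntu)
sudo apt install ffmpeg
```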
-
-## Probando la instalación
-* Para asegurarte de que FFmpeg se haya instalado correctamente, ejecuta:

	```bash
	ffmpeg -version
	```

-* Si ves una lista larga con información, ¡la instalación fue exitosa! Debe ser similar a lo siguiente:

```bash
ffmpeg version 4.0.1 Copyright (c) 2000-2018 the FFmpeg developers
-built with Apple LLVM version 9.1.0 (clang-902.0.39.1)
-configuration: --prefix=/usr/local/Cellar/ffmpeg/4.0.1 --enable-shared --enable-pthreads --enable-version3 --enable-hardcoded-tables --enable-avresample --cc=clang --host-cflags= --host-ldflags= --enable-gpl --enable-ffplay --enable-libfreetype --enable-libmp3lame --enable-librubberband --enable-libtesseract --enable-libx264 --enable-libx265 --enable-libxvid --enable-opencl --enable-videotoolbox --disable-lzma --enable-libopenjpeg --disable-decoder=jpeg2000 --extra-cflags=-I/usr/local/Cellar/openjpeg/2.3.0/include/openjpeg-2.3
-libavcodec 58. 18.100 / 58. 18.100
-libavformat 58. 12.100 / 58. 12.100
-libavdevice 58. 3.100 / 58. 3.100
-libavfilter 7. 16.100 / 7. 16.100
-libavresample 4. 0. 0 / 4. 0. 0
-libswscale 5. 1.100 / 5. 1.100
-libswresample 3. 1.100 / 3. 1.100
-libpostproc 55. 1.100 / 55. 1.100
```

-* Si el sistema arroja `-bash: ffmpeg: command not found`, algo ha ido mal.
	* Nota: Si estás usando un administrador de paquetes, es improbable que encuentres este mensaje de error. Sin embargo, si hay un problema después de instalar con un administrador de paquetes, es probable que haya un problema con el administrador de paquetes y no con FFmpeg. Consulta la solución de problemas en [Homebrew](https://docs.brew.sh/Troubleshooting), [Chocolatey](https://chocolatey.org/docs/troubleshooting), o [Linuxbrew](http://linuxbrew.sh/) para asegurar que el administrador de paquetes está funcionando correctamente en tu computadora (las guías solo están disponibles en inglés). Si estás intentando instalar sin un administrador de paquetes y ves este mensaje de error, haz una referencia cruzada de tu método con la Guía de Compilación de FFmpeg anterior.

-## Usando FFmpeg en el navegador
-Si no quieres instalar FFmpeg en tu computadora pero te gustaría familiarizarte con el _framework_ y usarlo en la interfaz de línea de comandos, [videoconverter.js](https://bgrins.github.io/videoconverter.js/demo/) de Brian Grinstead proporciona un método para ejecutar los comandos FFmpeg en tu navegador (la interfaz está en inglés).
Esta interfaz del navegador no tiene las funcionalidades necesarias para completar todo este tutorial, pero es útil para aprender los comandos esenciales de FFmpeg. Adicionalmente, este recurso opera en una versión anterior de FFmpeg y posiblemente no tenga todas las características de la versión más reciente.
-
-
-## Estructura básica y sintaxis de los comandos FFmpeg
-El comando básico tiene cuatro partes:

```bash
[Símbolo del Sistema] [Archivo de Entrada] [Banderas/Acciones] [Archivo de Salida]
```

-* Cada comando comenzará con un símbolo del sistema. Dependiendo del uso, este será `ffmpeg` (cambiar archivos), `ffprobe` (generar metadatos de archivos) o `ffplay` (reproducir archivos).
-* Los archivos de entrada son los archivos que están siendo leídos, editados o examinados.
-* Las banderas y acciones son las cosas que le estás diciendo a FFmpeg que haga con los archivos de entrada. La mayoría de los comandos contendrán múltiples banderas y acciones de complejidad variable.
-* Los archivos de salida son los archivos creados por el comando o los informes creados por los comandos de `ffprobe`.

-Escrito genéricamente, el comando básico es parecido a lo siguiente:

```bash
 ffmpeg -i /ruta_de_archivo/archivo_de_entrada.ext -bandera alguna_acción /ruta_de_archivo/archivo_de_salida.ext
 ```
    -Como con cualquier interfaz de línea de comandos, tendrás que escribir las rutas de los archivos de entrada y de salida dependiendo de las ubicaciones de tus directorios de trabajo. En los ejemplos proporcionados en este tutorial, las rutas de archivos no estarán escritas completamente y se supone que el usuario ha navegado al directorio de trabajo para ejecutar los comandos.
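Por ejemplo, un comando mínimo que sigue esta estructura, con nombres de archivo hipotéticos, se vería así (adelantamos la bandera `-c:v libx264`, que se explica más adelante en este tutorial):

```bash
# entrada.mov y salida.mp4 son nombres hipotéticos;
# -c:v libx264 codifica la pista de vídeo con el códec H.264
ffmpeg -i entrada.mov -c:v libx264 salida.mp4
```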
    - - -A continuación, examinaremos algunos ejemplos de varios comandos diferentes que usan esta estructura y sintaxis. Adicionalmente, estos comandos demostrarán algunas de las características más útiles de FFmpeg y nos permitirán familiarizarnos con la forma en que se construyen los archivos audiovisuales digitales. - -# Para empezar -Para este tutorial, utilizaremos una película archivística que se llama [*Destination Earth*](https://archive.org/details/4050_Destination_Earth_01_47_33_28) como nuestro objeto de estudio. Esta película está publicada por los [Archivos Prelinger](https://es.wikipedia.org/wiki/Archivos_Prelinger) y en el [Internet Archive](https://archive.org/). Esta película, estrenada en 1956 y producida por [El American Petroleum Institute](https://es.wikipedia.org/wiki/American_Petroleum_Institute) y [John Sutherland Productions](https://en.wikipedia.org/wiki/John_Sutherland_(producer)), es un excelente ejemplo de la propaganda de la época de la Guerra Fría que exalta las virtudes del capitalismo y el estilo de vida estadounidense. Utilizando el proceso de [Technicolor](https://es.wikipedia.org/wiki/Technicolor), este corto animado de ciencia ficción cuenta la historia de una sociedad marciana que vive bajo un gobierno opresivo y sus esfuerzos para mejorar sus métodos industriales. Envían un emisario a la Tierra que descubre que la clave para esto es la refinación de petróleo y la libre empresa. Utilizaremos el vídeo para introducir algunas de las funcionalidades básicas de FFmpeg y analizar sus propiedades de color con relación a su retórica propagandística. - -{% include figure.html filename="destEarth_titlecard.png" caption="Destination Earth (1956)" %} - -En este tutorial se llevarán a cabo los siguientes pasos: -* Navegar a la página de [*Destination Earth*](https://archive.org/details/4050_Destination_Earth_01_47_33_28) en el Internet Archive -* Descargar dos archivos vídeos: las versiones "MPEG4" (extensión de archivo `.m4v`) y "OGG" (extensión de archivo `.ogv`) de la película -* Guardar estos archivos en la misma carpeta en algún lugar de tu computadora. Guárdalos con los nombres de archivos `destEarth`, seguido por su extensión. - -Tómate unos minutos para ver el vídeo y tener una idea de su estructura, mensaje y motivos visuales antes de continuar con los siguientes comandos. - -# Ejemplos de comandos preliminares - -## Ver metadatos básicos con FFprobe -Antes de comenzar a manipular nuestros archivos `destEarth`, usemos FFmpeg para examinar información básica sobre el archivo utilizando un simple comando de `ffprobe`. Esto ayudará a comprender cómo se construyen los archivos audiovisuales digitales y proporcionará una base para el resto del tutorial. Navega hasta el directorio del archivo y ejecuta: - -```bash -ffprobe destEarth.ogv -``` - -Verás los metadatos técnicos básicos del archivo impresos en `stdout`: - -{% include figure.html filename="ffprobe_ogg_es.png" caption="El output de un comando básico `ffprobe` con destEarth.ogv" %} - -La línea `Input # 0` del informe identifica el **contenedor** como [ogg](https://es.wikipedia.org/wiki/Ogg). Los contenedores (también llamados "envoltorios" o "wrappers", en inglés) proporcionan al archivo la estructura de sus diversas pistas. Los diferentes contenedores (otros más comunes incluyen `.mkv`, `.avi` y `.flv`) tienen diferentes características y compatibilidad con diversos programas. Examinaremos cómo y por qué es posible que desees cambiar el contenedor de un archivo en el siguiente comando. 
-
-Las líneas `Stream #0:0` y `Stream #0:1` proporcionan información sobre las pistas del archivo (es decir, el contenido que ves en la pantalla y escuchas a través de sus altavoces) y también identifican el **códec** de cada pista. Los códecs especifican cómo se codifica/comprime (se escribe y almacena) y se decodifica (se reproduce) la información. La pista de vídeo (`Stream #0:0`) de nuestro archivo `.ogv` usa el códec [theora](https://es.wikipedia.org/wiki/Theora) y la pista de audio (`Stream #0:1`) usa el códec [vorbis](https://es.wikipedia.org/wiki/Vorbis). Estas líneas también proporcionan información importante relacionada con el espacio de color de la pista de vídeo (`yuv420p`), resolución (`400x300`) y marcos por segundo (`29.97 fps`). Adicionalmente, proporcionan información de audio como la tasa de muestreo (`44100 Hz`) y la tasa de bits (`128 kb/s`).

-Los códecs, en mayor medida que los contenedores, determinan la calidad y la compatibilidad de un archivo audiovisual con diferentes programas y plataformas (otros códecs comunes incluyen `DNxHD` y `ProRes` para vídeo y `mp3` y `FLAC` para audio). Examinaremos cómo y por qué es posible que también desees cambiar el códec de un archivo en el siguiente comando.

-Ejecuta otro comando de `ffprobe`, esta vez con el archivo `.m4v`:

```bash
ffprobe destEarth.m4v
```

-Una vez más, verás los metadatos técnicos básicos impresos en el `stdout`:

{% include figure.html filename="ffprobe_mp4_es.png" caption="El output de un comando básico `ffprobe` con destEarth.m4v" %}

-También notarás que el informe para el archivo `.m4v` contiene múltiples contenedores en la línea `Input # 0` como `mov` y `m4a`. No es necesario profundizar en los detalles para los fines de este tutorial, pero ten en cuenta que los contenedores `mp4` y `mov` se presentan en múltiples "sabores" y diferentes extensiones de archivo. Sin embargo, todos son muy similares en su construcción técnica y, como tal, pueden verse agrupados en metadatos técnicos. De manera similar, el archivo `ogg` tiene la extensión `.ogv`, un "sabor" o variante del formato `ogg`.

-Al igual que en nuestro comando anterior, las líneas `Stream #0:0` y `Stream #0:1` identifican el códec de cada pista. Podemos ver que nuestro archivo `.m4v` usa el códec de vídeo [H.264](https://es.wikipedia.org/wiki/H.264/MPEG-4_AVC) y el códec de audio [aac](https://es.wikipedia.org/wiki/Advanced_Audio_Coding). Ten en cuenta que se nos proporcionan metadatos similares a los de nuestro archivo `.ogv`, pero algunas características importantes relacionadas con el análisis visual (como la resolución) son significativamente diferentes. Nuestro `.m4v` tiene una resolución más alta (`640x480`) y, por lo tanto, utilizaremos esta versión de *Destination Earth* como nuestro vídeo de origen.

-Ahora que sabemos más sobre la composición técnica de nuestro archivo, podemos comenzar a explorar las características y funcionalidades transformadoras de FFmpeg (volveremos a utilizar `ffprobe` más adelante en el tutorial para realizar una extracción de metadatos de color más avanzada).

-## Cambiar el contenedor (volver a envolver, "re-wrap")
-Dependiendo de tu sistema operativo, puedes tener uno o más reproductores de medios instalados.
Para efectos de demostración, veamos qué sucede si intentas abrir `destEarth.ogv` usando el reproductor de medios QuickTime que viene con Mac OSX:

{% include figure.html filename="QT_fail.png" caption="Los reproductores multimedia propietarios como QuickTime a menudo están limitados en los tipos de archivos con los que pueden trabajar" %}

-Una opción cuando te enfrentas a un mensaje de este tipo es simplemente usar otro reproductor de medios. [VLC](https://www.videolan.org/vlc/index.es.html), que está construido con FFmpeg, es una excelente alternativa de código abierto, pero simplemente "usar otro programa" puede no ser siempre una solución viable (y es posible que no siempre tengas otra versión de archivo con la que trabajar). Muchos editores de vídeo populares, como Adobe Premiere, Final Cut Pro y DaVinci Resolve, tienen sus propias limitaciones en cuanto a los tipos de formatos con los que son compatibles. Además, las diferentes plataformas web y sitios de alojamiento/transmisión, como Vimeo, [también tienen sus propios requisitos.](https://help.vimeo.com/hc/es/articles/12426043233169-Video-and-audio-compression-guidelines) Por lo tanto, es importante poder volver a envolver y transcodificar tus archivos para cumplir con las diversas especificaciones para la reproducción, edición, publicación digital y ajuste de archivos a los estándares requeridos por las plataformas de archivo o preservación digital.

-
-Para obtener una lista completa de los códecs y contenedores compatibles con tu instalación de FFmpeg, ejecuta `ffmpeg -codecs` y `ffmpeg -formats`, respectivamente, para ver la lista impresa en tu `stdout`.
-
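Como estas listas son muy largas, puede resultar útil filtrarlas con otra herramienta de la línea de comandos. Por ejemplo, un esbozo con `grep` para comprobar si tu instalación incluye un códec concreto:

```bash
# Busca "h264" en la lista de códecs disponibles;
# si aparece, tu instalación puede trabajar con ese códec
ffmpeg -codecs | grep -i h264
```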
-
-
-Como un ejercicio para aprender la sintaxis básica de FFmpeg y aprender a transcodificar entre formatos, comenzaremos con nuestro archivo `destEarth.ogv` y escribiremos un nuevo archivo con vídeo codificado en `H.264`, audio en `AAC` y envuelto en un contenedor `.mp4`, una combinación muy común y altamente portátil de códecs y contenedores que es prácticamente idéntica a la del archivo `.m4v` que originalmente descargamos. Aquí está el comando que ejecutarás, junto con una explicación de cada parte de la sintaxis:

```bash
ffmpeg -i destEarth.ogv -c:v libx264 -c:a aac destEarth_transcoded.mp4
```

-* `ffmpeg` = comienza el comando
-* `-i destEarth.ogv` = especifica el archivo de entrada
-* `-c:v libx264` = transcodifica la pista de vídeo al códec H.264
-* `-c:a aac` = transcodifica la pista de audio al códec AAC
-* `destEarth_transcoded.mp4` = especifica el archivo de salida. Ten en cuenta que aquí es donde se especifica el nuevo tipo de contenedor.

-Si ejecutas el comando tal como está escrito y en el mismo directorio que `destEarth.ogv`, verás aparecer en el directorio un nuevo archivo llamado `destEarth_transcoded.mp4`. Si estás operando en Mac OSX, también podrás reproducir este nuevo archivo con QuickTime. Una exploración completa de los convenios de códecs, contenedores, compatibilidad y extensión de archivos está más allá del alcance de este tutorial; sin embargo, este conjunto de ejemplos preliminares debería darles a aquellos que no estén familiarizados con la forma en que se construyen los archivos audiovisuales digitales un conjunto de conocimientos de referencia que les permitirá completar el resto del tutorial.

-## Creación de extractos y "demuxing" de audio y vídeo
-Ahora que tenemos un mejor entendimiento de las pistas, códecs y contenedores, veamos formas en que FFmpeg puede trabajar con materiales de vídeo a un nivel más granular. Para este tutorial, examinaremos dos secciones separadas de *Destination Earth* para comparar cómo se usa el color en relación con la retórica propagandística de la película. Crearemos y prepararemos estos extractos para el análisis utilizando un comando que realiza dos funciones diferentes simultáneamente:

-* Primero, el comando creará dos extractos de `destEarth.m4v`.
-* Segundo, el comando eliminará ("demux") los componentes de audio (`Stream #0:1`) de estos extractos.
-
    - Estamos eliminando el audio para ahorrar espacio de almacenamiento (la información de audio no es necesaria para el análisis de color). Esto probablemente será útil si esperas utilizar este tipo de análisis a escalas más grandes. Cerca del final del tutorial, se discutirá más información sobre la ampliación del análisis de color. -
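Como veremos a continuación, la bandera `-an` descarta el audio al escribir el archivo de salida. Su contraparte, `-vn`, hace lo opuesto: descarta el vídeo, lo cual es útil si alguna vez quieres conservar solo la pista de audio. Un esbozo (el nombre del archivo de salida es hipotético):

```bash
# Extrae únicamente la pista de audio, copiándola sin recodificar
ffmpeg -i destEarth.m4v -vn -c:a copy destEarth_audio.m4a
```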
-
-El primer extracto que haremos contiene una secuencia correspondiente al comienzo de la película que describe las difíciles condiciones y la vida oprimida de la sociedad marciana. El siguiente comando especifica los puntos de inicio y finalización del extracto, le dice a FFmpeg que retenga toda la información en la pista de vídeo sin transcodificar nada y le indica que escriba nuestro nuevo archivo sin la pista de audio:

```bash
ffmpeg -i destEarth.m4v -ss 00:01:00 -to 00:04:35 -c:v copy -an destEarth_Mars_video.mp4
```
-* `ffmpeg` = comienza el comando
-* `-i destEarth.m4v` = especifica el archivo de entrada
-* `-ss 00:01:00` = establece el punto de inicio a 1 minuto del inicio del archivo
-* `-to 00:04:35` = establece el punto final a 4 minutos y 35 segundos desde el inicio del archivo
-* `-c:v copy` = copia la pista de vídeo directamente, sin transcodificar
-* `-an` = le dice a FFmpeg que ignore la pista de audio al escribir el archivo de salida.
-* `destEarth_Mars_video.mp4` = especifica el archivo de salida

{% include figure.html filename="Mars_screenshot.png" caption="Vida en Marte" %}

-Ahora, ejecutaremos un comando similar para crear un extracto de "Tierra". Esta parte de la película tiene una secuencia similar que describe las maravillas de la vida en la Tierra y la riqueza de su sociedad gracias al capitalismo de libre empresa y al uso de petróleo y productos derivados de este:

```bash
ffmpeg -i destEarth.m4v -ss 00:07:30 -to 00:11:05 -c:v copy -an destEarth_Earth_video.mp4
```

{% include figure.html filename="Earth_screenshot.png" caption="La abundancia de la Tierra" %}


-Ahora deberías tener dos archivos nuevos en tu directorio llamados `destEarth_Mars_video.mp4` y `destEarth_Earth_video.mp4`. Puedes probar uno o ambos archivos (o cualquiera de los otros archivos en el directorio) usando la función `ffplay` de FFmpeg. Simplemente ejecuta:

```bash
ffplay destEarth_Mars_video.mp4
```

-y/o

```bash
ffplay destEarth_Earth_video.mp4
```

-Verás una ventana abierta y el vídeo comenzará en el punto de inicio especificado. Se reproducirá una vez y luego la ventana se cerrará (además, notarás que no hay sonido en tu vídeo). También notarás que los comandos `ffplay` no requieren que se especifique una entrada (`-i`) o una salida porque la reproducción en sí misma es la salida.
-
    -FFplay es un reproductor multimedia muy versátil que viene con una serie de opciones para personalizar la reproducción. Por ejemplo, si agregas `-loop 0` al comando se reproducirá en bucle indefinidamente.
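En la práctica, el comando completo con esa opción se vería así:

```bash
# Reproduce el extracto en bucle indefinido (0 = repetir sin límite);
# pulsa q o cierra la ventana para salir
ffplay -loop 0 destEarth_Mars_video.mp4
```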
-
-
-Ahora hemos creado nuestros dos extractos para el análisis. Si vemos estos clips por separado, parece haber diferencias significativas en la forma en que se utiliza el color y en la variedad de colores. En la siguiente parte del tutorial examinaremos y extraeremos datos de los archivos de vídeo para cuantificar y apoyar esta hipótesis.

-## Análisis de datos de color
-El uso de herramientas digitales para analizar la información de color en películas es otra faceta emergente de las Humanidades Digitales que se superpone con los estudios cinematográficos tradicionales. En particular, el proyecto [FilmColors](https://filmcolors.org/) de la Universidad de Zurich cuestiona la intersección crítica de las "características estéticas formales de los aspectos semánticos, históricos y tecnológicos" de su producción, recepción y difusión a través del uso de herramientas de análisis y anotación digital (Flueckiger, 2017, traducido por el autor). Aunque no hay un método estandarizado para este tipo de investigación, en el momento de escribir esta lección el comando `ffprobe` que se describe a continuación es una herramienta útil para extraer información de color que se puede usar en el análisis computacional. Primero, veamos otra manera estandarizada de representar la información de color que informa este enfoque cuantitativo, basado en datos, para el análisis de color: los vectorscopios.

-### Vectorscopios
-Durante años, profesionales del vídeo han confiado en los [vectorscopios](https://es.wikipedia.org/wiki/Vectorscopio) para ver la información del color de una manera estandarizada y fácilmente legible. Un vectorscopio grafica información de color en una gratícula circular. La posición del gráfico corresponde a los [tonos](https://es.wikipedia.org/wiki/Tono_(color)) particulares encontrados en una señal de vídeo. Otros factores, como la saturación, determinan también el tamaño de un gráfico. A continuación se presenta un ejemplo de un vectorscopio que muestra los valores de color de las barras SMPTE.

{% include figure.html filename="vectorscope.png" caption="Una lectura de vectorscopio que representa las barras SMPTE NTSC estándar. Fuente: Wikimedia Commons" %}

{% include figure.html filename="smpte_bars.png" caption="Las barras SMPTE. Fuente: Wikimedia Commons" %}

-FFmpeg se puede utilizar para reproducir y crear archivos de vídeo con vectorscopios integrados en ellos para proporcionar una referencia en tiempo real para la información de color del vídeo. Los siguientes comandos `ffplay` incorporarán un vectorscopio en la esquina inferior derecha del marco.
A medida que se reproduce el vídeo, notarás el cambio en el gráfico del vectorscopio a medida que cambia el color en pantalla: - -```bash -ffplay destEarth_Mars_video.mp4 -vf "split=2[m][v], [v]vectorscope=b=0.7:m=color3:g=green[v],[m][v]overlay=x=W-w:y=H-h" -``` - -* `ffplay` = comienza el comando -* `-i entrada_archivo.ext` = la ruta y el nombre del archivo de entrada -* `-vf` = crea un [*filter-graph*](https://trac.ffmpeg.org/wiki/FilteringGuide) para usar con las pistas -* `"` = una comilla para comenzar el *filter-graph.* La información entre las comillas - especifica los parámetros de la apariencia y posición del vectorscopio -* `split=2[m][v]` = divide la entrada en dos salidas idénticas llamadas `[m]` y `[v]` -* `,` = la coma indica que viene otro parámetro -* `[v]vectorscope=b=0.7:m=color3:g=green[v]` = asigna la salida `[v]` al filtro del vectorscopio -* `[m][v]overlay=x=W-w:y=H-h` = superpone el vectorscopio encima de la imagen de vídeo en una cierta ubicación (en este caso, en la esquina inferior derecha de la pantalla) -* `"` = termina el *filter-graph* - -
    -Para obtener más información sobre las diversas opciones para crear vectorscopios, consulta la documentación oficial y la página Wiki FFmpeg Vectorscope. Además, puedes encontrar más información sobre cómo colocar las superposiciones en la documentación del filtro de superposición FFmpeg. -
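Como variación, los parámetros `x` e `y` del filtro de superposición permiten reubicar el vectorscopio dentro del marco. Un esbozo que lo colocaría en la esquina superior izquierda en lugar de la inferior derecha:

```bash
# Igual que el comando anterior, pero con el vectorscopio arriba a la izquierda
ffplay destEarth_Mars_video.mp4 -vf "split=2[m][v], [v]vectorscope=b=0.7:m=color3:g=green[v],[m][v]overlay=x=0:y=0"
```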
-
-
{% include figure.html filename="Mars_screenshot_vector.png" caption="Captura de pantalla de la ventana de FFplay con vectorscopio incorporado" %}

-Y para el extracto de "Tierra":

```bash
ffplay destEarth_Earth_video.mp4 -vf "split=2[m][v], [v]vectorscope=b=0.7:m=color3:g=green[v],[m][v]overlay=x=W-w:y=H-h"
```

{% include figure.html filename="Earth_screenshot_vector.png" caption="Captura de pantalla de la ventana de FFplay con vectorscopio incorporado" %}

-También podemos ajustar este comando para escribir nuevos archivos de vídeo con vectorscopios:

```bash
ffmpeg -i destEarth_Mars_video.mp4 -vf "split=2[m][v], [v]vectorscope=b=0.7:m=color3:g=green[v],[m][v]overlay=x=W-w:y=H-h" -c:v libx264 destEarth_Mars_vectorscope.mp4
```

```bash
ffmpeg -i destEarth_Earth_video.mp4 -vf "split=2[m][v], [v]vectorscope=b=0.7:m=color3:g=green[v],[m][v]overlay=x=W-w:y=H-h" -c:v libx264 destEarth_Earth_vectorscope.mp4
```

-Nota los pequeños pero importantes cambios en la sintaxis:
 * Hemos agregado una bandera de `-i` porque es un comando de `ffmpeg`
 * Hemos especificado el códec de vídeo del archivo de salida como [H.264](https://es.wikipedia.org/wiki/H.264/MPEG-4_AVC) con la bandera `-c:v libx264`. Nuestros extractos ya no tienen pista de audio, pero si tu archivo de entrada la tuviera, podrías copiarla sin recodificar con `-c:a copy` o especificar otro códec de audio si lo necesitas.
 * Hemos definido el nombre del archivo de salida

-Tómate unos minutos para ver estos vídeos con los vectorscopios integrados en ellos. Observa cuán dinámicos (o no) son los cambios entre los extractos de "Marte" y "Tierra". Compara lo que ves en el vectorscopio con tus propias impresiones del vídeo mismo. Podríamos usar las observaciones de estos vectorscopios para hacer determinaciones sobre qué tonos de color aparecen de manera más regular o intensa en el vídeo, o podemos comparar diferentes formatos uno al lado del otro para ver cómo el color se codifica o representa de manera diferente en función de diferentes códecs, resoluciones, etc.

-Aunque los vectorscopios proporcionan una representación útil y en tiempo real de la información del color, es posible que también deseemos acceder a los datos sin procesar que se encuentran debajo de ellos. Luego, podemos usar estos datos para desarrollar visualizaciones más flexibles que no dependan de ver el archivo de vídeo simultáneamente y que ofrezcan un enfoque más cuantitativo para el análisis de color. En nuestros próximos comandos, utilizaremos `ffprobe` para producir un conjunto tabular de datos que pueda usarse para crear un gráfico de datos de color.

-### Extracción de datos de color con FFprobe
-Al comienzo de este tutorial, utilizamos un comando `ffprobe` para ver los metadatos básicos de nuestro archivo impresos en el `stdout`. En los siguientes ejemplos, utilizaremos `ffprobe` para extraer datos de color de nuestros extractos de vídeo y enviar esta información a archivos `.csv`. Dentro de nuestro comando `ffprobe`, vamos a utilizar el filtro `signalstats` para crear reportes `.csv` de información de tono de color medio para cada marco en la secuencia de vídeo de `destEarth_Mars_video.mp4` y `destEarth_Earth_video.mp4`, respectivamente.
-
```bash
ffprobe -f lavfi -i movie=destEarth_Mars_video.mp4,signalstats -show_entries frame=pkt_pts_time:frame_tags=lavfi.signalstats.HUEMED -print_format csv > destEarth_Mars_hue.csv
```

-* `ffprobe` = comienza el comando
-* `-f lavfi` = especifica el dispositivo de entrada virtual [libavfilter](https://ffmpeg.org/ffmpeg-devices.html#lavfi) como el formato elegido. Esto es necesario cuando se usa `signalstats` y muchos filtros en comandos FFmpeg más complejos.
-* `-i movie=destEarth_Mars_video.mp4` = nombre del archivo de entrada
-* `,signalstats` = especifica el uso del filtro `signalstats` con el archivo de entrada
-* `-show_entries` = establece una lista de entradas que se mostrarán en el informe. Estas se especifican en las siguientes opciones.
-* `frame=pkt_pts_time` = especifica mostrar cada marco con su correspondiente `pkt_pts_time`, creando una entrada única para cada marco de vídeo
-* `:frame_tags=lavfi.signalstats.HUEMED` = crea una etiqueta para cada marco que contiene el valor de tono medio
-* `-print_format csv` = especifica el formato del informe de metadatos
-* `> destEarth_Mars_hue.csv` = escribe un nuevo archivo `.csv` que contiene el informe de metadatos usando `>`, un [operador de redireccionamiento de Bash](https://www.gnu.org/software/bash/manual/html_node/Redirections.html). Este operador toma el comando que lo precede y "redirige" la salida a otra ubicación. En este caso, está escribiendo la salida en un nuevo archivo `.csv`. La extensión de archivo proporcionada aquí también debe coincidir con el formato especificado por el indicador `print_format`.

-A continuación, ejecuta el mismo comando para el extracto de "Tierra":

```bash
ffprobe -f lavfi -i movie=destEarth_Earth_video.mp4,signalstats -show_entries frame=pkt_pts_time:frame_tags=lavfi.signalstats.HUEMED -print_format csv > destEarth_Earth_hue.csv
```

-
    -Para obtener más información sobre el filtro de signalstats y las diversas métricas que se pueden extraer de las transmisiones de vídeo, consulta la documentación del filtro FFmpeg. -
    - - -Ahora deberías tener dos archivos `.csv` en tu directorio. Si los abres en un editor de texto o en un programa de hoja de cálculo, verás tres columnas de datos: - -{% include figure.html filename="csv_head.png" caption="Las primeras filas de nuestro informe de color en formato .csv" %} - -Comenzando a la izquierda y moviéndose a la derecha, las dos primeras columnas nos dan información sobre dónde estamos en el vídeo. Los números decimales representan fracciones de segundo que también corresponden aproximadamente a la base de tiempo de vídeo de 30 marcos por segundo. Cada fila en nuestro `.csv` corresponde a un marco de vídeo. La tercera columna lleva un número entero entre 0-360, valor que representa el tono medio para ese marco de vídeo. Estos números son los datos cuantitativos subyacentes del diagrama de vectorscopio y corresponden a su posición (en radianes) en la gratícula circular. Haciendo referencia a nuestra imagen de vectorescopio de antes, puedes ver que comenzando en la parte inferior del círculo (0 grados) y moviéndose a la izquierda, los "verdes" comienzan alrededor de los 38 grados, los "amarillos" en los 99 grados, los "rojos" en los 161 grados, los "magentas" en los 218 grados, los "azules" en los 279 grados y los "cianes" en los 341 grados. Una vez que comprendas estos "rangos" de tono, puedes hacerte una idea de cuál es el valor de tono medio para un marco de vídeo con solo mirar este valor numérico. - -Además, ten en cuenta que este valor extraído por el filtro `signalstats` no es una medida absoluta o completa de las cualidades de color de una imagen, sino simplemente un punto de referencia significativo desde el cual podemos explorar una estrategia basada en datos para el análisis de color. La percepción del color y la teoría del color son [áreas complejas y en evolución de la investigación académica](https://colourturn.net/) que incorporan muchas estrategias diferentes de las humanidades, las ciencias sociales y las ciencias cognitivas. Es por eso que debemos tener en cuenta que cualquier estrategia analítica debe tomarse dentro del contexto de estos discursos más amplios y con un espíritu colaborativo y generativo. - -### Visualizando datos de color -Los dos archivos `.csv` que creamos con los comandos anteriores ahora se pueden usar para crear gráficos que visualicen los datos. Hay una serie de plataformas (tanto propietarias como de código abierto) que se pueden usar para lograr esto, como [Microsoft Excel](https://www.wikihow.com/Create-a-Graph-in-Excel), [RawGraphs](https://rawgraphs.io/) y/o [plotly](https://plotly.com/graphing-libraries/). Una discusión en profundidad sobre cómo usar cualquiera de estas plataformas está fuera del alcance de este tutorial; sin embargo, a continuación se muestra la visualización final de los comandos anteriores, que se creó con los archivos `.csv` y plotly. - -{% include figure.html filename="Final_Graph_plotly.png" caption="Gráfico que incluye datos de tono medio de ambos extractos de vídeo" %} - -### Conclusiones -Al observar el gráfico, podemos ver que las trazas de Marte y la Tierra tienen rangos dinámicos muy diferentes en sus valores de tono medio. La traza de Marte es muy limitada y se mantiene dentro de los rangos rojo y amarillo (aproximadamente entre 100 y 160) en la mayoría del extracto. Esto sugiere algo sobre el uso del color en la película como un dispositivo retórico que sirve como mensaje propagandístico. 
Recuerda que esta sección presenta una visión antipática de la forma de vida y el sistema político marcianos: una población uniforme e infeliz, que depende de tecnología y transporte ineficientes mientras se les exige que observen la obediencia total a un gobernante supremo totalitario. La película conecta esta experiencia negativa con una paleta de tonos relativamente opacos de rojo y amarillo. También deberíamos considerar el público objetivo original de esta película, los jóvenes ciudadanos de los Estados Unidos en la década de 1950, y cómo probablemente habrían interpretado estas imágenes y usos del color en ese momento histórico. En particular, podemos considerar este uso del color en el contexto de las crecientes tensiones geopolíticas entre la Unión Soviética y los Estados Unidos y sus aliados en Europa occidental. El color rojo, específicamente, se usaba comúnmente en los medios impresos y de difusión para describir [la "amenaza" del comunismo global](https://es.wikipedia.org/wiki/Temor_rojo) durante esta era de la historia mundial. Además, la elección de presentar al líder totalitario marciano con una apariencia muy similar al icónico líder soviético [Joseph Stalin](https://es.wikipedia.org/wiki/I%C3%B3sif_Stalin) puede leerse como una señal visual y cultural explícita para la audiencia. Así, esta representación de Marte parece ser una caricatura alegórica de la vida bajo el velo del comunismo, tal como la percibe un observador externo y un oponente político/ideológico. Esta caricatura emplea no solo una paleta de colores limitada, sino una que está cargada con otras referencias culturales. El uso del color aprovecha los prejuicios y asociaciones que están presentes en el imaginario de la audiencia y, por lo tanto, está ligado estrechamente al argumento central de la película, que sostiene que el comunismo no es un sistema político viable. - -En contraste con el uso limitado del color en nuestro extracto de Marte, la traza de la Tierra cubre un rango dinámico mucho más amplio de valores de tono. En este pasaje, el emisario marciano está aprendiendo sobre el maravilloso y rico estilo de vida de los terrícolas gracias a un sistema capitalista y a la explotación de petroleo y de productos derivados de este. La secuencia enfatiza la riqueza material y la libertad empresarial ofrecida bajo un sistema capitalista usando una variedad y vivacidad de color mucho mayor que en el extracto de Marte. Los productos comerciales y las personas se representan utilizando el espectro completo del proceso Technicolor, creando asociaciones positivas entre los resultados de la industria petrolera y el estilo de vida acomodado de quienes se benefician de él. Al igual que el extracto de Marte, a la audiencia se le ofrece una caricatura unilateral de un sistema político y una forma de vida, pero en esta sección la representación reduccionista es laudable y próspera en lugar de desoladora y opresiva. - -Como una pieza de propaganda, *Destination Earth* se basa en estas distinciones poderosas pero demasiado simplistas entre dos sistemas políticos para influir en la opinión pública y promover el consumo de productos derivados del petróleo. La manera en que se usa (o no se usa) el color es una herramienta importante para elaborar y enfatizar este mensaje. 
Además, una vez que podemos extraer datos de color y visualizarlos utiliza técnicas gráficas simples, podemos ver que la disparidad en el rango dinámico proporciona una medida cuantitativa para vincular el uso técnico y estético del color en esta película animada con la retórica propagandística presentada por sus productores. - -{% include figure.html filename="lovely_oil.png" caption="El petróleo y los ideales estadounidenses de riqueza y prosperidad se expresan en esplendor colorido" %} - -### Escalando el análisis de color con FFprobe -Uno de los límites de esta metodología es que estamos generando manualmente informes de color en un solo archivo a la vez. Si quisiéramos adoptar un enfoque de [visión distante](https://distantviewing.org/) más en línea con las metodologías tradicionales de Humanidades Digitales, podríamos emplear un script de Bash para ejecutar nuestro comando `ffprobe` en todos los archivos en un determinado directorio. Esto es útil si, por ejemplo, un(a) investigador(a) esta interesado en realizar un análisis similar en [todas las películas animadas de John Sutherland encontradas en la colección de Archivos Prelinger](https://archive.org/details/prelinger&tab=collection?and%5B%5D=john+sutherland&sin=) u otro conjunto de material de vídeo de archivo. - -Una vez que tengas un conjunto de material para trabajar guardado en un solo lugar, puedes guardar el siguiente [bucle _for_ de Bash o "for loop"](https://www.shellscript.sh/loops.html) dentro del directorio y ejecutarlo para generar archivos `.csv` que contengan los mismos datos de tono medio a nivel de fotograma que extrajimos de nuestros extractos de *Destination Earth*. - -```bash -for file in *.m4v; do -ffprobe -f lavfi -i movie="$file",signalstats -show_entries frame=pkt_pts_time:frame_tags=lavfi.signalstats.HUEMED -print_format csv > "${file%.m4v}.csv"; -done -``` - -* `for file in *.m4v; do` = inicia el bucle _for_. Esta primera línea le dice a FFmpeg "para todos los archivos en este directorio con la extensión `.m4v`, ejecuta el siguiente comando." -* El `*` es un [comodín de Bash](http://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm) adjunto a un tipo de archivo dado para especificarlos como archivos de entrada. -* La palabra `file` es una variable arbitraria que representará cada archivo a medida que se ejecuta a través del bucle. -* `ffprobe -f lavfi -i movie="$file",signalstats -show_entries frame=pkt_pts_time:frame_tags=lavfi.signalstats.HUEMED -print_format csv > "${file%.m4v}.csv"; done` = el mismo comando de extracción de metadatos de color que ejecutamos en nuestros dos extractos de *Destination Earth*, con algunas pequeñas modificaciones en la sintaxis para explicar su uso en varios archivos en un directorio: - * `"$file"` = recuerda cada variable. Las comillas aseguran que se conserva el nombre de archivo original. - * `> "${file%.m4v}.csv";` = conserva el nombre de archivo original al escribir los archivos de salida `.csv`. Esto asegurará que los nombres de los archivos de vídeo originales coincidan con sus correspondientes reportes en `.csv`. - * `done` = termina el script una vez que se hayan completado todos los archivos del directorio. - -
    -También puedes usar signalstats para obtener otra información valiosa relacionada con el color. Consulta la documentación del filtro para obtener una lista completa de las métricas visuales disponibles. -
    - -Una vez que ejecutas este script, verás que cada archivo de vídeo en el directorio ahora tiene un archivo `.csv` correspondiente que contiene el conjunto de datos especificado. - -# En resumen -En este tutorial, hemos aprendido: - * cómo instalar FFmpeg en diferentes sistemas operativos y cómo acceder al _framework_ en el navegador web - * cuál es la sintaxis básica y la estructura de los comandos FFmpeg - * cómo visualizar metadatos técnicos básicos de un archivo audiovisual - * cómo transformar un archivo audiovisual a través de la transcodificación y el "re-wrapping" - * cómo analizar y editar ese archivo audiovisual separando sus componentes ("demux") y crear extractos - * cómo reproducir archivos audiovisuales usando `ffplay` - * cómo crear nuevos archivos de vídeo con vectorscopios integrados - * cómo exportar datos tabulares relacionados con el color de una pista de vídeo usando `ffprobe` - * cómo crear un bucle _for_ de Bash para extraer información de datos de color de múltiples archivos de vídeo con un solo comando - -A un nivel más amplio, este tutorial aspira a proporcionar una introducción informada y atractiva sobre cómo se pueden incorporar las herramientas y metodologías audiovisuales en los proyectos y las prácticas de Humanidades Digitales. Con herramientas abiertas y potentes como FFmpeg, existe un gran potencial para expandir el alcance del campo para incluir tipos de medios y análisis más ricos y complejos que nunca. - -# Más recursos -FFmpeg tiene una comunidad grande y bien apoyada de usarios a través de todo el mundo. Como tal, hay muchos recursos gratuitos y de código abierto para descubir nuevos comandos y técnicas para trabajar con materiales audiovisuales. Por favor, contacta al autor con cualquier adición a esta lista, especialmente si se trata de recursos educativos en español para aprender FFmpeg. - -* [La documentación oficial de FFmpeg](https://www.ffmpeg.org/ffmpeg.html) -* [FFmpeg Wiki](https://trac.ffmpeg.org/wiki/WikiStart) -* [ffmprovisr](https://amiaopensource.github.io/ffmprovisr/) de [La Asociación de Archivistas de Imágenes en Movimiento](https://amianet.org/?lang=es) -* [Entrenamiento de preservación audiovisual de Ashley Blewer](https://training.ashleyblewer.com/) -* [La presentación de Andrew Weaver: "Demystifying FFmpeg"](https://github.com/privatezero/NDSR/blob/master/Demystifying_FFmpeg_Slides.pdf) -* [FFmpeg: Presentación de Ben Turkus](https://docs.google.com/presentation/d/1NuusF948E6-gNTN04Lj0YHcVV9-30PTvkh_7mqyPPv4/present?ueb=true&slide=id.g2974defaca_0_231) -* [FFmpeg Cookbook for Archivists de Reto Kromer](https://avpres.net/FFmpeg/) - -## Programas de código abierto de análisis audiovisual que usan FFmpeg - -* [MediaInfo](https://mediaarea.net/en/MediaInfo) -* [QC Tools](https://bavc.org/preserve-media/preservation-tools) - -# Referencias - -* Champion, E. (2017) “Digital Humanities is text heavy, visualization light, and simulation poor,” Digital Scholarship in the Humanities 32(S1), i25-i32 - -* Hockey, S. (2004) “The History of Humanities Computing,” A Companion to Digital Humanities, ed. Susan Schreibman, Ray Siemens, John Unsworth. Oxford: Blackwell - -Este tutorial fue posible gracias al apoyo de la Academia Británica y fue escrito durante el Taller de _Programming Historian_ desarrollado en la Universidad de Los Andes en Bogotá, Colombia, entre el 31 de julio y 3 de agosto de 2018. 
+---
+title: Introducción a la transcodificación, edición y visualización de datos audiovisuales con FFmpeg
+authors:
+- Dave Rodriguez
+editors:
+- Brandon Walsh
+reviewers:
+- Tesla Cariani
+- Josh Romphf
+date: 2018-12-20
+translator:
+- Dave Rodriguez
+- Sebastian Fiori
+translation_date: 2020-12-11
+translation-editor:
+- Antonio Rojas Castro
+translation-reviewer:
+- Jennifer Isasi
+- José Antonio Motilla
+original: introduction-to-ffmpeg
+review-ticket: https://github.com/programminghistorian/ph-submissions/issues/302
+difficulty: 2
+activity: analyzing
+topics: [data-manipulation, data-visualization]
+abstract: Esta lección introduce las funciones básicas de FFmpeg, una herramienta libre de línea de comandos utilizada para manipular y analizar materiales audiovisuales.
+avatar_alt: Una cámara antigua
+doi: 10.46430/phes0049
+layout: lesson
+---
+
+{% include toc.html %}
+
+# Introducción
+Históricamente, las Humanidades Digitales se han enfocado casi exclusivamente en el análisis de fuentes textuales a través de métodos computacionales (Hockey, 2004). Sin embargo, hay un interés creciente en el campo por la utilización de métodos computacionales para el análisis de materiales audiovisuales del patrimonio cultural, como lo reflejan la creación del [Grupo de Interés Especial de la Alianza de Organizaciones de Humanidades Digitales: Materiales audiovisuales en Humanidades Digitales](https://avindhsig.wordpress.com/) y [el aumento de las presentaciones relacionadas con temas audiovisuales en la conferencia global de AOHD](https://figshare.com/articles/AV_in_DH_State_of_the_Field/5680114) en los últimos años. Investigaciones recientes, tales como [Distant Viewing TV](https://distantviewing.org), indican un cambio en el campo hacia proyectos relacionados con el uso de técnicas computacionales para ampliar el alcance de los materiales que los y las humanistas digitales pueden explorar. Como afirma Erik Champion, "la audiencia de Humanidades Digitales no siempre está enfocada en la literatura o está interesada en las formas tradicionales de alfabetización", y la aplicación de metodologías digitales para estudiar la cultura audiovisual es una faceta emergente y emocionante de las humanidades digitales (Champion, 2017, traducido por el autor). Hay muchas herramientas valiosas, gratuitas y de código abierto disponibles para aquellos interesados en trabajar con materiales audiovisuales (por ejemplo, el tutorial de _Programming Historian_ [Editar Audio con Audacity](/es/lecciones/editar-audio-con-audacity)). Este tutorial presentará otra: FFmpeg.
+
+[FFmpeg](https://www.ffmpeg.org/about.html) es el _framework_ multimedia de código abierto líder para transcodificar, editar, filtrar y reproducir casi cualquier tipo de formato audiovisual digital (sitio web de FFmpeg - "About"). Muchos programas comunes y sitios web usan FFmpeg para leer y escribir archivos audiovisuales, por ejemplo, VLC, Google Chrome, YouTube y [muchos más](https://trac.ffmpeg.org/wiki/Projects). Además de ser una herramienta de desarrollo de programas y de sitios web, FFmpeg se puede usar desde la interfaz de línea de comandos para realizar muchas tareas comunes, complejas e importantes, relacionadas con la gestión, modificación y análisis de archivos audiovisuales.
Estos tipos de procesos, tales como editar, transcodificar o extraer los metadatos de archivos, generalmente requieren acceso a otros programas (tales como los editores de vídeo no lineales Adobe Premiere o Final Cut Pro); sin embargo, FFmpeg permite a un usuario operar directamente sobre los archivos audiovisuales sin el uso de interfaces o programas de terceros. Como tal, el conocimiento del _framework_ permite a los usuarios manipular materiales audiovisuales para satisfacer sus necesidades con una solución de código abierto y gratuita, que ofrece gran parte de la funcionalidad de los costosos programas de audio y vídeo. Este tutorial ofrece una introducción a la lectura y escritura de comandos de FFmpeg y una guía paso a paso, a partir de un caso práctico, para aprender a utilizar el _framework_ en un flujo de trabajo específico para los humanistas digitales. Específicamente, se mostrará cómo FFmpeg puede ser utilizado para extraer y analizar datos de color en un vídeo archivístico.
+
+## Objetivos de aprendizaje
+* Instalar FFmpeg en tu computadora o usar una versión "demo" en el navegador web
+* Comprender la estructura básica y la sintaxis de los comandos de FFmpeg
+* Aprender varios comandos útiles, tales como:
+  * "Re-wrap" (cambiar el contenedor) y transcodificar (recodificar archivos)
+  * "Demux" de archivos (separar audio y vídeo)
+  * Recortar/Editar archivos
+  * Usar FFplay para reproducir archivos
+  * Crear vectorscopios para visualizar los datos de color
+  * Usar FFprobe para generar informes de los datos de color
+* Introducir recursos para mayor exploración y experimentación
+
+## Requisitos previos
+Antes de comenzar con este tutorial, es necesario que localices la [Terminal](https://es.wikipedia.org/wiki/Terminal_(macOS)) de tu computadora u otra interfaz de línea de comandos, ya que ahí es donde ingresarás y ejecutarás los comandos de FFmpeg. Si necesitas instrucción para acceder y usar la interfaz de línea de comandos, te recomendamos la lección de _Programming Historian_ [Introducción a la línea de comandos en Bash](/es/lecciones/introduccion-a-bash) para usuarios de Mac y Linux o, para usuarios de Windows, [Introducción a la línea de comandos de Windows con PowerShell](/es/lecciones/introduccion-a-powershell). Adicionalmente, será de utilidad tener conocimientos básicos de [códecs](https://es.wikipedia.org/wiki/C%C3%B3dec) y [contenedores](https://es.wikipedia.org/wiki/Formato_contenedor) audiovisuales para entender con mayor detalle el funcionamiento de FFmpeg. Proporcionaremos información adicional sobre códecs y contenedores en la sección de ejemplos de comandos preliminares de este tutorial.
+
+# Cómo instalar FFmpeg
+La instalación de FFmpeg es posiblemente la parte más difícil del uso de esta herramienta. Afortunadamente, existen algunas guías y recursos disponibles para instalar el _framework_ en cada sistema operativo.
+
+
Nuevas versiones de FFmpeg se lanzan aproximadamente cada seis meses. Para mantenerte al tanto de ellas, es recomendable seguir a FFmpeg en Twitter o en su sitio web. Las nuevas versiones de FFmpeg generalmente contienen filtros nuevos y actualizados, compatibilidad con más códecs y correcciones de errores. La sintaxis de FFmpeg no cambia con estas actualizaciones y las capacidades antiguas rara vez se eliminan. Puedes aprender más sobre estas actualizaciones consultando los anuncios de versiones anteriores en la sección de News del sitio web de FFmpeg.
+
+## Para usuarios de Mac OS
+La opción más simple es usar un administrador de paquetes como [Homebrew](https://brew.sh/) para instalar FFmpeg y asegurar que permanezca en la versión más reciente. Para completar este tipo de instalación, sigue estos pasos:
+* Instala Homebrew de acuerdo a las instrucciones en el enlace de arriba
+* Ejecuta `brew install ffmpeg` en tu Terminal para comenzar una instalación básica
+ **Nota**: generalmente se recomienda instalar FFmpeg con opciones adicionales a las incluidas en la instalación básica; esto proporcionará acceso a más herramientas y funciones. [La Guía de Instalación de Apple de Reto Kromer](https://avpres.net/FFmpeg/install_Apple.html) proporciona un buen conjunto de opciones adicionales:
+
+  ```bash
+  brew install ffmpeg --with-freetype --with-openjpeg --with-x265 --with-rubberband --with-tesseract
+  ```
+
+  * Para una explicación de estas opciones adicionales, revisa [La Guía FFmpeg de Ashley Blewer](https://training.ashleyblewer.com/presentations/ffmpeg.html#10).
+  * Además, puedes ejecutar `brew options ffmpeg` para ver qué características están o han estado disponibles en la versión actual de FFmpeg
+  * Para actualizar tu instalación a la versión más reciente, ejecuta:
+
+  ```bash
+  brew update && brew upgrade ffmpeg
+  ```
+
+* Para más opciones de instalación para Mac OS, revisa [La Guía de Compilación de FFmpeg para Mac OS](https://trac.ffmpeg.org/wiki/CompilationGuide/macOS) (la guía solo está disponible en inglés).
+
+## Para usuarios de Windows
+Los usuarios de Windows pueden usar el administrador de paquetes [Chocolatey](https://chocolatey.org/) para instalar y mantener FFmpeg. [La Guía de Instalación de Windows de Reto Kromer](https://avpres.net/FFmpeg/install_Windows.html) proporciona toda la información necesaria para usar Chocolatey o para construir el _framework_ a partir del código fuente (la guía solo está disponible en inglés).
+
+## Para usuarios de Linux
+[Linuxbrew](https://linuxbrew.sh/) es un programa similar a Homebrew que se puede utilizar para instalar y mantener FFmpeg en Linux. Reto Kromer también proporciona una guía útil, [la Guía de Instalación de Linux](https://avpres.net/FFmpeg/install_Linux.html), que es similar a la instalación en Mac OS (la guía solo está disponible en inglés). Tu distribución de Linux puede tener su [propio administrador de paquetes](https://www.linode.com/docs/tools-reference/linux-package-management/) que incluya paquetes de FFmpeg. Dependiendo de tu distribución de Linux (Ubuntu, Fedora, Arch Linux, etc.), estas versiones pueden variar, así que usar Linuxbrew podría ser útil para asegurar que la versión sea la misma independientemente del tipo de Linux que utilices.
+
+## Otros recursos de instalación
+
+* [Descarga de paquetes](https://www.ffmpeg.org/download.html)
+  * FFmpeg permite el acceso a archivos binarios, código fuente y versiones estáticas para Mac, Windows y Linux directamente en su sitio web. Los usuarios pueden construir el _framework_ sin un administrador de paquetes con estos recursos. Es probable que solo los usuarios avanzados quieran usar esta opción.
+* [La Guía de Compilación de FFmpeg](https://trac.ffmpeg.org/wiki/CompilationGuide)
+  * La página Wiki de FFmpeg también proporciona un compendio de guías y estrategias para instalar FFmpeg en tu computadora (la guía solo está disponible en inglés).
+
+## Probando la instalación
+* Para asegurarte de que FFmpeg se haya instalado correctamente, ejecuta:
+
+  ```bash
+  ffmpeg -version
+  ```
+
+* Si ves una lista larga con información, ¡la instalación fue exitosa! Debe ser similar a lo siguiente:
+
+```bash
+ffmpeg version 4.0.1 Copyright (c) 2000-2018 the FFmpeg developers
+built with Apple LLVM version 9.1.0 (clang-902.0.39.1)
+configuration: --prefix=/usr/local/Cellar/ffmpeg/4.0.1 --enable-shared --enable-pthreads --enable-version3 --enable-hardcoded-tables --enable-avresample --cc=clang --host-cflags= --host-ldflags= --enable-gpl --enable-ffplay --enable-libfreetype --enable-libmp3lame --enable-librubberband --enable-libtesseract --enable-libx264 --enable-libx265 --enable-libxvid --enable-opencl --enable-videotoolbox --disable-lzma --enable-libopenjpeg --disable-decoder=jpeg2000 --extra-cflags=-I/usr/local/Cellar/openjpeg/2.3.0/include/openjpeg-2.3
+libavcodec 58. 18.100 / 58. 18.100
+libavformat 58. 12.100 / 58. 12.100
+libavdevice 58. 3.100 / 58. 3.100
+libavfilter 7. 16.100 / 7. 16.100
+libavresample 4. 0. 0 / 4. 0. 0
+libswscale 5. 1.100 / 5. 1.100
+libswresample 3. 1.100 / 3. 1.100
+libpostproc 55. 1.100 / 55. 1.100
+```
+
+* Si el sistema arroja `-bash: ffmpeg: command not found`, algo ha ido mal.
+  * Nota: Si estás usando un administrador de paquetes, es improbable que encuentres este mensaje de error. Sin embargo, si hay un problema después de instalar con un administrador de paquetes, es probable que el problema esté en el administrador de paquetes y no en FFmpeg. Consulta la solución de problemas de [Homebrew](https://docs.brew.sh/Troubleshooting), [Chocolatey](https://chocolatey.org/docs/troubleshooting) o [Linuxbrew](https://linuxbrew.sh/) para asegurar que el administrador de paquetes esté funcionando correctamente en tu computadora (las guías solo están disponibles en inglés). Si estás intentando instalar sin un administrador de paquetes y ves este mensaje de error, haz una referencia cruzada de tu método con la Guía de Compilación de FFmpeg anterior.
+
+## Usando FFmpeg en el navegador
+Si no quieres instalar FFmpeg en tu computadora pero te gustaría familiarizarte con el _framework_ y usarlo en la interfaz de línea de comandos, [videoconverter.js](https://bgrins.github.io/videoconverter.js/demo/) de Brian Grinstead proporciona un método para ejecutar comandos de FFmpeg en tu navegador (la interfaz está en inglés).
+
+
 Esta interfaz del navegador no tiene todas las funcionalidades necesarias para completar este tutorial, pero es útil para aprender los comandos esenciales de FFmpeg. Adicionalmente, este recurso opera con una versión anterior de FFmpeg y es posible que no tenga todas las características de la versión más reciente.
+
+## Estructura básica y sintaxis de los comandos FFmpeg
+El comando básico tiene cuatro partes:
+
+```bash
+[Símbolo del Sistema] [Archivo de Entrada] [Banderas/Acciones] [Archivo de Salida]
+```
+
+* Cada comando comenzará con un símbolo del sistema. Dependiendo del uso, este será `ffmpeg` (cambiar archivos), `ffprobe` (generar metadatos de archivos) o `ffplay` (reproducir archivos).
+* Los archivos de entrada son los archivos que están siendo leídos, editados o examinados.
+* Las banderas y acciones son las cosas que le estás diciendo a FFmpeg que haga con los archivos de entrada. La mayoría de los comandos contendrán múltiples banderas y acciones de complejidad variable.
+* Los archivos de salida son los archivos creados por el comando o los informes creados por los comandos de `ffprobe`.
+
+Escrito genéricamente, el comando básico es parecido a lo siguiente:
+
+```bash
+ffmpeg -i /ruta_de_archivo/archivo_de_entrada.ext -bandera alguna_acción /ruta_de_archivo/archivo_de_salida.ext
+```
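+
+Los otros dos símbolos del sistema siguen esta misma estructura. A modo de boceto genérico (el nombre de archivo es hipotético; veremos ejemplos reales más adelante):
+
+```bash
+# ffprobe sin banderas imprime los metadatos básicos del archivo de entrada
+ffprobe archivo_de_entrada.ext
+# ffplay no necesita archivo de salida: la reproducción misma es la salida
+ffplay archivo_de_entrada.ext
+```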
+
Como con cualquier interfaz de línea de comandos, tendrás que escribir las rutas de los archivos de entrada y de salida según la ubicación de tus directorios de trabajo. En los ejemplos proporcionados en este tutorial, las rutas de archivo no estarán escritas completamente: se supone que el usuario ha navegado al directorio de trabajo antes de ejecutar los comandos.
+
+
+A continuación, examinaremos algunos ejemplos de varios comandos diferentes que usan esta estructura y sintaxis. Adicionalmente, estos comandos demostrarán algunas de las características más útiles de FFmpeg y nos permitirán familiarizarnos con la forma en que se construyen los archivos audiovisuales digitales.
+
+# Para empezar
+Para este tutorial, utilizaremos una película archivística que se llama [*Destination Earth*](https://archive.org/details/4050_Destination_Earth_01_47_33_28) como nuestro objeto de estudio. Esta película fue publicada por los [Archivos Prelinger](https://es.wikipedia.org/wiki/Archivos_Prelinger) y está disponible en el [Internet Archive](https://archive.org/). Esta película, estrenada en 1956 y producida por el [American Petroleum Institute](https://es.wikipedia.org/wiki/American_Petroleum_Institute) y [John Sutherland Productions](https://en.wikipedia.org/wiki/John_Sutherland_(producer)), es un excelente ejemplo de la propaganda de la época de la Guerra Fría que exalta las virtudes del capitalismo y el estilo de vida estadounidense. Utilizando el proceso de [Technicolor](https://es.wikipedia.org/wiki/Technicolor), este corto animado de ciencia ficción cuenta la historia de una sociedad marciana que vive bajo un gobierno opresivo y sus esfuerzos para mejorar sus métodos industriales. Envían un emisario a la Tierra, que descubre que la clave para esto es la refinación de petróleo y la libre empresa. Utilizaremos el vídeo para introducir algunas de las funcionalidades básicas de FFmpeg y analizar sus propiedades de color con relación a su retórica propagandística.
+
+{% include figure.html filename="destEarth_titlecard.png" caption="Destination Earth (1956)" %}
+
+En este tutorial se llevarán a cabo los siguientes pasos:
+* Navegar a la página de [*Destination Earth*](https://archive.org/details/4050_Destination_Earth_01_47_33_28) en el Internet Archive
+* Descargar dos archivos de vídeo: las versiones "MPEG4" (extensión de archivo `.m4v`) y "OGG" (extensión de archivo `.ogv`) de la película
+* Guardar estos archivos en la misma carpeta en algún lugar de tu computadora, con el nombre de archivo `destEarth` seguido de su extensión.
+
+Tómate unos minutos para ver el vídeo y tener una idea de su estructura, mensaje y motivos visuales antes de continuar con los siguientes comandos.
+
+# Ejemplos de comandos preliminares
+
+## Ver metadatos básicos con FFprobe
+Antes de comenzar a manipular nuestros archivos `destEarth`, usemos FFmpeg para examinar información básica sobre el archivo utilizando un simple comando de `ffprobe`. Esto ayudará a comprender cómo se construyen los archivos audiovisuales digitales y proporcionará una base para el resto del tutorial. Navega hasta el directorio del archivo y ejecuta:
+
+```bash
+ffprobe destEarth.ogv
+```
+
+Verás los metadatos técnicos básicos del archivo impresos en `stdout`:
+
+{% include figure.html filename="ffprobe_ogg_es.png" caption="El output de un comando básico `ffprobe` con destEarth.ogv" %}
+
+La línea `Input #0` del informe identifica el **contenedor** como [ogg](https://es.wikipedia.org/wiki/Ogg). Los contenedores (también llamados "envoltorios" o "wrappers", en inglés) proporcionan al archivo la estructura de sus diversas pistas. Los diferentes contenedores (otros contenedores comunes incluyen `.mkv`, `.avi` y `.flv`) tienen diferentes características y compatibilidad con diversos programas. Examinaremos cómo y por qué es posible que desees cambiar el contenedor de un archivo en el siguiente comando.
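+
+Si más adelante quieres aislar esta información del contenedor sin el resto del informe, `ffprobe` ofrece banderas para ello. A modo de boceto opcional (la bandera `-v error` suprime el encabezado de versión):
+
+```bash
+# Imprime solo la sección FORMAT: contenedor, duración y tasa de bits
+ffprobe -v error -show_format destEarth.ogv
+```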
+
+Las líneas `Stream #0:0` y `Stream #0:1` proporcionan información sobre las pistas del archivo (es decir, el contenido que ves en la pantalla y escuchas a través de tus altavoces) y también identifican el **códec** de cada pista. Los códecs especifican cómo se codifica/comprime (se escribe y almacena) y se decodifica (se reproduce) la información. La pista de vídeo (`Stream #0:0`) de nuestro archivo `.ogv` usa el códec [theora](https://es.wikipedia.org/wiki/Theora) y la pista de audio (`Stream #0:1`) usa el códec [vorbis](https://es.wikipedia.org/wiki/Vorbis). Estas líneas también proporcionan información importante relacionada con el espacio de color de la pista de vídeo (`yuv420p`), la resolución (`400x300`) y los marcos por segundo (`29.97 fps`). Adicionalmente, proporcionan información de audio como la tasa de muestreo (`44100 Hz`) y la tasa de bits (`128 kb/s`).
+
+Los códecs, en mayor medida que los contenedores, determinan la calidad y la compatibilidad de un archivo audiovisual con diferentes programas y plataformas (otros códecs comunes incluyen `DNxHD` y `ProRes` para vídeo y `mp3` y `FLAC` para audio). Examinaremos cómo y por qué es posible que también desees cambiar el códec de un archivo en el siguiente comando.
+
+Ejecuta otro comando de `ffprobe`, esta vez con el archivo `.m4v`:
+
+```bash
+ffprobe destEarth.m4v
+```
+
+Una vez más, verás los metadatos técnicos básicos impresos en el `stdout`:
+
+{% include figure.html filename="ffprobe_mp4_es.png" caption="El output de un comando básico `ffprobe` con destEarth.m4v" %}
+
+También notarás que el informe para el archivo `.m4v` contiene múltiples contenedores en la línea `Input #0`, como `mov` y `m4a`. No es necesario profundizar en los detalles para los fines de este tutorial, pero ten en cuenta que los contenedores `mp4` y `mov` se presentan en múltiples "sabores" y con diferentes extensiones de archivo. Sin embargo, todos son muy similares en su construcción técnica y, como tal, pueden verse agrupados en los metadatos técnicos. De manera similar, el archivo `ogg` tiene la extensión `.ogv`, un "sabor" o variante del formato `ogg`.
+
+Al igual que en nuestro comando anterior, las líneas `Stream #0:0` y `Stream #0:1` identifican el códec de cada pista. Podemos ver que nuestro archivo `.m4v` usa el códec de vídeo [H.264](https://es.wikipedia.org/wiki/H.264/MPEG-4_AVC) y el códec de audio [aac](https://es.wikipedia.org/wiki/Advanced_Audio_Coding). Ten en cuenta que se nos proporcionan metadatos similares a los de nuestro archivo `.ogv`, pero algunas características importantes relacionadas con el análisis visual (como la resolución) son significativamente diferentes. Nuestro `.m4v` tiene una resolución más alta (`640x480`) y, por lo tanto, utilizaremos esta versión de *Destination Earth* como nuestro vídeo de origen.
+
+Ahora que sabemos más sobre la composición técnica de nuestro archivo, podemos comenzar a explorar las características y funcionalidades transformadoras de FFmpeg (volveremos a utilizar `ffprobe` más adelante en el tutorial para realizar una extracción de metadatos de color más avanzada).
+
+## Cambiar el contenedor (volver a envolver, "re-wrap")
+Dependiendo de tu sistema operativo, puedes tener uno o más reproductores de medios instalados.
Para efectos de demostración, veamos qué sucede si intentas abrir `destEarth.ogv` usando el reproductor de medios QuickTime que viene con Mac OS X:
+
+{% include figure.html filename="QT_fail.png" caption="Los reproductores multimedia patentados como QuickTime a menudo están limitados en los tipos de archivos con los que pueden trabajar" %}
+
+Una opción cuando te enfrentas a un mensaje de este tipo es simplemente usar otro reproductor de medios. [VLC](https://www.videolan.org/vlc/index.es.html), que está construido con FFmpeg, es una excelente alternativa de código abierto, pero simplemente "usar otro programa" puede no ser siempre una solución viable (y es posible que no siempre tengas otra versión del archivo con la que trabajar). Muchos editores de vídeo populares, como Adobe Premiere, Final Cut Pro y DaVinci Resolve, tienen sus propias limitaciones en cuanto a los tipos de formatos con los que son compatibles. Además, las diferentes plataformas web y sitios de alojamiento/transmisión, como Vimeo, [también tienen sus propios requisitos](https://help.vimeo.com/hc/es/articles/12426043233169-Video-and-audio-compression-guidelines). Por lo tanto, es importante poder volver a envolver y transcodificar tus archivos para cumplir con las diversas especificaciones de reproducción, edición y publicación digital, y para ajustar los archivos a los estándares requeridos por las plataformas de archivo o preservación digital.
+
+
Para obtener una lista completa de los códecs y contenedores compatibles con tu instalación de FFmpeg, ejecuta `ffmpeg -codecs` y `ffmpeg -formats`, respectivamente, para ver las listas impresas en tu stdout.
+
+
+Como un ejercicio para aprender la sintaxis básica de FFmpeg y aprender a transcodificar entre formatos, comenzaremos con nuestro archivo `destEarth.ogv` y escribiremos un nuevo archivo con vídeo codificado en `H.264`, audio en `AAC` y envuelto en un contenedor `.mp4`, una combinación muy común y altamente portátil de códecs y contenedor que es prácticamente idéntica al archivo `.m4v` que descargamos originalmente. Aquí está el comando que ejecutarás, junto con una explicación de cada parte de la sintaxis:
+
+```bash
+ffmpeg -i destEarth.ogv -c:v libx264 -c:a aac destEarth_transcoded.mp4
+```
+
+* `ffmpeg` = comienza el comando
+* `-i destEarth.ogv` = especifica el archivo de entrada
+* `-c:v libx264` = transcodifica la pista de vídeo al códec H.264
+* `-c:a aac` = transcodifica la pista de audio al códec AAC
+* `destEarth_transcoded.mp4` = especifica el archivo de salida. Ten en cuenta que aquí es donde se especifica el nuevo tipo de contenedor.
+
+Si lo ejecutas tal como está escrito y en el mismo directorio que `destEarth.ogv`, verás aparecer en el directorio un nuevo archivo llamado `destEarth_transcoded.mp4`. Si estás operando en Mac OS X, también podrás reproducir este nuevo archivo con QuickTime. Una exploración completa de las convenciones de códecs, contenedores, compatibilidades y extensiones de archivo está más allá del alcance de este tutorial; sin embargo, este conjunto de ejemplos preliminares debería darles a aquellos que no estén familiarizados con la forma en que se construyen los archivos audiovisuales digitales un conjunto de conocimientos de referencia que les permitirá completar el resto del tutorial.
+
+## Creación de extractos y "demuxing" de audio y vídeo
+Ahora que tenemos un mejor entendimiento de las pistas, los códecs y los contenedores, veamos formas en que FFmpeg puede trabajar con materiales de vídeo a un nivel más granular. Para este tutorial, examinaremos dos secciones separadas de *Destination Earth* para comparar cómo se usa el color en relación con la retórica propagandística de la película. Crearemos y prepararemos estos extractos para el análisis utilizando un comando que realiza dos funciones diferentes simultáneamente:
+
+* Primero, el comando creará dos extractos de `destEarth.m4v`.
+* Segundo, el comando eliminará ("demux") los componentes de audio (`Stream #0:1`) de estos extractos.
+
 Estamos eliminando el audio para ahorrar espacio de almacenamiento (la información de audio no es necesaria para el análisis de color). Esto probablemente será útil si esperas utilizar este tipo de análisis a escalas más grandes. Cerca del final del tutorial se ofrecerá más información sobre cómo escalar el análisis de color.
+
+El primer extracto que haremos contiene una secuencia correspondiente al comienzo de la película que describe las difíciles condiciones y la vida oprimida de la sociedad marciana. El siguiente comando especifica los puntos de inicio y finalización del extracto, le dice a FFmpeg que retenga toda la información de la pista de vídeo sin transcodificar nada y le indica que escriba nuestro nuevo archivo sin la pista de audio:
+
+```bash
+ffmpeg -i destEarth.m4v -ss 00:01:00 -to 00:04:45 -c:v copy -an destEarth_Mars_video.mp4
+```
+* `ffmpeg` = comienza el comando
+* `-i destEarth.m4v` = especifica el archivo de entrada
+* `-ss 00:01:00` = establece el punto de inicio a 1 minuto del inicio del archivo
+* `-to 00:04:45` = establece el punto final a 4 minutos y 45 segundos desde el inicio del archivo
+* `-c:v copy` = copia la pista de vídeo directamente, sin transcodificar
+* `-an` = le dice a FFmpeg que ignore la pista de audio al escribir el archivo de salida.
+* `destEarth_Mars_video.mp4` = especifica el archivo de salida
+
+{% include figure.html filename="Mars_screenshot.png" caption="Vida en Marte" %}
+
+Ahora, ejecutaremos un comando similar para crear un extracto de "Tierra". Esta parte de la película tiene una secuencia similar que describe las maravillas de la vida en la Tierra y la riqueza de su sociedad gracias al capitalismo de libre empresa y al uso del petróleo y los productos derivados de este:
+
+```bash
+ffmpeg -i destEarth.m4v -ss 00:07:30 -to 00:11:05 -c:v copy -an destEarth_Earth_video.mp4
+```
+
+{% include figure.html filename="Earth_screenshot.png" caption="La abundancia de la Tierra" %}
+
+
+Ahora deberías tener dos archivos nuevos en tu directorio llamados `destEarth_Mars_video.mp4` y `destEarth_Earth_video.mp4`. Puedes probar uno o ambos archivos (o cualquiera de los otros archivos en el directorio) usando la función `ffplay` de FFmpeg. Simplemente ejecuta:
+
+```bash
+ffplay destEarth_Mars_video.mp4
+```
+
+y/o
+
+```bash
+ffplay destEarth_Earth_video.mp4
+```
+
+Verás una ventana abierta y el vídeo comenzará en el punto de inicio especificado. Se reproducirá una vez y luego la ventana se cerrará (además, notarás que no hay sonido en tu vídeo). También notarás que los comandos `ffplay` no requieren que se especifique una entrada (`-i`) o una salida, porque la reproducción en sí misma es la salida.
+
FFplay es un reproductor multimedia muy versátil que viene con una serie de opciones para personalizar la reproducción. Por ejemplo, si agregas `-loop 0` al comando, el vídeo se reproducirá en bucle indefinidamente.
+
+
+Ahora hemos creado nuestros dos extractos para el análisis. Si vemos estos clips por separado, parece haber diferencias significativas en la forma en que se utilizan el color y la variedad de colores. En la siguiente parte del tutorial examinaremos y extraeremos datos de los archivos de vídeo para cuantificar y apoyar esta hipótesis.
+
+## Análisis de datos de color
+El uso de herramientas digitales para analizar la información de color en películas es otra faceta emergente de las Humanidades Digitales que se superpone con los estudios cinematográficos tradicionales. En particular, el proyecto [FilmColors](https://filmcolors.org/) de la Universidad de Zurich examina la intersección crítica entre las características estéticas formales de las películas y los aspectos semánticos, históricos y tecnológicos de su producción, recepción y difusión, a través del uso de herramientas digitales de análisis y anotación (Flueckiger, 2017, traducido por el autor). Aunque no hay un método estandarizado para este tipo de investigación, en el momento de escribir esta lección el comando `ffprobe` que se describe a continuación es una herramienta útil para extraer información de color que se puede usar en el análisis computacional. Primero, veamos otra manera estandarizada de representar la información de color que sustenta este enfoque cuantitativo, basado en datos, para el análisis de color: los vectorscopios.
+
+### Vectorscopios
+Durante años, los profesionales del vídeo han confiado en los [vectorscopios](https://es.wikipedia.org/wiki/Vectorscopio) para ver la información del color de una manera estandarizada y fácilmente legible. Un vectorscopio grafica la información de color en una gratícula circular. La posición del gráfico corresponde a los [tonos](https://es.wikipedia.org/wiki/Tono_(color)) particulares encontrados en una señal de vídeo. Otros factores, como la saturación, determinan también el tamaño del gráfico. A continuación se presenta un ejemplo de un vectorscopio que muestra los valores de color de las barras SMPTE.
+
+{% include figure.html filename="vectorscope.png" caption="Una lectura de vectorscopio que representa las barras SMPTE NTSC estándar. Fuente: Wikimedia Commons" %}
+
+{% include figure.html filename="smpte_bars.png" caption="Las barras SMPTE. Fuente: Wikimedia Commons" %}
+
+FFmpeg se puede utilizar para reproducir y crear archivos de vídeo con vectorscopios integrados en ellos, para proporcionar una referencia en tiempo real de la información de color del vídeo. Los siguientes comandos `ffplay` incorporarán un vectorscopio en la esquina inferior derecha del marco.
A medida que se reproduce el vídeo, notarás cómo el gráfico del vectorscopio cambia junto con el color en pantalla:
+
+```bash
+ffplay destEarth_Mars_video.mp4 -vf "split=2[m][v], [v]vectorscope=b=0.7:m=color3:g=green[v],[m][v]overlay=x=W-w:y=H-h"
+```
+
+* `ffplay` = comienza el comando
+* `destEarth_Mars_video.mp4` = la ruta y el nombre del archivo de entrada
+* `-vf` = crea un [*filter-graph*](https://trac.ffmpeg.org/wiki/FilteringGuide) para usar con las pistas
+* `"` = una comilla para comenzar el *filter-graph*. La información entre las comillas especifica los parámetros de la apariencia y posición del vectorscopio
+* `split=2[m][v]` = divide la entrada en dos salidas idénticas llamadas `[m]` y `[v]`
+* `,` = la coma indica que viene otro parámetro
+* `[v]vectorscope=b=0.7:m=color3:g=green[v]` = asigna la salida `[v]` al filtro del vectorscopio
+* `[m][v]overlay=x=W-w:y=H-h` = superpone el vectorscopio encima de la imagen de vídeo en una cierta ubicación (en este caso, en la esquina inferior derecha de la pantalla)
+* `"` = termina el *filter-graph*
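+
+El filtro `vectorscope` admite otros modos de dibujo además de `color3`. A modo de boceto, y como referencia para la nota que sigue, esta variante usa el mismo *filter-graph* cambiando solo el modo:
+
+```bash
+# El mismo comando, con otro modo de dibujo del vectorscopio (m=color2)
+ffplay destEarth_Mars_video.mp4 -vf "split=2[m][v], [v]vectorscope=b=0.7:m=color2:g=green[v],[m][v]overlay=x=W-w:y=H-h"
+```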
+
Para obtener más información sobre las diversas opciones para crear vectorscopios, consulta la documentación oficial y la página del Wiki de FFmpeg sobre el vectorscopio. Además, puedes encontrar más información sobre cómo colocar las superposiciones en la documentación del filtro `overlay` de FFmpeg.
+
+{% include figure.html filename="Mars_screenshot_vector.png" caption="Captura de pantalla de la ventana de FFplay con vectorscopio incorporado" %}
+
+Y para el extracto de "Tierra":
+
+```bash
+ffplay destEarth_Earth_video.mp4 -vf "split=2[m][v], [v]vectorscope=b=0.7:m=color3:g=green[v],[m][v]overlay=x=W-w:y=H-h"
+```
+
+{% include figure.html filename="Earth_screenshot_vector.png" caption="Captura de pantalla de la ventana de FFplay con vectorscopio incorporado" %}
+
+También podemos ajustar este comando para escribir nuevos archivos de vídeo con vectorscopios:
+
+```bash
+ffmpeg -i destEarth_Mars_video.mp4 -vf "split=2[m][v], [v]vectorscope=b=0.7:m=color3:g=green[v],[m][v]overlay=x=W-w:y=H-h" -c:v libx264 destEarth_Mars_vectorscope.mp4
+```
+
+```bash
+ffmpeg -i destEarth_Earth_video.mp4 -vf "split=2[m][v], [v]vectorscope=b=0.7:m=color3:g=green[v],[m][v]overlay=x=W-w:y=H-h" -c:v libx264 destEarth_Earth_vectorscope.mp4
+```
+
+Nota los pequeños pero importantes cambios en la sintaxis:
+ * Hemos agregado la bandera `-i` porque es un comando de `ffmpeg`
+ * Hemos especificado el códec de vídeo del archivo de salida como [H.264](https://es.wikipedia.org/wiki/H.264/MPEG-4_AVC) con la bandera `-c:v libx264`. No hay pista de audio que recodificar en estos extractos; si tu archivo tuviera audio, podrías copiarlo sin recodificar con `-c:a copy` o especificar el códec de audio que necesites.
+ * Hemos definido el nombre del archivo de salida
+
+Tómate unos minutos para ver estos vídeos con los vectorscopios integrados en ellos. Observa cuán dinámicos (o no) son los cambios entre los extractos de "Marte" y "Tierra". Compara lo que ves en el vectorscopio con tus propias impresiones del vídeo mismo. Podríamos usar las observaciones de estos vectorscopios para hacer determinaciones sobre qué tonos de color aparecen de manera más regular o intensa en el vídeo, o podemos comparar diferentes formatos uno al lado del otro para ver cómo el color se codifica o representa de manera diferente en función de diferentes códecs, resoluciones, etc.
+
+Aunque los vectorscopios proporcionan una representación útil y en tiempo real de la información del color, es posible que también deseemos acceder a los datos sin procesar que se encuentran debajo de ellos. Luego, podemos usar estos datos para desarrollar visualizaciones más flexibles que no dependan de ver el archivo de vídeo simultáneamente y que ofrezcan un enfoque más cuantitativo para el análisis de color. En nuestros próximos comandos, utilizaremos `ffprobe` para producir un conjunto tabular de datos que pueda usarse para crear un gráfico de datos de color.
+
+### Extracción de datos de color con FFprobe
+Al comienzo de este tutorial, utilizamos un comando `ffprobe` para ver los metadatos básicos de nuestro archivo impresos en el `stdout`. En los siguientes ejemplos, utilizaremos `ffprobe` para extraer datos de color de nuestros extractos de vídeo y enviar esta información a archivos `.csv`. Dentro de nuestro comando `ffprobe`, vamos a utilizar el filtro `signalstats` para crear reportes `.csv` con la información del tono de color medio de cada marco en las secuencias de vídeo de `destEarth_Mars_video.mp4` y `destEarth_Earth_video.mp4`, respectivamente.
+
+```bash
+ffprobe -f lavfi -i movie=destEarth_Mars_video.mp4,signalstats -show_entries frame=pkt_pts_time:frame_tags=lavfi.signalstats.HUEMED -print_format csv > destEarth_Mars_hue.csv
+```
+
+* `ffprobe` = comienza el comando
+* `-f lavfi` = especifica el dispositivo de entrada virtual [libavfilter](https://ffmpeg.org/ffmpeg-devices.html#lavfi) como el formato elegido. Esto es necesario cuando se usa `signalstats`, al igual que con muchos filtros en comandos FFmpeg más complejos.
+* `-i movie=destEarth_Mars_video.mp4` = nombre del archivo de entrada
+* `,signalstats` = especifica el uso del filtro `signalstats` con el archivo de entrada
+* `-show_entries` = establece una lista de entradas que se mostrarán en el informe. Estas se especifican en las siguientes opciones.
+* `frame=pkt_pts_time` = especifica mostrar cada marco con su correspondiente `pkt_pts_time`, creando una entrada única para cada marco de vídeo
+* `:frame_tags=lavfi.signalstats.HUEMED` = crea una etiqueta para cada marco que contiene el valor de tono medio
+* `-print_format csv` = especifica el formato del informe de metadatos
+* `> destEarth_Mars_hue.csv` = escribe un nuevo archivo `.csv` que contiene el informe de metadatos usando `>`, un [operador de redireccionamiento de Bash](https://www.gnu.org/software/bash/manual/html_node/Redirections.html). Este operador toma el comando que lo precede y "redirige" la salida a otra ubicación. En este caso, está escribiendo la salida en un nuevo archivo `.csv`. La extensión de archivo proporcionada aquí también debe coincidir con el formato especificado por la bandera `-print_format`.
+
+A continuación, ejecuta el mismo comando para el extracto de "Tierra":
+
+```bash
+ffprobe -f lavfi -i movie=destEarth_Earth_video.mp4,signalstats -show_entries frame=pkt_pts_time:frame_tags=lavfi.signalstats.HUEMED -print_format csv > destEarth_Earth_hue.csv
+```
+
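+Antes de continuar, puedes comprobar rápidamente el contenido de los informes desde la línea de comandos; un boceto con utilidades estándar de Unix:
+
+```bash
+# Muestra las primeras cinco filas del informe de "Marte"
+head -5 destEarth_Mars_hue.csv
+# Cuenta las filas, es decir, el número de marcos analizados
+wc -l destEarth_Mars_hue.csv
+```
+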
+
Para obtener más información sobre el filtro signalstats y las diversas métricas que se pueden extraer de las secuencias de vídeo, consulta la documentación de filtros de FFmpeg.
+
+
+Ahora deberías tener dos archivos `.csv` en tu directorio. Si los abres en un editor de texto o en un programa de hoja de cálculo, verás tres columnas de datos:
+
+{% include figure.html filename="csv_head.png" caption="Las primeras filas de nuestro informe de color en formato .csv" %}
+
+Comenzando a la izquierda y moviéndose a la derecha, las dos primeras columnas nos dan información sobre dónde estamos en el vídeo. Los números decimales representan fracciones de segundo que también corresponden aproximadamente a la base de tiempo de vídeo de 30 marcos por segundo. Cada fila en nuestro `.csv` corresponde a un marco de vídeo. La tercera columna lleva un número entero entre 0 y 360, valor que representa el tono medio de ese marco de vídeo. Estos números son los datos cuantitativos subyacentes del diagrama del vectorscopio y corresponden a su posición (en grados) en la gratícula circular. Haciendo referencia a nuestra imagen de vectorscopio de antes, puedes ver que, comenzando en la parte inferior del círculo (0 grados) y moviéndose a la izquierda, los "verdes" comienzan alrededor de los 38 grados, los "amarillos" en los 99 grados, los "rojos" en los 161 grados, los "magentas" en los 218 grados, los "azules" en los 279 grados y los "cianes" en los 341 grados. Una vez que comprendas estos "rangos" de tono, puedes hacerte una idea de cuál es el valor de tono medio de un marco de vídeo con solo mirar este valor numérico.
+
+Además, ten en cuenta que este valor extraído por el filtro `signalstats` no es una medida absoluta o completa de las cualidades de color de una imagen, sino simplemente un punto de referencia significativo desde el cual podemos explorar una estrategia basada en datos para el análisis de color. La percepción del color y la teoría del color son [áreas complejas y en evolución de la investigación académica](https://colourturn.net/) que incorporan muchas estrategias diferentes de las humanidades, las ciencias sociales y las ciencias cognitivas. Es por eso que debemos tener en cuenta que cualquier estrategia analítica debe considerarse dentro del contexto de estos discursos más amplios y con un espíritu colaborativo y generativo.
+
+### Visualizando datos de color
+Los dos archivos `.csv` que creamos con los comandos anteriores ahora se pueden usar para crear gráficos que visualicen los datos. Hay una serie de plataformas (tanto propietarias como de código abierto) que se pueden usar para lograr esto, como [Microsoft Excel](https://www.wikihow.com/Create-a-Graph-in-Excel), [RawGraphs](https://rawgraphs.io/) y/o [plotly](https://plotly.com/graphing-libraries/). Una discusión en profundidad sobre cómo usar cualquiera de estas plataformas está fuera del alcance de este tutorial; sin embargo, a continuación se muestra la visualización final de los comandos anteriores, que se creó con los archivos `.csv` y plotly.
+
+{% include figure.html filename="Final_Graph_plotly.png" caption="Gráfico que incluye datos de tono medio de ambos extractos de vídeo" %}
+
+### Conclusiones
+Al observar el gráfico, podemos ver que las trazas de Marte y la Tierra tienen rangos dinámicos muy diferentes en sus valores de tono medio. La traza de Marte es muy limitada y se mantiene dentro de los rangos rojo y amarillo (aproximadamente entre 100 y 160) en la mayor parte del extracto. Esto sugiere algo sobre el uso del color en la película como un dispositivo retórico al servicio del mensaje propagandístico.
Recuerda que esta sección presenta una visión antipática de la forma de vida y el sistema político marcianos: una población uniforme e infeliz, que depende de tecnología y transporte ineficientes mientras se le exige obediencia total a un gobernante supremo totalitario. La película conecta esta experiencia negativa con una paleta de tonos relativamente opacos de rojo y amarillo. También deberíamos considerar al público objetivo original de esta película, los jóvenes ciudadanos de los Estados Unidos en la década de 1950, y cómo probablemente habrían interpretado estas imágenes y usos del color en ese momento histórico. En particular, podemos considerar este uso del color en el contexto de las crecientes tensiones geopolíticas entre la Unión Soviética y los Estados Unidos y sus aliados en Europa occidental. El color rojo, específicamente, se usaba comúnmente en los medios impresos y de difusión para describir [la "amenaza" del comunismo global](https://es.wikipedia.org/wiki/Temor_rojo) durante esta era de la historia mundial. Además, la elección de presentar al líder totalitario marciano con una apariencia muy similar a la del icónico líder soviético [Joseph Stalin](https://es.wikipedia.org/wiki/I%C3%B3sif_Stalin) puede leerse como una señal visual y cultural explícita para la audiencia. Así, esta representación de Marte parece ser una caricatura alegórica de la vida bajo el velo del comunismo, tal como la percibe un observador externo y oponente político e ideológico. Esta caricatura emplea no solo una paleta de colores limitada, sino una que está cargada de otras referencias culturales. El uso del color aprovecha los prejuicios y asociaciones presentes en el imaginario de la audiencia y, por lo tanto, está ligado estrechamente al argumento central de la película: que el comunismo no es un sistema político viable.
+
+En contraste con el uso limitado del color en nuestro extracto de Marte, la traza de la Tierra cubre un rango dinámico mucho más amplio de valores de tono. En este pasaje, el emisario marciano está aprendiendo sobre el maravilloso y rico estilo de vida de los terrícolas, posible gracias a un sistema capitalista y a la explotación del petróleo y de los productos derivados de este. La secuencia enfatiza la riqueza material y la libertad empresarial ofrecidas bajo un sistema capitalista usando una variedad y vivacidad de color mucho mayores que en el extracto de Marte. Los productos comerciales y las personas se representan utilizando el espectro completo del proceso Technicolor, creando asociaciones positivas entre los resultados de la industria petrolera y el estilo de vida acomodado de quienes se benefician de él. Al igual que en el extracto de Marte, a la audiencia se le ofrece una caricatura unilateral de un sistema político y una forma de vida, pero en esta sección la representación reduccionista es laudatoria y próspera en lugar de desoladora y opresiva.
+
+Como una pieza de propaganda, *Destination Earth* se basa en estas distinciones poderosas pero demasiado simplistas entre dos sistemas políticos para influir en la opinión pública y promover el consumo de productos derivados del petróleo. La manera en que se usa (o no se usa) el color es una herramienta importante para elaborar y enfatizar este mensaje.
Además, una vez que podemos extraer datos de color y visualizarlos utilizando técnicas gráficas simples, podemos ver que la disparidad en el rango dinámico proporciona una medida cuantitativa para vincular el uso técnico y estético del color en esta película animada con la retórica propagandística presentada por sus productores.
+
+{% include figure.html filename="lovely_oil.png" caption="El petróleo y los ideales estadounidenses de riqueza y prosperidad se expresan en esplendor colorido" %}
+
+### Escalando el análisis de color con FFprobe
+Uno de los límites de esta metodología es que estamos generando manualmente informes de color de un solo archivo a la vez. Si quisiéramos adoptar un enfoque de [visión distante](https://distantviewing.org/) más en línea con las metodologías tradicionales de Humanidades Digitales, podríamos emplear un script de Bash para ejecutar nuestro comando `ffprobe` en todos los archivos de un determinado directorio. Esto es útil si, por ejemplo, un(a) investigador(a) está interesado(a) en realizar un análisis similar en [todas las películas animadas de John Sutherland encontradas en la colección de Archivos Prelinger](https://archive.org/details/prelinger&tab=collection?and%5B%5D=john+sutherland&sin=) u otro conjunto de material de vídeo de archivo.
+
+Una vez que tengas un conjunto de material para trabajar guardado en un solo lugar, puedes guardar el siguiente [bucle _for_ de Bash o "for loop"](https://www.shellscript.sh/loops.html) dentro del directorio y ejecutarlo para generar archivos `.csv` que contengan los mismos datos de tono medio a nivel de fotograma que extrajimos de nuestros extractos de *Destination Earth*.
+
+```bash
+for file in *.m4v; do
+ffprobe -f lavfi -i movie="$file",signalstats -show_entries frame=pkt_pts_time:frame_tags=lavfi.signalstats.HUEMED -print_format csv > "${file%.m4v}.csv";
+done
+```
+
+* `for file in *.m4v; do` = inicia el bucle _for_. Esta primera línea le dice a Bash "para todos los archivos en este directorio con la extensión `.m4v`, ejecuta el siguiente comando."
+* El `*` es un [comodín de Bash](https://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm) adjunto a un tipo de archivo dado para especificar esos archivos como entrada.
+* La palabra `file` es una variable arbitraria que representará cada archivo a medida que pasa por el bucle.
+* `ffprobe -f lavfi -i movie="$file",signalstats -show_entries frame=pkt_pts_time:frame_tags=lavfi.signalstats.HUEMED -print_format csv > "${file%.m4v}.csv"; done` = el mismo comando de extracción de metadatos de color que ejecutamos en nuestros dos extractos de *Destination Earth*, con algunas pequeñas modificaciones en la sintaxis para adaptarlo al uso con varios archivos de un directorio:
+ * `"$file"` = hace referencia a la variable `file` en cada iteración. Las comillas aseguran que se conserve el nombre de archivo original.
+ * `> "${file%.m4v}.csv";` = conserva el nombre de archivo original al escribir los archivos de salida `.csv`. Esto asegurará que los nombres de los archivos de vídeo originales coincidan con sus correspondientes reportes en `.csv`.
+ * `done` = termina el script una vez que se hayan procesado todos los archivos del directorio.
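+
+El mismo patrón se puede adaptar a otros formatos. A modo de esbozo (el nombre de la subcarpeta `csv_salida` es hipotético y solo ilustra la idea), este bucle procesa archivos `.mov` y guarda cada reporte en una subcarpeta para mantener ordenado el directorio de trabajo:
+
+```bash
+# crea la subcarpeta de salida si no existe
+mkdir -p csv_salida
+for file in *.mov; do
+ffprobe -f lavfi -i movie="$file",signalstats -show_entries frame=pkt_pts_time:frame_tags=lavfi.signalstats.HUEMED -print_format csv > "csv_salida/${file%.mov}.csv";
+done
+```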
    +
También puedes usar el filtro `signalstats` para obtener otra información valiosa relacionada con el color. Consulta la documentación del filtro para obtener una lista completa de las métricas visuales disponibles.
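+
+Por ejemplo, este esbozo mínimo (el nombre de archivo `entrada.m4v` es hipotético) extrae, además de la mediana del tono (`HUEMED`), el tono promedio (`HUEAVG`) y la saturación promedio (`SATAVG`) de cada fotograma:
+
+```bash
+ffprobe -f lavfi -i movie=entrada.m4v,signalstats -show_entries frame=pkt_pts_time:frame_tags=lavfi.signalstats.HUEMED,lavfi.signalstats.HUEAVG,lavfi.signalstats.SATAVG -print_format csv > entrada_color.csv
+```
+
+El archivo `.csv` resultante incluirá una columna adicional por cada métrica solicitada.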
    +
+
+Una vez que ejecutas este script, verás que cada archivo de vídeo en el directorio ahora tiene un archivo `.csv` correspondiente que contiene el conjunto de datos especificado.
+
+# En resumen
+En este tutorial, hemos aprendido:
+ * cómo instalar FFmpeg en diferentes sistemas operativos y cómo acceder al _framework_ en el navegador web
+ * cuál es la sintaxis básica y la estructura de los comandos FFmpeg
+ * cómo visualizar metadatos técnicos básicos de un archivo audiovisual
+ * cómo transformar un archivo audiovisual a través de la transcodificación y el "re-wrapping"
+ * cómo analizar y editar ese archivo audiovisual separando sus componentes ("demux") y crear extractos
+ * cómo reproducir archivos audiovisuales usando `ffplay`
+ * cómo crear nuevos archivos de vídeo con vectorscopios integrados
+ * cómo exportar datos tabulares relacionados con el color de una pista de vídeo usando `ffprobe`
+ * cómo crear un bucle _for_ de Bash para extraer información de datos de color de múltiples archivos de vídeo con un solo comando
+
+A un nivel más amplio, este tutorial aspira a proporcionar una introducción informada y atractiva sobre cómo se pueden incorporar las herramientas y metodologías audiovisuales en los proyectos y las prácticas de Humanidades Digitales. Con herramientas abiertas y potentes como FFmpeg, existe un gran potencial para expandir el alcance del campo para incluir tipos de medios y análisis más ricos y complejos que nunca.
+
+# Más recursos
+FFmpeg tiene una comunidad de usuarios grande y bien consolidada en todo el mundo. Por ello, hay muchos recursos gratuitos y de código abierto para descubrir nuevos comandos y técnicas para trabajar con materiales audiovisuales. Por favor, contacta al autor con cualquier adición a esta lista, especialmente si se trata de recursos educativos en español para aprender FFmpeg.
+
+* [La documentación oficial de FFmpeg](https://www.ffmpeg.org/ffmpeg.html)
+* [FFmpeg Wiki](https://trac.ffmpeg.org/wiki/WikiStart)
+* [ffmprovisr](https://amiaopensource.github.io/ffmprovisr/) de [La Asociación de Archivistas de Imágenes en Movimiento](https://amianet.org/?lang=es)
+* [Entrenamiento de preservación audiovisual de Ashley Blewer](https://training.ashleyblewer.com/)
+* [La presentación de Andrew Weaver: "Demystifying FFmpeg"](https://github.com/privatezero/NDSR/blob/master/Demystifying_FFmpeg_Slides.pdf)
+* [FFmpeg: Presentación de Ben Turkus](https://docs.google.com/presentation/d/1NuusF948E6-gNTN04Lj0YHcVV9-30PTvkh_7mqyPPv4/present?ueb=true&slide=id.g2974defaca_0_231)
+* [FFmpeg Cookbook for Archivists de Reto Kromer](https://avpres.net/FFmpeg/)
+
+## Programas de código abierto de análisis audiovisual que usan FFmpeg
+
+* [MediaInfo](https://mediaarea.net/en/MediaInfo)
+* [QC Tools](https://bavc.org/preserve-media/preservation-tools)
+
+# Referencias
+
+* Champion, E. (2017) “Digital Humanities is text heavy, visualization light, and simulation poor,” Digital Scholarship in the Humanities 32(S1), i25-i32
+
+* Hockey, S. (2004) “The History of Humanities Computing,” A Companion to Digital Humanities, ed. Susan Schreibman, Ray Siemens, John Unsworth. Oxford: Blackwell
+
+Este tutorial fue posible gracias al apoyo de la Academia Británica y fue escrito durante el Taller de _Programming Historian_ desarrollado en la Universidad de Los Andes en Bogotá, Colombia, entre el 31 de julio y el 3 de agosto de 2018.
diff --git a/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.md b/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.md index 8e4708eaa7..60166700d3 100644 --- a/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.md +++ b/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.md @@ -41,7 +41,7 @@ Como paso final, utilizarás las medidas generadas a través de ImagePlot para c ### Información de trasfondo #### ¿Qué es ImagePlot? -Esta herramienta fue creada por el equipo de [Software Studies Initiative](http://lab.culturalanalytics.info/) para realizar visualizaciones que puedan abarcar la totalidad de una colección de imágenes y opera dentro de otro programa de libre acceso llamado [ImageJ](https://imagej.net/). Los creadores de ImagePlot utilizan el término "vista a distancia", *distant viewing* en inglés, para describir este tipo de visualización y análisis porque el punto de partida o enfoque principal es lo que se puede percibir de la totalidad de los elementos de la colección, en vez de uno o dos elementos a la vez. +Esta herramienta fue creada por el equipo de [Software Studies Initiative](https://lab.culturalanalytics.info/) para realizar visualizaciones que puedan abarcar la totalidad de una colección de imágenes y opera dentro de otro programa de libre acceso llamado [ImageJ](https://imagej.net/). Los creadores de ImagePlot utilizan el término "vista a distancia", *distant viewing* en inglés, para describir este tipo de visualización y análisis porque el punto de partida o enfoque principal es lo que se puede percibir de la totalidad de los elementos de la colección, en vez de uno o dos elementos a la vez. #### Visualización de metadatos @@ -51,7 +51,7 @@ No obstante, no necesitamos una colección inmensa para sacarle provecho a la vi #### La colección y los metadatos -Para esta lección utilizaremos una serie de imágenes provenientes de una colección de afiches del [Museo de la Palabra y la Imágen](https://web.archive.org/web/20201120143502/http://museo.com.sv/es/) en San Salvador, El Salvador. Los 394 afiches representados en los metadatos fueron creados durante los 12 años de la Guerra Civil de El Salvador (1980–1992), por más de 171 organizaciones de por lo menos 21 países. Los metadatos fueron preparados por personal de MUPI junto con el equipo de [Iniciativas Digitales de América Latina](http://ladi.lib.utexas.edu/)[^1] y la hoja de metadatos que utilizarás en esta lección es una versión modificada de la original. +Para esta lección utilizaremos una serie de imágenes provenientes de una colección de afiches del [Museo de la Palabra y la Imágen](https://web.archive.org/web/20201120143502/https://museo.com.sv/es/) en San Salvador, El Salvador. Los 394 afiches representados en los metadatos fueron creados durante los 12 años de la Guerra Civil de El Salvador (1980–1992), por más de 171 organizaciones de por lo menos 21 países. Los metadatos fueron preparados por personal de MUPI junto con el equipo de [Iniciativas Digitales de América Latina](https://ladi.lib.utexas.edu/)[^1] y la hoja de metadatos que utilizarás en esta lección es una versión modificada de la original. ## Imageplot Para utilizar ImagePlot debes comenzar descargando [ImageJ](https://imagej.net/ij/download.html) y luego descargar los cuatro macros de nuestro [repositorio en GitHub](https://github.com/programminghistorian/jekyll/tree/Issue-3275/assets/introduccion-a-imageplot-y-la-visualizacion-de-metadatos/)[^2]. 
Para organizar los archivos, puedes crear una carpeta con el nombre que prefieras para guardar los cuatro macros en un solo sitio. diff --git a/es/lecciones/introduccion-a-markdown.md b/es/lecciones/introduccion-a-markdown.md index 11b5a51ea6..e922ed02a7 100644 --- a/es/lecciones/introduccion-a-markdown.md +++ b/es/lecciones/introduccion-a-markdown.md @@ -39,7 +39,7 @@ Dado que las lecciones de *The Programming Historian en español* deben ser envi ### ¿Qué es Markdown? -Markdown fue desarrollado en 2004 por [John Gruber](http://daringfireball.net/projects/markdown/), y se refiere tanto a (1) una manera de formar archivos de texto, como a (2) una utilidad del lenguaje de programación Perl para convertir archivos Markdown en HTML. En esta lección nos centraremos en la primera acepción y aprenderemos a escribir archivos utilizando la sintaxis de Markdown. +Markdown fue desarrollado en 2004 por [John Gruber](https://daringfireball.net/projects/markdown/), y se refiere tanto a (1) una manera de formar archivos de texto, como a (2) una utilidad del lenguaje de programación Perl para convertir archivos Markdown en HTML. En esta lección nos centraremos en la primera acepción y aprenderemos a escribir archivos utilizando la sintaxis de Markdown. Los archivos de texto plano tienen muchas ventajas sobre otro tipo de formato. Por un lado, se pueden leer prácticamente en todos los dispositivos. También han resistido la prueba del paso del tiempo mejor que otro tipo de archivos -si alguna vez has intentado abrir un documento guardado en un formato de [procesador de textos heredado](https://es.wikipedia.org/wiki/Sistema_heredado), estarás familiarizado con los problemas de compatibilidad que implican-. @@ -325,7 +325,7 @@ Aunque Markdown se está haciendo cada vez más popular, particularmente para lo Markdown es un término medio muy útil entre los archivos de texto plano sin estilo y los documentos de procesadores de texto heredados. Su sintaxis simple se aprende rápidamente y es altamente legible en el mismo documento y cuando se transforma en HTML u otro tipo de documentos. En conclusión, escribir tus documentos en Markdown significa que serán capaces de ser utilizados y leídos a largo plazo. -[John Gruber]: http://daringfireball.net/projects/markdown/ +[John Gruber]: https://daringfireball.net/projects/markdown/ [Autoría sustentable utilizando Pandoc y Markdown]: /lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown [StackEdit]: https://stackedit.io [editor de StackEdit]: https://stackedit.io/editor diff --git a/es/lecciones/introduccion-al-web-scraping-usando-r.md b/es/lecciones/introduccion-al-web-scraping-usando-r.md index 41c1264d38..be12940c70 100644 --- a/es/lecciones/introduccion-al-web-scraping-usando-r.md +++ b/es/lecciones/introduccion-al-web-scraping-usando-r.md @@ -162,7 +162,7 @@ Existe otro lugar en el que podemos encontrar información sobre cómo interactu Este archivo está pensado principalmente para robots que hacen extracciones masivas del contenido de algunas páginas. Sin embargo, en él encontraremos información relevante para tareas más discretas, como las que realizaremos en esta serie de lecciones. -El documento robots.txt se encuentra en el directorio raíz de un sitio web, por lo tanto, en caso de estar disponible, podemos acceder a su contenido agregando "robots.txt" luego de la url principal. 
Por ejemplo, si quisiéramos revisar la versión de este archivo del sitio web del proyecto [Memoria Chilena](http://www.memoriachilena.gob.cl/) de la Biblioteca Nacional de Chile, tendríamos que escribir: `http://www.memoriachilena.gob.cl/robots.txt`. Eso nos llevará a [una página](https://perma.cc/37MD-HP8Y) con el siguiente contenido: +El documento robots.txt se encuentra en el directorio raíz de un sitio web, por lo tanto, en caso de estar disponible, podemos acceder a su contenido agregando "robots.txt" luego de la url principal. Por ejemplo, si quisiéramos revisar la versión de este archivo del sitio web del proyecto [Memoria Chilena](https://www.memoriachilena.gob.cl/) de la Biblioteca Nacional de Chile, tendríamos que escribir: `http://www.memoriachilena.gob.cl/robots.txt`. Eso nos llevará a [una página](https://perma.cc/37MD-HP8Y) con el siguiente contenido: ``` User-agent: * diff --git a/es/lecciones/introduccion-datos-abiertos-enlazados.md b/es/lecciones/introduccion-datos-abiertos-enlazados.md index 0730302736..be0a1d5516 100644 --- a/es/lecciones/introduccion-datos-abiertos-enlazados.md +++ b/es/lecciones/introduccion-datos-abiertos-enlazados.md @@ -47,7 +47,7 @@ Con el fin de proporcionar a los lectores una base sólida de los principios bá 1. La [web semántica](https://es.wikipedia.org/wiki/Web_sem%C3%A1ntica) y el [razonamiento semántico](https://en.wikipedia.org/wiki/Semantic_reasoner) de [conjuntos de datos](https://es.wikipedia.org/wiki/Conjunto_de_datos). Un razonador semántico deduciría que Jorge VI es el hermano o medio hermano de Eduardo VIII, dado el hecho de que a) Eduardo VIII es el hijo de Jorge V y b) Jorge VI es el hijo de Jorge V. Este tutorial no se centra en este tipo de tareas. -2. La creación y subida de conjuntos de datos abiertos enlazados a la [nube de datos enlazados](http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/). Compartir tus LOD es un principio importante, al que se anima más adelante. Sin embargo, los aspectos prácticos de contribuir con tus LOD a la nube de datos enlazados está fuera del alcance de esta lección. Al final de este tutorial hay algunos recursos disponibles que pueden ayudarte a comenzar con esta tarea. +2. La creación y subida de conjuntos de datos abiertos enlazados a la [nube de datos enlazados](https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/). Compartir tus LOD es un principio importante, al que se anima más adelante. Sin embargo, los aspectos prácticos de contribuir con tus LOD a la nube de datos enlazados está fuera del alcance de esta lección. Al final de este tutorial hay algunos recursos disponibles que pueden ayudarte a comenzar con esta tarea. ## Datos abiertos enlazados: ¿qué son? LOD es información estructurada en un formato destinado a las máquinas y, por tanto, no es necesariamente fácil de entender a primera vista. No te desanimes por esto, ya que una vez que entiendas los principios, puedes conseguir que una máquina los lea por ti. @@ -72,7 +72,7 @@ Vamos a crear un ejemplo con Jack Straw. Con este nombre propio podemos referirn persona=64183282 -A continuación, vamos a identificar al Jack Straw descrito por el *[Oxford Dictionary of National Biography](http://www.oxforddnb.com)* como 'el enigmático líder rebelde' con el número `33059614`. 
En consecuencia, su par atributo-valor sería el siguiente: +A continuación, vamos a identificar al Jack Straw descrito por el *[Oxford Dictionary of National Biography](https://www.oxforddnb.com)* como 'el enigmático líder rebelde' con el número `33059614`. En consecuencia, su par atributo-valor sería el siguiente: persona=33059614 @@ -82,7 +82,7 @@ Los pares atributo-valor también pueden almacenar información sobre otros tipo lugar=2655524 -En este momento podrías estar pensando, "esto es lo que hace el catálogo de la biblioteca". Es cierto que la idea clave aquí es la de [control de autoridades](https://es.wikipedia.org/wiki/Control_de_autoridades), que es central en biblioteconomía (un fichero de autoridad es una lista cerrada de términos que pueden ser utilizados en un contexto particular, por ejemplo cuando se cataloga un libro). En ambos ejemplos mencionados anteriormente, hemos utilizado los ficheros de autoridad para asignar los números (los identificadores únicos) a los Jacks y a Blackburn. Los números que utilizamos para los dos Jack Straws provienen del [Virtual International Authority File - Archivo de Autoridades Internacional Virtual](https://viaf.org) (VIAF), que es mantenido por un consorcio de bibliotecas de todo el mundo para tratar de abordar el problema de la miríada de formas en las que una misma persona podría ser nombrada. El identificador único que utilizamos para el distrito electoral de Blackburn provino de [GeoNames](http://www.geonames.org/), una base de datos geográfica gratuita. +En este momento podrías estar pensando, "esto es lo que hace el catálogo de la biblioteca". Es cierto que la idea clave aquí es la de [control de autoridades](https://es.wikipedia.org/wiki/Control_de_autoridades), que es central en biblioteconomía (un fichero de autoridad es una lista cerrada de términos que pueden ser utilizados en un contexto particular, por ejemplo cuando se cataloga un libro). En ambos ejemplos mencionados anteriormente, hemos utilizado los ficheros de autoridad para asignar los números (los identificadores únicos) a los Jacks y a Blackburn. Los números que utilizamos para los dos Jack Straws provienen del [Virtual International Authority File - Archivo de Autoridades Internacional Virtual](https://viaf.org) (VIAF), que es mantenido por un consorcio de bibliotecas de todo el mundo para tratar de abordar el problema de la miríada de formas en las que una misma persona podría ser nombrada. El identificador único que utilizamos para el distrito electoral de Blackburn provino de [GeoNames](https://www.geonames.org/), una base de datos geográfica gratuita. Pero intentemos ser más precisos por lo que entendemos por Blackburn en este caso. Jack Straw ejerció su cargo parlamentario en representación de Blackburn (que cuenta con un solo miembro en el parlamento británico). Los límites de Blackburn han cambiado con el paso del tiempo, así que en el proyecto '[Digging Into Linked Parliamentary Data](https://repository.jisc.ac.uk/6544/)' (Dilipad) (en el que trabajé) se crearon identificadores únicos para las afiliaciones a partidos y para los distritos electorales de cada miembro del parlamento. En este ejemplo, Jack Straw representó a la circunscripción conocida como 'Blackburn' en su encarnación posterior a 1955: @@ -126,18 +126,18 @@ En la sección anterior usamos dos números distintos para identificar nuestros El problema es que en todo el mundo hay muchas bases de datos que contienen personas con estos números, y probablemente sean personas diferentes. 
Fuera de nuestro contexto inmediato, estas cifras no identifican individuos únicos. Tratemos de arreglar eso. Aquí están estos mismos identificadores pero como URI: - http://viaf.org/viaf/64183282/ - http://viaf.org/viaf/33059614/ + https://viaf.org/viaf/64183282/ + https://viaf.org/viaf/33059614/ Así como el número único desambiguó nuestros dos Jack Straws, el URI completo anterior nos ayuda a eliminar la ambigüedad entre todos los diferentes archivos de autoridad que existen. En este caso, está claro que estamos usando VIAF como nuestro archivo de autoridad. Ya has visto esta forma de desambiguación muchas veces en la web. Hay muchos sitios web alrededor del mundo con páginas llamadas `/home` o `/faq`. Pero no hay confusión porque el [dominio](https://es.wikipedia.org/wiki/Dominio_de_Internet) (la primera parte del Localizador Uniforme de Recursos (URL) - por ejemplo,`bbc.co.uk`) es único y, por lo tanto, todas las páginas que son parte de ese dominio son únicas, diferenciándose de otras páginas `/faq` de otros sitios web. En la dirección `http://www.bbc.co.uk/faqs`, es la parte `bbc.co.uk` la que hace únicas las páginas siguientes. Esto es tan obvio para las personas que usan la web todo el tiempo que no piensan en ello. Probablemente también sepas que si quieres iniciar un sitio web llamado `bbc.co.uk` no puedes hacerlo, porque ese nombre ya se ha registrado con la autoridad correspondiente, que es el [Sistema de Nombres de Dominio](https://es.wikipedia.org/wiki/Sistema_de_nombres_de_dominio) (Domain Name System - DNS). El registro garantiza la unicidad. Los URIs también deben ser únicos. Si bien los ejemplos anteriores se parecen a las URLs, es posible también construir un URI que no se parezca en nada a una URL. Tenemos muchas maneras de identificar personas y cosas de manera única y rara vez lo pensamos o nos preocupamos de ello. Los códigos de barras, los números de pasaporte e incluso tu dirección postal están diseñados para ser únicos. En el mundo desarrollado los números de teléfono móvil se colocan con frecuencia en los carteles de las tiendas precisamente porque son únicos. Todos ellos podrían usarse como URIs. -Cuando quisimos crear URIs para las entidades descritas por el proyecto '[Tobias](http://www.history.ac.uk/projects/digital/tobias)', elegimos una estructura tipo URL y elegimos utilizar nuestro espacio web institucional, dejando de lado `data.history.ac.uk/tobias-project/` como un lugar dedicado a alojar estos URI. Al ponerlo en `data.history.ac.uk` en lugar de en `history.ac.uk`, hubo una separación clara entre los URI y las páginas del sitio web. Por ejemplo, uno de los URIs del proyecto Tobias era http://data.history.ac.uk/tobias-project/person/15601. Si bien el formato de los URI mencionados anteriormente es el mismo que el de una URL, no se vinculan a páginas web (intenta pegarlas en un navegador web). Muchas personas nuevas con los LOD encuentran esto confuso. Todas las URL son URI, pero no todas las URI son URL. Una URI puede describir cualquier cosa, mientras que una URL describe la ubicación de algo en la web. Es decir, una URL te dice la ubicación de una página web o un archivo o algo similar. Un URI simplemente hace el trabajo de identificar algo. Así como el Número Estándar Internacional de Libro, o [ISBN](https://www.iso.org/standard/36563.html) 978-0-1-873354-6 identifica de manera única una edición de tapa dura de _Bautismo, Hermandad y Creencias en la Reforma de Alemania_ por Kat Hill, pero no te dice dónde conseguir una copia. 
Para eso, necesitarías algo como una [signatura](https://www.upo.es/biblioteca/guia_loc_sig/signatura/index.html), que te da una ubicación exacta en un estante de una biblioteca específica. +Cuando quisimos crear URIs para las entidades descritas por el proyecto '[Tobias](https://www.history.ac.uk/projects/digital/tobias)', elegimos una estructura tipo URL y elegimos utilizar nuestro espacio web institucional, dejando de lado `data.history.ac.uk/tobias-project/` como un lugar dedicado a alojar estos URI. Al ponerlo en `data.history.ac.uk` en lugar de en `history.ac.uk`, hubo una separación clara entre los URI y las páginas del sitio web. Por ejemplo, uno de los URIs del proyecto Tobias era https://data.history.ac.uk/tobias-project/person/15601. Si bien el formato de los URI mencionados anteriormente es el mismo que el de una URL, no se vinculan a páginas web (intenta pegarlas en un navegador web). Muchas personas nuevas con los LOD encuentran esto confuso. Todas las URL son URI, pero no todas las URI son URL. Una URI puede describir cualquier cosa, mientras que una URL describe la ubicación de algo en la web. Es decir, una URL te dice la ubicación de una página web o un archivo o algo similar. Un URI simplemente hace el trabajo de identificar algo. Así como el Número Estándar Internacional de Libro, o [ISBN](https://www.iso.org/standard/36563.html) 978-0-1-873354-6 identifica de manera única una edición de tapa dura de _Bautismo, Hermandad y Creencias en la Reforma de Alemania_ por Kat Hill, pero no te dice dónde conseguir una copia. Para eso, necesitarías algo como una [signatura](https://www.upo.es/biblioteca/guia_loc_sig/signatura/index.html), que te da una ubicación exacta en un estante de una biblioteca específica. Hay un poco de jerga alrededor de los URIs. La gente habla de si son, o no, [desreferenciables](https://es.wikipedia.org/wiki/Referencia_(inform%C3%A1tica)). Eso solo significa que *¿se puede pasar desde una referencia abstracta a otra cosa?* Por ejemplo, si pegas un URI en la barra de direcciones de un navegador, ¿devolverá algo? El URI de VIAF para el historiador Simon Schama es: - http://viaf.org/viaf/46784579 + https://viaf.org/viaf/46784579 Si lo pones en el navegador, obtendrás una página web sobre Simon Schama que contiene datos estructurados sobre él y su historial de publicaciones. Esto es muy útil, pero, por otro lado, no es obvio desde la URI a quién o incluso a qué se refiere. Del mismo modo, si tratamos un número de teléfono móvil (con código internacional) como URI para una persona, entonces debería ser desreferenciable. Alguien podría responder el teléfono, e incluso podría ser Schama. @@ -162,10 +162,10 @@ Estamos poniendo ejemplos simplemente con el fin de ilustrar, pero si deseas enl Una ontología es más flexible porque no es jerárquica. Su objetivo es representar la fluidez del mundo real, donde las cosas se pueden relacionar entre sí de formas más complejas que las representadas por una estructura jerárquica de tipo arbóreo. En cambio, una ontología es más como una tela de araña. -Sea lo que sea que desees representar con los LOD, te sugerimos que busques un vocabulario existente y lo uses, en lugar de intentar escribir el tuyo propio. Esta página principal incluye [una lista de algunos de los vocabularios más populares](http://semanticweb.org/wiki/Main_Page.html) +Sea lo que sea que desees representar con los LOD, te sugerimos que busques un vocabulario existente y lo uses, en lugar de intentar escribir el tuyo propio. 
Esta página principal incluye [una lista de algunos de los vocabularios más populares](https://semanticweb.org/wiki/Main_Page.html) > N.T.: desplázate hacia la zona derecha/abajo de la página: "Popular Vocabularies" -Dado que nuestro anterior ejemplo se centra en los pianistas, sería una buena idea encontrar una ontología adecuada en lugar de crear nuestro propio sistema. De hecho, hay [una ontología para la música](http://web.archive.org/web/20170715094229/http://www.musicontology.com/). Además de una especificación bien desarrollada, tiene también algunos ejemplos útiles de su uso. Puedes echar un vistazo a las páginas de [Introducción](http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html) para tener una idea de cómo puedes usar esa ontología particular. +Dado que nuestro anterior ejemplo se centra en los pianistas, sería una buena idea encontrar una ontología adecuada en lugar de crear nuestro propio sistema. De hecho, hay [una ontología para la música](https://web.archive.org/web/20170715094229/https://www.musicontology.com/). Además de una especificación bien desarrollada, tiene también algunos ejemplos útiles de su uso. Puedes echar un vistazo a las páginas de [Introducción](https://web.archive.org/web/20170718143925/https://musicontology.com/docs/getting-started.html) para tener una idea de cómo puedes usar esa ontología particular. Lamentablemente, no encuentro nada que describa la relación entre un profesor y un alumno en Music Ontology. Pero la ontología se publica en abierto, así que puedo usarla para describir otras características de la música y luego crear mi propia extensión. Si luego publico mi extensión en abierto, otros pueden usarla si lo desean y puede convertirse en un estándar. Si bien el proyecto Music Ontology no tiene la relación que necesito, el proyecto [Linked Jazz](https://linkedjazz.org/) permite el uso de 'mentorDe', que parece que podría funcionar bien en nuestro caso. Aunque esta no es la solución ideal, conviene esforzarse por usar lo que ya existe. @@ -174,7 +174,7 @@ Ahora bien, si estuvieras estudiando la historia de los pianistas, querrías ide Dame los nombres de todos los pianistas enseñados por x donde x fue enseñado a tocar el piano por Liszt -La consulta devolvería todas las personas en el conjunto de datos que fueron alumnos de un alumno de Liszt. No nos entusiasmemos demasiado: esta consulta no nos dará a cada alumno de cada alumno de Liszt que haya existido alguna vez porque esa información probablemente no exista y no exista dentro de ningún grupo de tripletas existente. Lidiar con datos del mundo real muestra todo tipo de omisiones e inconsistencias que veremos cuando analicemos el mayor conjunto de LOD, [DBpedia](http://wiki.dbpedia.org), en la sección final. +La consulta devolvería todas las personas en el conjunto de datos que fueron alumnos de un alumno de Liszt. No nos entusiasmemos demasiado: esta consulta no nos dará a cada alumno de cada alumno de Liszt que haya existido alguna vez porque esa información probablemente no exista y no exista dentro de ningún grupo de tripletas existente. Lidiar con datos del mundo real muestra todo tipo de omisiones e inconsistencias que veremos cuando analicemos el mayor conjunto de LOD, [DBpedia](https://wiki.dbpedia.org), en la sección final. Si has utilizado [bases de datos relacionales](https://en.wikipedia.org/wiki/Relational_database), podrías pensar que pueden realizar la misma función. 
En el caso de Liszt, la información sobre pianistas descrita anteriormente podría organizarse en una [tabla](https://es.wikipedia.org/wiki/Base_de_datos_relacional) de base de datos llamada algo así como 'Alumnos'.
@@ -211,19 +211,19 @@ Reconocer qué serialización estás viendo significa que puedes elegir las herr
 
 Turtle usa alias o atajos conocidos como [prefijos](https://www.w3.org/TeamSubmission/turtle/#sec-tutorial), lo que nos ahorra tener que escribir URIs completos todo el tiempo. Regresemos al URI que inventamos en la sección anterior:
 
-    http://data.history.ac.uk/tobias-project/person/15601
+    https://data.history.ac.uk/tobias-project/person/15601
 
 No queremos escribir esto cada vez que nos referimos a esta persona (Jack Straw, como recordarás). Entonces sólo tenemos que anunciar nuestro atajo:
 
-    @prefix toby: <http://data.history.ac.uk/tobias-project/person/> .
+    @prefix toby: <https://data.history.ac.uk/tobias-project/person/> .
 
 Así, Jack es `toby:15601`, que reemplaza el URI largo y es más fácil de leer. He elegido 'toby', pero podría haber elegido cualquier cadena de letras con la misma facilidad.
 
 Pasemos ahora de Jack Straw a William Shakespeare y usemos Turtle para describir algunos elementos sobre sus obras. Tendremos que decidir qué archivos de autoridad usar, un proceso que, como se mencionó anteriormente, se optimiza si consultamos otros conjuntos de LOD. Aquí usaremos [Dublin Core](https://es.wikipedia.org/wiki/Dublin_Core), un estándar de [metadatos](https://es.wikipedia.org/wiki/Metadato) usado por las bibliotecas, como uno de nuestros prefijos, el archivo de autoridad del [Número de control de la Biblioteca del Congreso](https://es.wikipedia.org/wiki/Library_of_Congress_Control_Number) para otro, y el último (VIAF) debería serte familiar. En conjunto, estos tres archivos de autoridad proporcionan identificadores únicos para todas las entidades que planeo usar en este ejemplo:
 
-    @prefix lccn: <http://lccn.loc.gov/> .
-    @prefix dc: <http://purl.org/dc/terms/> .
-    @prefix viaf: <http://viaf.org/viaf/> .
+    @prefix lccn: <https://lccn.loc.gov/> .
+    @prefix dc: <https://purl.org/dc/terms/> .
+    @prefix viaf: <https://viaf.org/viaf/> .
 
     lccn:n82011242 dc:creator viaf:96994048 .
 
@@ -233,9 +233,9 @@ En el ejemplo anterior, lccn: n82011242 representa a Macbeth; dc: creator vincul
 
 Turtle también te permite listar tripletas sin molestarte en repetir cada URI cuando acabas de usarlo. Agreguemos la fecha en la que los expertos creen que Macbeth fue escrita utilizando el par atributo-valor de Dublin Core: `dc:created 'YYYY'`:
 
-    @prefix lccn: <http://lccn.loc.gov/> .
-    @prefix dc: <http://purl.org/dc/terms/> .
-    @prefix viaf: <http://viaf.org/viaf/> .
+    @prefix lccn: <https://lccn.loc.gov/> .
+    @prefix dc: <https://purl.org/dc/terms/> .
+    @prefix viaf: <https://viaf.org/viaf/> .
 
     lccn:n82011242 dc:creator viaf:96994048 ;
     dc:created "1606" .
 
@@ -252,11 +252,11 @@ Puedes usar un punto y coma si el sujeto es el mismo pero el predicado y el obje
 
 Aquí estamos diciendo que Shakespeare (96994048) y John Fletcher (12323361) fueron los creadores de la obra *Los dos nobles caballeros*.
 
-Cuando anteriormente vimos las ontologías, sugerí que le echaras un vistazo a los ejemplos de la [Music Ontology](http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html). Espero que no te decepcionaran. Echa un vistazo de nuevo ahora. Todavía es algo complicado, pero ¿tiene más sentido ahora?
+Cuando anteriormente vimos las ontologías, sugerí que le echaras un vistazo a los ejemplos de la [Music Ontology](https://web.archive.org/web/20170718143925/https://musicontology.com/docs/getting-started.html). Espero que no te decepcionaran. Echa un vistazo de nuevo ahora. Todavía es algo complicado, pero ¿tiene más sentido ahora?
 
 Una de las ontologías más accesibles es Friend of a Friend, o [FOAF](https://es.wikipedia.org/wiki/FOAF).
Está diseñada para describir personas y es, quizás por esa razón, bastante intuitiva. Si, por ejemplo, deseas escribirme para decirme que este tutorial es lo mejor que has leído, aquí está mi dirección de correo electrónico expresada como tripletas en FOAF: - @prefix foaf: . + @prefix foaf: . :"Jonathan Blaney" foaf:mbox . #### RDF/XML @@ -267,10 +267,10 @@ En contraste con Turtle, RDF/XML puede parecer un poco pesado. Para empezar, con En RDF/XML, con los prefijos declarados dentro del fragmento XML, es así: - - - + + + @@ -278,29 +278,29 @@ El formato RDF/XML tiene la misma información básica que Turtle, pero se escri Pasemos a un ejemplo diferente para mostrar cómo RDF/XML combina tripletas y, al mismo tiempo, presentamos [SKOS](https://es.wikipedia.org/wiki/Simple_Knowledge_Organization_System) (Simple Knowledge Organization System - Sistema Simple de Organización del Conocimiento), que está diseñado para codificar tesauros o taxonomías. - + Abdication -Aquí estamos diciendo que el concepto SKOS `21250`, abdicación, tiene una etiqueta preferida de "abdicación". La forma en que funciona es que el elemento sujeto (incluida la parte de abdicación, que es un valor de atributo en términos XML) tiene el predicado y el objeto anidados dentro de él. El elemento anidado es el predicado y el nodo hoja [(*the leaf node*)](https://es.wikipedia.org/wiki/%C3%81rbol_(inform%C3%A1tica)#Terminolog.C3.ADa), es el objeto. Este ejemplo está tomado de un proyecto para publicar un [tesauro de historia británica e irlandesa](http://www.history.ac.uk/projects/digital/tobias). +Aquí estamos diciendo que el concepto SKOS `21250`, abdicación, tiene una etiqueta preferida de "abdicación". La forma en que funciona es que el elemento sujeto (incluida la parte de abdicación, que es un valor de atributo en términos XML) tiene el predicado y el objeto anidados dentro de él. El elemento anidado es el predicado y el nodo hoja [(*the leaf node*)](https://es.wikipedia.org/wiki/%C3%81rbol_(inform%C3%A1tica)#Terminolog.C3.ADa), es el objeto. Este ejemplo está tomado de un proyecto para publicar un [tesauro de historia británica e irlandesa](https://www.history.ac.uk/projects/digital/tobias). Al igual que con Turtle, podemos agregar más tripletas. Entonces, declaremos que el término más restringido en nuestra jerarquía de temas, uno más abajo de *Abdicación* será *Crisis de la abdicación (1936)*. - + Abdication - - + + ¿Recuerdas cómo los predicados y los objetos están anidados dentro del sujeto? Aquí lo hemos hecho dos veces con el mismo sujeto, por lo que podemos hacer esto menos detallado al anidar ambos conjuntos de predicados y objetos dentro de un sujeto: - + Abdication - + -Si estás familiarizado con XML, esto será muy fácil para ti. Si no lo estás, podrías preferir un formato como Turtle. Pero la ventaja aquí es que creando tu RDF/XML puedes usar las herramientas habituales disponibles para XML, como editores y analizadores XML, para verificar que tu RDF/XML esté formateado correctamente. Si no tienes experiencia con XML, recomiendo Turtle, para lo que puedes usar una herramienta [en línea](http://www.easyrdf.org/converter) para verificar que tu sintaxis sea correcta. +Si estás familiarizado con XML, esto será muy fácil para ti. Si no lo estás, podrías preferir un formato como Turtle. Pero la ventaja aquí es que creando tu RDF/XML puedes usar las herramientas habituales disponibles para XML, como editores y analizadores XML, para verificar que tu RDF/XML esté formateado correctamente. 
Si no tienes experiencia con XML, recomiendo Turtle, para lo que puedes usar una herramienta [en línea](https://www.easyrdf.org/converter) para verificar que tu sintaxis sea correcta.
 
 ## Consultas RDF con SPARQL
 
@@ -312,7 +312,7 @@ Como mencioné al principio, *The Programming Historian en español* tiene [una
 
 Vamos a ejecutar nuestras consultas SPARQL en [DBpedia](https://es.wikipedia.org/wiki/DBpedia), que es un gran conjunto de LOD derivado de Wikipedia. Además de estar lleno de información que es muy difícil de encontrar a través de la interfaz habitual de Wikipedia, tiene varios "puntos finales" SPARQL: interfaces donde puedes escribir consultas SPARQL y obtener resultados de las tripletas de DBpedia.
 
-El punto de entrada (*endpoint*) de consulta SPARQL que yo uso se llama [snorql](http://dbpedia.org/snorql/). Estos puntos de entrada a veces parecen desconectarse, por lo que, si ese fuera el caso, busca *dbpedia sparql* en internet para encontrar un reemplazo similar.
+El punto de entrada (*endpoint*) de consulta SPARQL que yo uso se llama [snorql](https://dbpedia.org/snorql/). Estos puntos de entrada a veces parecen desconectarse, por lo que, si ese fuera el caso, busca *dbpedia sparql* en internet para encontrar un reemplazo similar.
 
 Si vas a la URL snorql indicada antes, verás que al principio ya están declarados varios prefijos, lo cual te será útil. También reconocerás ya algunos de los prefijos.
 
 {% include figure.html filename="en-or-intro-to-linked-data-03.png" caption="Figura 3. Cuadro de consulta predeterminado de snorql, con algunos prefijos declarados para ti." %}
 
@@ -356,7 +356,7 @@ Volvamos a los resultados de la consulta que ejecuté hace un momento:
 
 Puedo ver una larga lista en la columna etiquetada como _c_. Estos son todos los atributos que Roper tiene en la DBpedia y nos ayudarán a encontrar otras personas con estos atributos. Por ejemplo, puedo ver ```http://dbpedia.org/class/yago/Historian110177150```. ¿Puedo usar esto para obtener una lista de historiadores? Voy a poner esto en mi consulta pero en tercer lugar (porque ahí es donde estaba cuando lo encontré en los resultados de Lyndal Roper). Mi consulta se ve así:
 
     SELECT * WHERE {
-    ?historian_name ?predicate <http://dbpedia.org/class/yago/Historian110177150>
+    ?historian_name ?predicate <https://dbpedia.org/class/yago/Historian110177150>
 }
 
 He hecho un pequeño cambio aquí. Si esta consulta funciona entonces espero que mis historiadores estén en la primera columna, porque "historiador" no parece ser un predicado: no funciona como un verbo en una oración; así que voy a llamar a mi primera columna de resultados 'nombre_historiador' y a mi segunda (de la que no sé nada) 'predicado'.
 
@@ -365,13 +365,13 @@ Ejecuta la consulta. ¿Te funciona? Yo obtuve una gran lista de historiadores.
 
 {% include figure.html filename="en-or-intro-to-linked-data-05.png" caption="Figura 5. Historiadores, según DBpedia." %}
 
-Así que esto funciona para crear listas, lo cual es útil, pero sería mucho más potente combinar listas, para obtener intersecciones de conjuntos.
Encontré un par de cosas más que podrían ser interesantes para consultar en los atributos de DBpedia de Lyndal Roper: <http://dbpedia.org/class/yago/WikicatBritishHistorians> y <http://dbpedia.org/class/yago/WikicatWomenHistorians>. Es muy fácil combinarlos pidiendo una variable que retornará (en nuestro caso, esta es `?name`) y luego usar eso en múltiples líneas de una consulta. Ten en cuenta también el espacio y el punto al final de la primera línea que comienza con `?name`:
+Así que esto funciona para crear listas, lo cual es útil, pero sería mucho más potente combinar listas, para obtener intersecciones de conjuntos. Encontré un par de cosas más que podrían ser interesantes para consultar en los atributos de DBpedia de Lyndal Roper: <https://dbpedia.org/class/yago/WikicatBritishHistorians> y <https://dbpedia.org/class/yago/WikicatWomenHistorians>. Es muy fácil combinarlos pidiendo una variable que retornará (en nuestro caso, esta es `?name`) y luego usar eso en múltiples líneas de una consulta. Ten en cuenta también el espacio y el punto al final de la primera línea que comienza con `?name`:
 
     SELECT ?name
     WHERE {
-    ?name ?b <http://dbpedia.org/class/yago/WikicatBritishHistorians> .
-    ?name ?b <http://dbpedia.org/class/yago/WikicatWomenHistorians>
+    ?name ?b <https://dbpedia.org/class/yago/WikicatBritishHistorians> .
+    ?name ?b <https://dbpedia.org/class/yago/WikicatWomenHistorians>
 }
 
 ¡Funciona! Obtengo cinco resultados. En el momento de escribir, hay cinco historiadoras británicas en *DBpedia*...
 
@@ -392,7 +392,7 @@ Sin embargo, a pesar de sus inconsistencias, *DBpedia* es un gran lugar para apr
 
 * Bob DuCharme, *Learning SPARQL*, O'Reilly, 2011
 
-* El blog de [Bob DuCharme](http://www.snee.com/bobdc.blog/) merece la pena leerlo también.
+* El blog de [Bob DuCharme](https://www.snee.com/bobdc.blog/) merece la pena leerlo también.
 
 * Richard Gartner, *Metadata: Shaping Knowledge from Antiquity to the Semantic Web*, Springer, 2016
 
@@ -400,19 +400,19 @@
 
 * Matthew Lincoln ['Using SPARQL to access Linked Open Data'](/en/lessons/graph-databases-and-SPARQL)
 
-* [Linked Data guides and tutorials](http://linkeddata.org/guides-and-tutorials)
+* [Linked Data guides and tutorials](https://linkeddata.org/guides-and-tutorials)
 
 * Dominic Oldman, Martin Doerr y Stefan Gradmann, "Zen and the Art of Linked Data: New Strategies for a Semantic Web of Humanist Knowledge", in *A New Companion to Digital Humanities*, editado por Susan Schreibman et al.
 
-* Max Schmachtenberg, Christian Bizer y Heiko Paulheim, [State of the LOD Cloud 2017](http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/)
+* Max Schmachtenberg, Christian Bizer y Heiko Paulheim, [State of the LOD Cloud 2017](https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/)
 
 * David Wood, Marsha Zaidman y Luke Ruth, *Linked Data: Structured data on the Web*, Manning, 2014
 
-* Biblioteca del Congreso Nacional de Chile, [Linked Open Data: ¿Qué es?](http://datos.bcn.cl/es/informacion/que-es)
+* Biblioteca del Congreso Nacional de Chile, [Linked Open Data: ¿Qué es?](https://datos.bcn.cl/es/informacion/que-es)
 
 * Ana-Isabel Torre-Bastida, Marta González-Rodríguez y Esther Villar-Rodríguez, [Datos abiertos enlazados (LOD) y su implantación en bibliotecas: iniciativas y tecnologías](https://web.archive.org/web/20180720105638/https://recyt.fecyt.es/index.php/EPI/article/download/epi.2015.mar.04/18804)
 
 ## Agradecimientos
 
-El autor del tutorial agradece a los revisores del tutorial original, Matthew Lincoln y a Terhi Nurmikko-Fuller, y al editor, Adam Crymble, por dedicar tiempo generosamente a ayudarle a mejorar este tutorial con numerosas sugerencias, aclaraciones y correcciones.
Esta lección se basa en un trabajo perteneciente al “Tesauro de historia Británica e Irlandesa como SKOS” (proyecto [Tobias](https://www.history.ac.uk/projects/digital/tobias)), financiado por el [AHRC](https://www.ahrc.ac.uk/). Ha sido revisado para *The Programming Historian*.
diff --git a/es/lecciones/introduccion-e-instalacion.md b/es/lecciones/introduccion-e-instalacion.md
index 47049b4c09..802c3c4072 100644
--- a/es/lecciones/introduccion-e-instalacion.md
+++ b/es/lecciones/introduccion-e-instalacion.md
@@ -68,7 +68,7 @@ Con el fin de trabajar con las diversas técnicas que se ofrecen en *Programming
 
 [Lenguaje de programación Python]: https://www.python.org
 [Analizador sintáctico (*parser*) Beautiful Soup HTML/XML]: https://www.crummy.com/software/BeautifulSoup/
-[Komodo Edit]: http://komodoide.com/komodo-edit/
+[Komodo Edit]: https://komodoide.com/komodo-edit/
 [Editores de Texto para Python]: https://wiki.python.org/moin/PythonEditors/
 [Zotero]: https://www.zotero.org
 [Jungle Disk]: https://www.jungledisk.com
diff --git a/es/lecciones/introduccion-map-warper.md b/es/lecciones/introduccion-map-warper.md
index 4a16d548b0..62c6280a06 100644
--- a/es/lecciones/introduccion-map-warper.md
+++ b/es/lecciones/introduccion-map-warper.md
@@ -30,7 +30,7 @@ doi: 10.46430/phes0048
 
 Map Warper fue diseñada para georreferenciar mapas antiguos -mapamundis, portulanos, cartas náuticas, planos topográficos, planos arquitectónicos, cartas geográficas-, fotografías aéreas y demás materiales cartográficos contenidos en las colecciones de carácter patrimonial. En tal sentido, la herramienta nos posibilita la generación de material georreferenciado para trabajo en escritorio -rásteres- o en línea -Map Server-, útiles para vincular a sistemas de información geográfica (QGIS, JOSM, ArcGIS, Google Earth, World Map, otros). Asimismo, la herramienta ayuda a descentralizar y agilizar los procesos de georreferenciación, catalogación y visualización, ya que su plataforma crea un entorno de colaboración abierta.
 
-Gracias a sus características, la herramienta es útil a investigadores, profesores y estudiantes, como a instituciones que están desarrollando procesos de digitalización, visualización y experimentación del material cartográfico de sus colecciones, o para el desarrollo de proyectos en humanidades espaciales, como son los casos de la [Mapoteca Digital](http://bibliotecanacional.gov.co/es-co/colecciones/biblioteca-digital/mapoteca) de la Biblioteca Nacional de Colombia, [Cartografía de Bogotá](http://cartografia.bogotaendocumentos.com/) de la Universidad Nacional de Colombia, [Paisajes coloniales: redibujando los territorios andinos en el siglo XVII](https://paisajescoloniales.com/) de la Universidad de los Andes (Colombia).
+Gracias a sus características, la herramienta es útil a investigadores, profesores y estudiantes, como a instituciones que están desarrollando procesos de digitalización, visualización y experimentación del material cartográfico de sus colecciones, o para el desarrollo de proyectos en humanidades espaciales, como son los casos de la [Mapoteca Digital](https://bibliotecanacional.gov.co/es-co/colecciones/biblioteca-digital/mapoteca) de la Biblioteca Nacional de Colombia, [Cartografía de Bogotá](https://cartografia.bogotaendocumentos.com/) de la Universidad Nacional de Colombia, [Paisajes coloniales: redibujando los territorios andinos en el siglo XVII](https://paisajescoloniales.com/) de la Universidad de los Andes (Colombia).
### Lo que aprenderás en este tutorial @@ -77,7 +77,7 @@ La pestaña “Metadata” visualiza la información cumplimentada en la etapa d En la versión de Map Warper que se encuentra actualmente disponible ya no es posible añadir un mapa base.
    -En este tutorial explicaremos el proceso de georreferenciación con el [Mapa Cafetero de la República de Colombia](http://catalogoenlinea.bibliotecanacional.gov.co/custom/web/content/mapoteca/fmapoteca_984_figac_16/fmapoteca_984_figac_16.html) de la Mapoteca Digital de la Biblioteca Nacional de Colombia. El documento cartográfico lo publicó la Federación Nacional de Cafeteros de Colombia en 1933, en una época en donde el café era la industria agrícola rectora de la economía colombiana, como resultado del primer censo cafetero del país realizado en 1932. +En este tutorial explicaremos el proceso de georreferenciación con el [Mapa Cafetero de la República de Colombia](https://catalogoenlinea.bibliotecanacional.gov.co/custom/web/content/mapoteca/fmapoteca_984_figac_16/fmapoteca_984_figac_16.html) de la Mapoteca Digital de la Biblioteca Nacional de Colombia. El documento cartográfico lo publicó la Federación Nacional de Cafeteros de Colombia en 1933, en una época en donde el café era la industria agrícola rectora de la economía colombiana, como resultado del primer censo cafetero del país realizado en 1932. Recordamos que en caso de no tener cargada cartografía alguna, se podrá utilizar los mapas del siguiente listado, y en caso de recurrir al [listado](/assets/introduccion-map-warper/map-warper.csv) resaltar el mapa seleccionado en el interior del listado. @@ -97,10 +97,10 @@ En este aspecto, para comprender mejor el desarrollo de esta acción técnica, d ~~~ Google Maps: https://mt1.google.com/vt/lyrs=r&x={x}&y={y}&z={z} -Google Satellite: http://www.google.cn/maps/vt?lyrs=s@189&gl=cn&x={x}&y={y}&z={z} -Bing Satélite: http://ecn.t3.tiles.virtualearth.net/tiles/a{q}.jpeg?g=0&dir=dir_n’ -CARTO dark: http://a.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}.png -Stamen Watercolor: http://tile.stamen.com/watercolor/{z}/{x}/{y}.jpg +Google Satellite: https://www.google.cn/maps/vt?lyrs=s@189&gl=cn&x={x}&y={y}&z={z} +Bing Satélite: https://ecn.t3.tiles.virtualearth.net/tiles/a{q}.jpeg?g=0&dir=dir_n’ +CARTO dark: https://a.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}.png +Stamen Watercolor: https://tile.stamen.com/watercolor/{z}/{x}/{y}.jpg ~~~ ![Add control point](/images/introduccion-map-warper/es-or-introduccion-map-warper-icon3.gif): El botón *Add control point* (agregar punto de control), ubicado en ambos recuadros, sirve para posicionar los puntos de control que relacionan el mapa vinculado o seleccionado con la capa base predeterminada o establecida. diff --git a/es/lecciones/limpieza-de-datos-con-OpenRefine.md b/es/lecciones/limpieza-de-datos-con-OpenRefine.md index cd3963abe9..ff043c8de8 100644 --- a/es/lecciones/limpieza-de-datos-con-OpenRefine.md +++ b/es/lecciones/limpieza-de-datos-con-OpenRefine.md @@ -39,7 +39,7 @@ doi: 10.46430/phes0017 ## Objetivos de la lección -No confíes ciegamente en tus datos. Ese es el mensaje clave de este tutorial que se centra en mostrar cómo los investigadores pueden diagnosticar y proceder sobre la exactitud de los datos. En esta lección aprenderás los principios y la práctica de la limpieza de datos, así como la forma de usar [*OpenRefine*](http://openrefine.org/) para realizar cuatro tareas esenciales que te ayudarán a limpiar tus datos: +No confíes ciegamente en tus datos. Ese es el mensaje clave de este tutorial que se centra en mostrar cómo los investigadores pueden diagnosticar y proceder sobre la exactitud de los datos. 
En esta lección aprenderás los principios y la práctica de la limpieza de datos, así como la forma de usar [*OpenRefine*](https://openrefine.org/) para realizar cuatro tareas esenciales que te ayudarán a limpiar tus datos: 1. Eliminar registros duplicados 2. Separar varios valores contenidos en el mismo campo @@ -58,9 +58,9 @@ Tiempo atrás los historiadores debieron confiar en los especialistas en tecnolo Las IDTs se asemejan a los programas de hojas de cálculo de escritorio con los que todos estamos familiarizados, con los que comparten algunas funcionalidades. Por ejemplo, puedes utilizar una aplicación como Microsoft Excel para ordenar los datos basándote en filtros numéricos, alfabéticos y desarrollados a medida, lo que te permite detectar errores con mayor facilidad. Configurar estos filtros en una hoja de cálculo puede resultar difícil, ya que son una funcionalidad secundaria. De forma genérica se puede decir que las hojas de cálculo están diseñadas para trabajar en filas y celdas individuales mientras que las IDTs operan en grandes rangos de datos a la vez. Estas “super-hojas de cálculo” ofrecen una interfaz integrada y fácil de usar a través de la cual los usuarios finales pueden detectar y corregir errores. -En los últimos años se han desarrollado varias herramientas de propósito general para la transformación interactiva de datos, tales como [*Potter’s Wheel ABC*](https://perma.cc/Q6QD-E64N) y [*Wrangler*](http://vis.stanford.edu/papers/wrangler/) (actualmente [*Trifacta Wrangler*](https://www.trifacta.com/products/wrangler/)). Aquí nos centraremos específicamente en [*OpenRefine*](http://openrefine.org/) (anteriormente Freebase Gridworks y Google Refine) pues, en opinión de los autores, es la herramienta más fácil de usar para procesar y limpiar eficientemente grandes cantidades de datos en una interfaz basada en navegador. +En los últimos años se han desarrollado varias herramientas de propósito general para la transformación interactiva de datos, tales como [*Potter’s Wheel ABC*](https://perma.cc/Q6QD-E64N) y [*Wrangler*](https://vis.stanford.edu/papers/wrangler/) (actualmente [*Trifacta Wrangler*](https://www.trifacta.com/products/wrangler/)). Aquí nos centraremos específicamente en [*OpenRefine*](https://openrefine.org/) (anteriormente Freebase Gridworks y Google Refine) pues, en opinión de los autores, es la herramienta más fácil de usar para procesar y limpiar eficientemente grandes cantidades de datos en una interfaz basada en navegador. -Además del perfilado de datos y las operaciones de limpieza, las extensiones de [*OpenRefine*] permiten a los usuarios identificar conceptos en texto no estructurado, un proceso denominado [reconocimiento de nombres de entidades](https://es.wikipedia.org/wiki/Reconocimiento_de_nombres_de_entidades) ([*named-entity recognition*](http://en.wikipedia.org/wiki/Named-entity_recognition), NER, en inglés), pudiendo también cotejar[^1] sus propios datos con bases de conocimiento existentes. Así, [*OpenRefine*] puede ser una práctica herramienta para vincular datos con conceptos y autoridades que ya han sido publicadas en la Web por instituciones como la [*Biblioteca del Congreso de los EEUU*](http://www.loc.gov/index.html) u [OCLC](http://www.oclc.org/home.en.html). La limpieza de datos es un requisito previo para estos pasos; la tasa de éxito del NER y un proceso de coincidencia fructífera entre tus datos y las autoridades externas depende de tu capacidad para hacer tus datos tan coherentes como sea posible. 
+Además del perfilado de datos y las operaciones de limpieza, las extensiones de [*OpenRefine*] permiten a los usuarios identificar conceptos en texto no estructurado, un proceso denominado [reconocimiento de nombres de entidades](https://es.wikipedia.org/wiki/Reconocimiento_de_nombres_de_entidades) ([*named-entity recognition*](https://en.wikipedia.org/wiki/Named-entity_recognition), NER, en inglés), pudiendo también cotejar[^1] sus propios datos con bases de conocimiento existentes. Así, [*OpenRefine*] puede ser una práctica herramienta para vincular datos con conceptos y autoridades que ya han sido publicadas en la Web por instituciones como la [*Biblioteca del Congreso de los EEUU*](https://www.loc.gov/index.html) u [OCLC](https://www.oclc.org/home.en.html). La limpieza de datos es un requisito previo para estos pasos; la tasa de éxito del NER y un proceso de coincidencia fructífera entre tus datos y las autoridades externas depende de tu capacidad para hacer tus datos tan coherentes como sea posible. ## Descripción del ejercicio Powerhouse Museum diff --git a/es/lecciones/mineria-de-datos-en-internet-archive.md b/es/lecciones/mineria-de-datos-en-internet-archive.md index 0c1d1eae4e..e226acf251 100644 --- a/es/lecciones/mineria-de-datos-en-internet-archive.md +++ b/es/lecciones/mineria-de-datos-en-internet-archive.md @@ -400,37 +400,37 @@ Desde luego, para que esta técnica sea útil se requiere hacer algo de [limpiez [^1]: Agradezco a [Shawn Graham](https://hypothes.is/a/AVKeGm0rvTW_3w8Lypo1) por señalar la dependencia de `six` en `pymarc` y brindar una solución. -[Internet Archive]: http://archive.org/ +[Internet Archive]: https://archive.org/ [JSTOR Early Journal Content]: https://archive.org/details/jstor_ejc [biblioteca personal de John Adams]: https://archive.org/details/johnadamsBPL [colección Haití]: https://archive.org/details/jcbhaiti -[Ian Milligan]: http://activehistory.ca/2013/09/the-internet-archive-rocks-or-two-million-plus-free-sources-to-explore/ -[Anti-Slavery Collection]: http://archive.org/details/bplscas +[Ian Milligan]: https://activehistory.ca/2013/09/the-internet-archive-rocks-or-two-million-plus-free-sources-to-explore/ +[Anti-Slavery Collection]: https://archive.org/details/bplscas [internetarchive]: https://pypi.python.org/pypi/internetarchive [pymarc]: https://pypi.python.org/pypi/pymarc/ -[esta carta]: http://archive.org/details/lettertowilliaml00doug -[manuscrito original]: http://archive.org/stream/lettertowilliaml00doug/39999066767938#page/n0/mode/2up -[múltiples archivos]: http://archive.org/download/lettertowilliaml00doug -[Dublin Core]: http://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_dc.xml -[MARCXML]: http://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_marc.xml -[formato MARC 21 de la Biblioteca del Congreso para datos bibliográficos]: http://www.loc.gov/marc/bibliographic/ -[cientos de cartas, manuscritos y publicaciones antiesclavistas]: http://archive.org/search.php?query=collection%3Abplscas&sort=-publicdate +[esta carta]: https://archive.org/details/lettertowilliaml00doug +[manuscrito original]: https://archive.org/stream/lettertowilliaml00doug/39999066767938#page/n0/mode/2up +[múltiples archivos]: https://archive.org/download/lettertowilliaml00doug +[Dublin Core]: https://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_dc.xml +[MARCXML]: https://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_marc.xml +[formato MARC 21 de la Biblioteca del Congreso para datos 
bibliográficos]: https://www.loc.gov/marc/bibliographic/ +[cientos de cartas, manuscritos y publicaciones antiesclavistas]: https://archive.org/search.php?query=collection%3Abplscas&sort=-publicdate [eBook and Texts]: https://archive.org/details/texts -[elementos y sus URL están estructurados]: http://blog.archive.org/2011/03/31/how-archive-org-items-are-structured/ +[elementos y sus URL están estructurados]: https://blog.archive.org/2011/03/31/how-archive-org-items-are-structured/ [búsqueda avanzada]: https://archive.org/advancedsearch.php [esta página]: https://archive.org/search.php?query=collection%3A%28bplscas%29 -[buscar en el Internet Archive usando el módulo de Python que instalamos]: http://internetarchive.readthedocs.io/en/latest/quickstart.html#searching -[búsqueda avanzada en una colección]: http://archive.org/search.php?query=collection%3Abplscas -[downloading]: http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading +[buscar en el Internet Archive usando el módulo de Python que instalamos]: https://internetarchive.readthedocs.io/en/latest/quickstart.html#searching +[búsqueda avanzada en una colección]: https://archive.org/search.php?query=collection%3Abplscas +[downloading]: https://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading [remember those?]: /lessons/code-reuse-and-modularity [son nombrados de acuerdo a reglas específicas]: https://archive.org/about/faqs.php#140 -[manejo de excepciones]: http://docs.python.org/2/tutorial/errors.html#handling-exceptions -[reglas específicas para el campo 260]: http://www.loc.gov/marc/bibliographic/bd260.html -[estándares MARC]: http://www.loc.gov/marc/ +[manejo de excepciones]: https://docs.python.org/2/tutorial/errors.html#handling-exceptions +[reglas específicas para el campo 260]: https://www.loc.gov/marc/bibliographic/bd260.html +[estándares MARC]: https://www.loc.gov/marc/ [1]: https://github.com/edsu/pymarc [algunas funciones que provee para trabajar con archivos MARC XML]: https://github.com/edsu/pymarc/blob/master/pymarc/marcxml.py [Contar frecuencias]: /es/lecciones/contar-frecuencias [lección de introducción a Google Maps]: /lessons/googlemaps-googleearth -[nube de palabras en Wordle]: https://web.archive.org/web/20201202151557/http://www.wordle.net/ +[nube de palabras en Wordle]: https://web.archive.org/web/20201202151557/https://www.wordle.net/ [limpieza de tus datos]: /lessons/cleaning-ocrd-text-with-regular-expressions [Instalar módulos de Python con pip]: /es/lecciones/instalar-modulos-python-pip diff --git a/es/lecciones/normalizar-datos.md b/es/lecciones/normalizar-datos.md index 2590d798b2..0f54d655eb 100644 --- a/es/lecciones/normalizar-datos.md +++ b/es/lecciones/normalizar-datos.md @@ -174,12 +174,12 @@ Para seguir a lo largo de las lecciones futuras es importante que tengas los arc - python-es-lecciones4.zip ([zip sync][]) [De HTML a lista de palabras (parte 2)]: /es/lecciones/de-html-a-lista-de-palabras-2 -[web page]: http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 +[web page]: https://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 [De HTML a lista de palabras (parte 1)]: /es/lecciones/de-html-a-lista-de-palabras-1 [Manipular cadenas de caracteres en Python]: /es/lecciones/manipular-cadenas-de-caracteres-en-python -[Unicode]: http://unicode.org/ -[soporte de Python]: https://web.archive.org/web/20180502053841/http://www.diveintopython.net/xml_processing/unicode.html -[Dive into Python]: 
https://web.archive.org/web/20180416143856/http://www.diveintopython.net/regular_expressions/index.html
+[Unicode]: https://unicode.org/
+[soporte de Python]: https://web.archive.org/web/20180502053841/https://www.diveintopython.net/xml_processing/unicode.html
+[Dive into Python]: https://web.archive.org/web/20180416143856/https://www.diveintopython.net/regular_expressions/index.html
 [zip]: /assets/python-es-lecciones3.zip
 [zip sync]: /assets/python-es-lecciones4.zip
 [página Web]: https://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33
diff --git a/es/lecciones/poniendo-omeka-a-funcionar.md b/es/lecciones/poniendo-omeka-a-funcionar.md
index 8b73c6aee1..d3c9eb4672 100644
--- a/es/lecciones/poniendo-omeka-a-funcionar.md
+++ b/es/lecciones/poniendo-omeka-a-funcionar.md
@@ -32,7 +32,7 @@ doi: 10.46430/phes0022

-[Omeka.net](http://www.omeka.net) facilita la creación de sitios web para mostrar colecciones de ítems.
+[Omeka.net](https://www.omeka.net) facilita la creación de sitios web para mostrar colecciones de ítems.

> *Nota de la traductora*: Antes de empezar es importante aclarar las diferencias entre **Omeka.net** y **Omeka.org**. Este tutorial es sobre **Omeka.net**, una plataforma de publicación en línea que permite a cualquier persona con una cuenta de acceso crear o colaborar en un sitio web para exhibir colecciones y construir exposiciones digitales. **Omeka.net** es una extensión de **Omeka.org** que está disponible para bajar e instalar en un servidor de tu propiedad. La traducción al español del tutorial de *The Programming Historian* sobre [cómo instalar Omeka](/lessons/installing-omeka) en un servidor se encuentra en proceso.

Regístrate para abrir una cuenta en Omeka
@@ -40,7 +40,7 @@ Regístrate para abrir una cuenta en Omeka

{% include figure.html filename="up-and-running-01.png" caption="Regístrate para una cuenta de prueba" %}

-Entra a [www.omeka.net](http://www.omeka.net) y haz clic en **Sign Up**. Elige el plan "Omeka trial" (Plan de prueba). Rellena el formulario de registro. Revisa tu correo electrónico y haz clic en el enlace para activar tu cuenta.
+Entra a [www.omeka.net](https://www.omeka.net) y haz clic en **Sign Up**. Elige el plan "Omeka trial" (Plan de prueba). Rellena el formulario de registro. Revisa tu correo electrónico y haz clic en el enlace para activar tu cuenta.

Crea tu nuevo sitio en Omeka
--------------------------
@@ -62,7 +62,7 @@ Un sitio vacío de Omeka
-------------------

{% include figure.html filename="up-and-running-04.png" caption="Vista pública" %}

-Este es tu sitio vacío de Omeka esperando a ser llenado. Para regresar a tu panel de control (*dashboard*) haz clic en el botón **Back** (Atrás) o escribe **http://www.omeka.net/dashboard**. Esta vez haz clic en **Manage Site** (Administra el sitio).
+Este es tu sitio vacío de Omeka esperando a ser llenado. Para regresar a tu panel de control (*dashboard*) haz clic en el botón **Back** (Atrás) o escribe **https://www.omeka.net/dashboard**. Esta vez haz clic en **Manage Site** (Administra el sitio).

Cambia de plantilla
-------------
@@ -183,5 +183,5 @@ Ahora que has añadido algunos ítems y los has agrupado en una colección, tóm

Recursos adicionales
-----------------------------
-El equipo de Omeka ha compilado un conjunto de muy buenos recursos en las [páginas de ayuda](http://info.omeka.net) del software.
+El equipo de Omeka ha compilado un conjunto de muy buenos recursos en las [páginas de ayuda](https://info.omeka.net) del software.
[Este manual en español](https://www.rubenalcaraz.es/manual-omeka/) contiene información útil para evaluar las ventajas y desventajas de usar **Omeka.net** u **Omeka.org**, al igual que instrucciones generales sobre cómo instalar Omeka en tu servidor.
diff --git a/es/lecciones/preservar-datos-de-investigacion.md b/es/lecciones/preservar-datos-de-investigacion.md
index a780f93a94..3e13cb35bc 100644
--- a/es/lecciones/preservar-datos-de-investigacion.md
+++ b/es/lecciones/preservar-datos-de-investigacion.md
@@ -81,7 +81,7 @@ El momento de documentar depende en gran medida de la persona y del ritmo de la

 Idealmente, los datos de investigación y la documentación deben ser guardados en archivos [independientes de plataforma] como .txt para notas y .csv (valores separados por comas) o .tsv (valores separados por tabuladores) para los datos en tablas. Estos formatos de texto plano son preferibles a los formatos propietarios utilizados por defecto por Microsoft Office o iWork porque pueden abrirse con muchos paquetes de programas y tienen una gran probabilidad de permanecer visibles o editables en el futuro. Muchas *suites* de ofimática (o paquetes de *software* para oficina) incluyen la opción de guardar archivos en formatos .txt, .csv y .tsv, lo que significa que se puede continuar trabajando con programas familiares y aún así tomar las acciones apropiadas para hacer accesible tu trabajo. Comparados con .doc o .xls, estos formatos tienen el beneficio adicional, desde una perspectiva de preservación, de contener solamente elementos legibles por la computadora. Mientras que es una práctica común el uso de negritas, itálicas o colores para indicar encabezados o para establecer una conexión visual entre elementos de los datos, estas anotaciones orientadas a la visualización no son legibles por la computadora y, por lo tanto, no pueden ser consultadas ni buscadas, ni son adecuadas para grandes cantidades de información. Son preferibles los esquemas simples de anotación como el uso de un doble asterisco o tres signos de almohadilla para representar una característica de los datos; en mis propias notas, por ejemplo, tres signos de interrogación indican algo a lo que necesito dar seguimiento, y lo elegí porque "???" se puede encontrar fácilmente con una búsqueda mediante CTRL + F.

-Es probable que en muchas ocasiones estos esquemas de anotación se desprendan de la práctica individual (y en consecuencia deban ser documentados), aunque hay sintaxis disponibles como [Markdown](https://es.wikipedia.org/wiki/Markdown) (los archivos Markdown se guardan como .md). En GitHub se pueden encontrar estos excelentes apuntes para quien quiera seguir -o adaptar- esta sintaxis. Se recomienda el uso de Notepad++ a los usuarios de Windows, aunque de ninguna manera es esencial para trabajar con archivos .md. Los usuarios de Mac o Unix pueden encontrar útil [Komodo Edit] o [Text Wrangler].
+Es probable que en muchas ocasiones estos esquemas de anotación se desprendan de la práctica individual (y en consecuencia deban ser documentados), aunque hay sintaxis disponibles como [Markdown](https://es.wikipedia.org/wiki/Markdown) (los archivos Markdown se guardan como .md). En GitHub se pueden encontrar estos excelentes apuntes para quien quiera seguir -o adaptar- esta sintaxis. Se recomienda el uso de Notepad++ a los usuarios de Windows, aunque de ninguna manera es esencial para trabajar con archivos .md. Los usuarios de Mac o Unix pueden encontrar útil [Komodo Edit] o [Text Wrangler].
#### Resumen 1
@@ -107,22 +107,22 @@ Examinar los URLs es una buena forma de pensar una estructura de datos de invest

 Un buen ejemplo de los primeros son los URLs utilizados por los sitios web de noticias o los servicios de *blogging*. Los URLs de WordPress utilizan el siguiente formato:

 - *Nombre del sitio web*/*año (4 dígitos)*/*mes (2 dígitos)*/*día (2 dígitos)*/*palabras-del-titulo-separadas-por-guiones*
-- <http://cradledincaricature.com/2014/02/06/comic-art-beyond-the-print-shop/>
+- <https://cradledincaricature.com/2014/02/06/comic-art-beyond-the-print-shop/>

 Un estilo similar es utilizado por las agencias de noticias como el periódico *The Guardian*:

 - *Nombre del sitio web*/*subdivisión de sección*/*año (4 dígitos)*/*mes (3 caracteres)*/*día (2 dígitos)*/*palabras-que-describen-contenido-separadas-por-guiones*
-- <http://www.theguardian.com/uk-news/2014/feb/20/rebekah-brooks-rupert-murdoch-phone-hacking-trial>
+- <https://www.theguardian.com/uk-news/2014/feb/20/rebekah-brooks-rupert-murdoch-phone-hacking-trial>

 En los catálogos de archivo, se suelen utilizar URLs estructurados por un elemento de datos. *The British Cartoon Archive* estructura su archivo en línea utilizando el formato:

 - *nombre del sitio web*/*registro*/*número de referencia*
-- <http://www.cartoons.ac.uk/record/SBD0931>
+- <https://www.cartoons.ac.uk/record/SBD0931>

 Y el sitio Old Bailey Online usa este formato:

 - *nombre del sitio web*/browse.jsp?ref=*número de referencia*
-- <http://www.oldbaileyonline.org/browse.jsp?ref=t17800628-33>
+- <https://www.oldbaileyonline.org/browse.jsp?ref=t17800628-33>

 Lo que aprendemos de estos ejemplos es que la combinación de descripciones semánticas con elementos de datos hace consistente y predecible la lectura de los datos estructurados tanto por máquinas como por seres humanos. Transferir esto a los datos digitales acumulados durante el curso de la investigación histórica hace que los datos de investigación sean más fáciles de navegar, buscar y consultar utilizando las herramientas estándar provistas por nuestros sistemas operativos (y, como veremos en próximas lecciones, por herramientas más avanzadas).
@@ -236,11 +236,11 @@ blog (17 October 2013)

 Hitchcock, Tim, 'Judging a book by its URLs', Historyonics blog (3 January 2014)
-
+

 Howard, Sharon, 'Unclean, unclean! What historians can do about sharing our messy research data', Early Modern Notes blog (18 May 2013)
-
+

 Noble, William Stafford, A Quick Guide to Organizing Computational Biology Projects. PLoS Comput Biol 5(7): e1000424 (2009)

@@ -253,7 +253,7 @@ Information Management: Organising Humanities Material' (2011)

 Pennock, Maureen, 'The Twelve Principles of Digital Preservation (and a cartridge in a repository…)', British Library Collection Care blog (3 September 2013)
-
+

 Pritchard, Adam, 'Markdown Cheatsheet' (2013)

@@ -262,10 +262,10 @@ Rosenzweig, Roy, 'Scarcity or Abundance? Preserving the Past in a Digital Era', The American Historical Review 108:3 (2003), 735-762.

 UK Data Archive, 'Documenting your Data'
-
+

 [PRINCE2]: https://es.wikipedia.org/wiki/PRINCE2
 [independientes de plataforma]: https://es.wikipedia.org/wiki/Multiplataforma#Programaci.C3.B3n_multiplataforma
-[Komodo Edit]: http://komodoide.com/komodo-edit/
+[Komodo Edit]: https://komodoide.com/komodo-edit/
 [Text Wrangler]: https://www.barebones.com/products/textwrangler/
diff --git a/es/lecciones/procesamiento-basico-de-textos-en-r.md b/es/lecciones/procesamiento-basico-de-textos-en-r.md
index eb45d25120..ae292f7fa6 100644
--- a/es/lecciones/procesamiento-basico-de-textos-en-r.md
+++ b/es/lecciones/procesamiento-basico-de-textos-en-r.md
@@ -626,11 +626,11 @@ Existen muchos tutoriales genéricos para estos tres ejemplos, además de docume

 [^3]: Hadley Wickham. “tidyverse: Easily Install and Load ‘Tidyverse’ Packages”. R Package, Version 1.1.1. https://cran.r-project.org/web/packages/tidyverse/index.html
 [^4]: Lincoln Mullen and Dmitriy Selivanov. “tokenizers: A Consistent Interface to Tokenize Natural Language Text Convert”. R Package, Version 0.1.4. 
https://cran.r-project.org/web/packages/tokenizers/index.html
 [^5]: Ten en cuenta que los nombres de las funciones como `library` o `install.packages` siempre estarán en inglés. No obstante, se proporciona una traducción de su significado para facilitar la comprensión y se traducen los nombres de las variables. [N. de la T.]
-[^6]: Traducción publicada en CNN en español (12 de enero de 2016) http://cnnespanol.cnn.com/2016/01/12/discurso-completo-de-obama-sobre-el-estado-de-la-union/ [N. de la T.]
-[^7]: Todos los discursos presidenciales del Estado de la Unión fueron descargados de The American Presidency Project at the University of California Santa Barbara (Accedido el 11 de noviembre de 2016) http://www.presidency.ucsb.edu/sou.php
+[^6]: Traducción publicada en CNN en español (12 de enero de 2016) https://cnnespanol.cnn.com/2016/01/12/discurso-completo-de-obama-sobre-el-estado-de-la-union/ [N. de la T.]
+[^7]: Todos los discursos presidenciales del Estado de la Unión fueron descargados de The American Presidency Project at the University of California Santa Barbara (Accedido el 11 de noviembre de 2016) https://www.presidency.ucsb.edu/sou.php
 [^8]: Aquí volvemos a la versión del discurso en su original (inglés) por motivos de continuación del análisis y, en particular, el listado de las palabras más frecuentes usadas en inglés. Seguimos traduciendo los nombres de las variables y de las funciones para facilitar la comprensión en español. [N. de la T.]
 [^9]: Aquí optamos por nombrar a las columnas de la tabla en inglés, como "word" (palabra) y "count" (recuento), para facilitar su interoperabilidad con el conjunto de datos que introducimos más adelante con la función `inner_join`. [N. de la T.]
-[^10]: Peter Norvig. “Google Web Trillion Word Corpus”. (Accedido el 11 de noviembre de 2016) http://norvig.com/ngrams/.
+[^10]: Peter Norvig. “Google Web Trillion Word Corpus”. (Accedido el 11 de noviembre de 2016) https://norvig.com/ngrams/.
 [^11]: Esto ocurre en algunos discursos escritos del Estado de la Unión, donde una lista con puntos de enumeración es segmentada como una única oración larga.
 [^12]: Taylor Arnold. “cleanNLP: A Tidy Data Model for Natural Language Processing”. R Package, Version 0.24. https://cran.r-project.org/web/packages/cleanNLP/index.html
 [^13]: David Mimno. “mallet: A wrapper around the Java machine learning tool MALLET”. R Package, Version 1.0. https://cran.r-project.org/web/packages/mallet/index.html
diff --git a/es/lecciones/publicar-archivos-tei-ceteicean.md b/es/lecciones/publicar-archivos-tei-ceteicean.md
index 7808fa40f9..8f21b760b4 100644
--- a/es/lecciones/publicar-archivos-tei-ceteicean.md
+++ b/es/lecciones/publicar-archivos-tei-ceteicean.md
@@ -34,7 +34,7 @@ Para quienes se inician en el uso de TEI, uno de los escollos más comunes es qu

 Este tutorial te guiará a través de los pasos necesarios para publicar un archivo TEI en línea utilizando CETEIcean, una librería abierta escrita en el lenguaje de programación JavaScript. CETEIcean permite que los documentos TEI se muestren en un navegador web sin transformarlos primero a HTML. CETEIcean carga el archivo TEI dinámicamente en el navegador y cambia el nombre de los elementos de TEI por otros en HTML, de tal forma que estos nos permitan visualizar en el navegador web los fenómenos textuales que marcamos en nuestros archivos usando TEI.
-En primer lugar, una aclaración sobre la visualización de tu trabajo: el método por defecto de CETEIcean para mostrar archivos TEI consiste en cargar los archivos desde otra ubicación. Sin embargo, no todos los navegadores te permitirán cargar los archivos si estos se encuentran almacenados en tu computadora. Puedes hacer el intento, pero si eso no funciona, tendrás que generar un servidor local, colocar los archivos en un servidor en línea, o utilizar un editor de código con funciones de previsualización. Para el caso de este tutorial, seguiremos esta última opción, ya que usaremos el editor [Atom](https://atom.io), con el plug-in `atom-html-preview`. No obstante, existen otras opciones libres para editar archivos TEI y generar previsualizaciones de HTML, como [jEdit](http://www.jedit.org/) o [Visual Studio Code](https://code.visualstudio.com/), y versiones propietarias como [Oxygen](https://www.oxygenxml.com/). +En primer lugar, una aclaración sobre la visualización de tu trabajo: el método por defecto de CETEIcean para mostrar archivos TEI consiste en cargar los archivos desde otra ubicación. Sin embargo, no todos los navegadores te permitirán cargar los archivos si estos se encuentran almacenados en tu computadora. Puedes hacer el intento, pero si eso no funciona, tendrás que generar un servidor local, colocar los archivos en un servidor en línea, o utilizar un editor de código con funciones de previsualización. Para el caso de este tutorial, seguiremos esta última opción, ya que usaremos el editor [Atom](https://atom.io), con el plug-in `atom-html-preview`. No obstante, existen otras opciones libres para editar archivos TEI y generar previsualizaciones de HTML, como [jEdit](https://www.jedit.org/) o [Visual Studio Code](https://code.visualstudio.com/), y versiones propietarias como [Oxygen](https://www.oxygenxml.com/).
    Actualización de marzo de 2025: No recomendamos usar Atom, ya que el software no ha recibido mantenimiento ni actualizaciones desde su cierre en diciembre de 2022. Puedes usar VSCode de la misma manera, siempre que también instales la extensión HTML Preview del marketplace de extensiones. @@ -44,9 +44,9 @@ Deberás entonces descargar e instalar [Atom](https://atom.io) antes de continua {% include figure.html filename="publicar-archivos-tei-ceteicean1.png" caption="Proceso de instalación del plug-in de Atom para previsualizar archivos en HTML" %} -Usaremos como texto de prueba la crónica conocida como *La Argentina Manuscrita*, del hispano-guaraní [Ruy Díaz de Guzmán](https://es.wikipedia.org/wiki/Ruy_D%C3%ADaz_de_Guzm%C3%A1n). Este texto del siglo XVII hace uso del topónimo Argentina por primera vez, para referirse a los extensos territorios del Cono Sur que componían el Río de la Plata y sus adyacencias, es decir, territorios de la actual Argentina, Paraguay, Uruguay, sur de Brasil y Bolivia. Puedes encontrar una edición digital completa del texto en: [http://hdlab.space/La-Argentina-Manuscrita](http://hdlab.space/La-Argentina-Manuscrita). +Usaremos como texto de prueba la crónica conocida como *La Argentina Manuscrita*, del hispano-guaraní [Ruy Díaz de Guzmán](https://es.wikipedia.org/wiki/Ruy_D%C3%ADaz_de_Guzm%C3%A1n). Este texto del siglo XVII hace uso del topónimo Argentina por primera vez, para referirse a los extensos territorios del Cono Sur que componían el Río de la Plata y sus adyacencias, es decir, territorios de la actual Argentina, Paraguay, Uruguay, sur de Brasil y Bolivia. Puedes encontrar una edición digital completa del texto en: [https://hdlab.space/La-Argentina-Manuscrita](https://hdlab.space/La-Argentina-Manuscrita). -Comenzaremos con un archivo simple (aunque un tanto extenso) en formato TEI P5, que queremos hacer visible en un navegador web: [`Ruy_Diaz-La_Argentina_Manuscrita.xml`](http://hdlab.space/La-Argentina-Manuscrita/assets/Ruy_Diaz-La_argentina_manuscrita.tei.xml). Para descargar el archivo haz clic derecho sobre el enlace de descarga y selecciona la opción 'Save Link As...'. +Comenzaremos con un archivo simple (aunque un tanto extenso) en formato TEI P5, que queremos hacer visible en un navegador web: [`Ruy_Diaz-La_Argentina_Manuscrita.xml`](https://hdlab.space/La-Argentina-Manuscrita/assets/Ruy_Diaz-La_argentina_manuscrita.tei.xml). Para descargar el archivo haz clic derecho sobre el enlace de descarga y selecciona la opción 'Save Link As...'. ## Paso 1: Crear una estructura para nuestros archivos Comenzaremos por establecer una estructura para nuestros archivos, es decir, una carpeta contenedora con el nombre 'tutorial_es' con las subcarpetas y archivos que te indicaremos a continuación. Puedes descargar el directorio completo del repositorio [CETEIcean en GitHub](https://github.com/TEIC/CETEIcean) y trabajar en la carpeta 'tutorial_es', o puedes descargar los archivos individualmente, siempre y cuando mantengan la misma estructura que en GitHub, que es la siguiente: @@ -270,7 +270,7 @@ CETEIcean posee una cantidad de comportamientos integrados que puedes reemplazar Si haces esto, puede que desees agregar estilos de CSS o comportamientos para elegir la forma en la que se visualizará el contenido del TEI Header en el navegador. -En este tutorial no agotamos todas las posibilidades para la presentación de nuestro documento fuente. 
Te invitamos a que continúes experimentando por tu cuenta en las diferentes formas en las que un marcado de TEI puede visualizarse en un navegador usando CETEIcean. Puedes encontrar más información en [CETEIcean](http://teic.github.io/CETEIcean/).
+En este tutorial no agotamos todas las posibilidades para la presentación de nuestro documento fuente. Te invitamos a que continúes experimentando por tu cuenta en las diferentes formas en las que un marcado de TEI puede visualizarse en un navegador usando CETEIcean. Puedes encontrar más información en [CETEIcean](https://teic.github.io/CETEIcean/).

## Referencias

@@ -280,11 +280,11 @@ Allés Torrent, Susanna. 2019. "Introducción a la Text Encoding Initiative". *T

 Atom. A hackable text editor for the 21st Century. https://atom.io

-Cayless, Hugh y Viglianti, Raffaele. CETEIcean. http://teic.github.io/CETEIcean/
+Cayless, Hugh y Viglianti, Raffaele. CETEIcean. https://teic.github.io/CETEIcean/

-del Rio Riande, Gimena; De León, Romina, y Hernández, Nidia. 2019. *Historia de la conquista del Río de la Plata o La Argentina manuscrita*. http://hdlab.space/La-Argentina-Manuscrita/
+del Rio Riande, Gimena; De León, Romina, y Hernández, Nidia. 2019. *Historia de la conquista del Río de la Plata o La Argentina manuscrita*. https://hdlab.space/La-Argentina-Manuscrita/

-Jedit. Programmer's text editor. Stable Version: 5.6.0. http://www.jedit.org/
+Jedit. Programmer's text editor. Stable Version: 5.6.0. https://www.jedit.org/

 Oxygen. XML Editor. https://www.oxygenxml.com/

diff --git a/es/lecciones/retirada/introduccion-control-versiones-github-desktop.md b/es/lecciones/retirada/introduccion-control-versiones-github-desktop.md
index 11bd1d4d6e..2cf9bd0593 100644
--- a/es/lecciones/retirada/introduccion-control-versiones-github-desktop.md
+++ b/es/lecciones/retirada/introduccion-control-versiones-github-desktop.md
@@ -142,7 +142,7 @@ Esta lección no cubre la sintaxis Markdown por razones de espacio, pero es úti

 ### Editores de texto

-Para escribir un documento de texto plano necesitamos un editor. Hay muchos editores disponibles, algunos gratuitos, otros de pago. Algunos son fáciles de usar mientras que otros tienen una curva de aprendizaje y un potencial que sobrepasa las funciones de un editor de texto. A largo plazo, un editor avanzado como Vim o Emacs puede ahorrarte tiempo pero de momento puedes empezar con un editor más simple. Por ejemplo, [Atom](https://atom.io/) es un buen editor desarrollado por GitHub que destaca la sintaxis Markdown y, además, se integra con la plataforma GitHub. Es gratuito y su código es abierto; además, incluye un [manual](http://flight-manual.atom.io/) de instrucciones muy exhaustivo.
+Para escribir un documento de texto plano necesitamos un editor. Hay muchos editores disponibles, algunos gratuitos, otros de pago. Algunos son fáciles de usar mientras que otros tienen una curva de aprendizaje y un potencial que sobrepasa las funciones de un editor de texto. A largo plazo, un editor avanzado como Vim o Emacs puede ahorrarte tiempo pero de momento puedes empezar con un editor más simple. Por ejemplo, [Atom](https://atom.io/) es un buen editor desarrollado por GitHub que destaca la sintaxis Markdown y, además, se integra con la plataforma GitHub. Es gratuito y su código es abierto; además, incluye un [manual](https://flight-manual.atom.io/) de instrucciones muy exhaustivo.

 Si no quieres instalar un programa nuevo, puedes utilizar uno de los editores incluidos en tu ordenador, como TextEdit para Mac.
Si decides continuar aprendiendo Markdown en el futuro, te recomendamos utilizar un editor de texto que destaque la sintaxis Markdown, entre otras funcionalidades. @@ -202,7 +202,7 @@ Aunque hay diferencias entre el control de versiones de código y de textos, las Es importante que tus anotaciones y los mensajes asociados que las describen tengan sentido y sean específicos. Escribir buenas descripciones de las anotaciones requiere reflexión. A veces, los mensajes que para ti son claros en el momento de la anotación se vuelven difíciles de comprender en el futuro. Si vas a utilizar el control de versiones con otras personas es importante que tus colaboradores puedan entenderte. El control de versiones para gestionar cambios en documentos funciona mejor cuando nos esforzamos un poco en pensar cómo utilizamos el programa. Por tanto, cuando se lleva a cabo un trabajo colaborativo es importante aclarar estas cuestiones y compartir una misma visión para usar el control de cambios de manera efectiva. -Una manera de enfrentarse a este problema es intentar seguir un 'estilo de anotaciones'. Por ejemplo, te recomendamos seguir la influyente [sugerencia de Tim Pope](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html) cuando realices anotaciones. La sugerencia de Tim Pope [tiene en cuenta](https://github.com/blog/926-shiny-new-commit-styles), parcialmente, la interfaz de GitHub Desktop para anotar cambios y describirlos pero entender el formato te ayudará a poner en práctica su estrategia. El siguiente mensaje es una adaptación de la propuesta de Tim Pope, que se centra en la anotación de texto (en lugar de código): +Una manera de enfrentarse a este problema es intentar seguir un 'estilo de anotaciones'. Por ejemplo, te recomendamos seguir la influyente [sugerencia de Tim Pope](https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html) cuando realices anotaciones. La sugerencia de Tim Pope [tiene en cuenta](https://github.com/blog/926-shiny-new-commit-styles), parcialmente, la interfaz de GitHub Desktop para anotar cambios y describirlos pero entender el formato te ayudará a poner en práctica su estrategia. El siguiente mensaje es una adaptación de la propuesta de Tim Pope, que se centra en la anotación de texto (en lugar de código): ``` Breve resumen (50 o menos caracteres) con mayúscula inicial. @@ -331,6 +331,6 @@ GitHub Desktop es una forma sencilla de aprender a controlar versiones con GitHu * [Atlassian](https://www.atlassian.com/git/tutorials): contiene tutoriales más avanzados (pero fáciles de entender) de Git. Ponen el acento en las diferencias entre Git y otros controles de versiones; esto puede no ser relevante para ti pero te ayudará a comprender el funcionamiento de Git de manera más detallada. * [Pro Git](https://git-scm.com/book/en/v2): un libro exclusivamente sobre Git. Empieza con el funcionamiento básico y luego pasa a tratar asuntos más avanzados de Git. * Para [estudiantes](https://education.github.com/pack) e [investigadores](https://github.com/blog/1840-improving-github-for-science) GitHub ofrece repositorios privados sin pagar por una suscripción. Estos repositorios pueden ser útiles para borradores o notas que no queremos publicar. Nota: no es muy aconsejable guardar contenido delicado incluso en un repositorio privado en GitHub. -* [ProfHacker](https://web.archive.org/web/20170716182645/http://www.chronicle.com/blogs/profhacker/tag/github) tiene varias entradas sobre proyectos que utilizan GitHub en el contexto académico. 
+* [ProfHacker](https://web.archive.org/web/20170716182645/https://www.chronicle.com/blogs/profhacker/tag/github) tiene varias entradas sobre proyectos que utilizan GitHub en el contexto académico.
 * [GitHub, Academia, and Collaborative Writing](https://www.hastac.org/blogs/harrisonm/2013/10/12/github-academia-and-collaborative-writing) reflexiona sobre el uso de GitHub para la escritura colaborativa.
 * La lección [Introducción a Bash](/lessons/intro-to-bash) te permitirá aprender más sobre la línea de comandos, muy útil para utilizar GitHub.
diff --git a/es/lecciones/retirada/sparql-datos-abiertos-enlazados.md b/es/lecciones/retirada/sparql-datos-abiertos-enlazados.md
index 80d39121fa..5d5c9d6c72 100644
--- a/es/lecciones/retirada/sparql-datos-abiertos-enlazados.md
+++ b/es/lecciones/retirada/sparql-datos-abiertos-enlazados.md
@@ -1,429 +1,429 @@
----
-title: |
- Uso de SPARQL para acceder a datos abiertos enlazados
-authors:
-- Matthew Lincoln
-date: 2015-11-24
-translation_date: 2017-05-20
-editors:
-- Fred Gibbs
-reviewers:
-- Patrick Murray-John
-- Jason Heppler
-- Will Hanley
-- Fred Gibbs
-translator:
-- Nuria Rodríguez Ortega
-translation-editor:
-- Antonio Rojas Castro
-translation-reviewer:
-- Antonio Rojas Castro
-- Juan Antonio Pastor Sánchez
-review-ticket: https://github.com/programminghistorian/ph-submissions/issues/67
-layout: lesson
-original: graph-databases-and-SPARQL
-redirect_from:
-- /es/lessons/graph-databases-and-SPARQL
-- /es/lecciones/sparql-datos-abiertos-enlazados
-difficulty: 2
-activity: acquiring
-topics: [lod]
-abstract: "Esta lección explica por qué numerosas instituciones culturales están adoptando bases de datos orientadas a grafos y cómo los investigadores pueden acceder a estos datos a través de consultas realizadas en el lenguaje llamado SPARQL."
-retired: true
-retirement-reason: |
- El Museo Británico no ha mantenido el acceso a su base de datos de colecciones de una manera consistente. Aunque la sintaxis y los comandos de SPARQL siguen siendo correctos, las URLs a las que intentan conectarse son ahora demasiado inconsistentes para su uso en una lección funcional.
-avatar_alt: Grabado con dos peces unidos por una rama en sus bocas.
-doi: 10.46430/phes0027
----
-
-Objetivos de la lección
------------------------
-
-Esta lección explica por qué numerosas instituciones culturales están adoptando bases de datos orientadas a grafos (*graph databases*) y cómo los investigadores pueden acceder a estos datos a través de consultas realizadas en el lenguaje llamado SPARQL.
-
-{% include toc.html %}
-
-
-
-
-
-
-## Bases de datos orientadas a grafo, RDF y datos abiertos enlazados (Linked Open Data, LOD)
-
-Actualmente, numerosas instituciones culturales están ofreciendo información sobre sus colecciones a través de las denominadas API ([*Application Programming Interfaces*](/lessons/intro-to-the-zotero-api.html)). Estas API son instrumentos muy eficaces para acceder de manera automatizada a registros individuales, sin embargo, no constituyen el procedimiento ideal cuando tratamos con datos culturales debido a que las API están estructuradas para trabajar con un conjunto predeterminado de consultas (*queries*). Por ejemplo, un museo puede tener información sobre donantes, artistas, obras de arte, exposiciones, procedencia de sus obras (*provenance*), etc., pero su API puede ofrecer solo una recuperación orientada a objetos, haciendo difícil o imposible buscar datos relacionados con donantes, artistas, etc.
Así pues, esta estructura es interesante si el objetivo es buscar información sobre objetos particulares; sin embargo, puede complicar la operación de agregar información sobre los artistas o donantes que también se encuentran registrados en la base de datos.
-
-Las bases de datos RDF son muy apropiadas para expresar relaciones complejas entre múltiples entidades, como personas, lugares, eventos y conceptos ligados a objetos individuales. Estas bases de datos se denominan habitualmente bases de datos orientadas a grafos (*graph databases*) porque estructuran la información como un grafo o red, donde un conjunto de recursos o nodos están conectados entre sí mediante aristas (o enlaces) que describen las relaciones establecidas entre dichos recursos y/o nodos.
-
-Dado que las bases de datos RDF admiten el uso de URL, estas pueden estar accesibles *online* y también pueden enlazarse a otras bases de datos, de ahí el término "datos abiertos enlazados" (*Linked Open Data*, LOD). Importantes colecciones artísticas, entre las que se incluyen las del [British Museum](http://collection.britishmuseum.org/), [Europeana](http://labs.europeana.eu/api/linked-open-data-introduction), el [Smithsonian American Art Museum](http://americanart.si.edu/) y el [Yale Center for British Art](http://britishart.yale.edu/collections/using-collections/technology/linked-open-data), han publicado sus colecciones de datos como LOD. El [Getty Vocabulary Program](http://vocab.getty.edu/) también ha publicado sus vocabularios controlados (TGN, ULAN y AAT) como LOD.
-
-SPARQL es el lenguaje utilizado para interrogar este tipo de bases de datos. Este lenguaje es particularmente potente porque obvia las perspectivas que los usuarios transfieren a los datos. Una consulta sobre objetos y una consulta sobre donantes son prácticamente equivalentes en estas bases de datos. Lamentablemente, numerosos tutoriales sobre SPARQL utilizan modelos de datos tan extremadamente simplificados que no son operativos cuando se trata de utilizar las complejas bases de datos desarrolladas por las instituciones culturales. Este tutorial ofrece un curso intensivo sobre SPARQL utilizando un conjunto de datos (*dataset*) que un humanista podría realmente encontrar en Internet. En concreto, en este tutorial aprenderemos cómo interrogar la colección LOD del British Museum.
-
-### RDF en pocas palabras
-
-RDF representa la información en una declaración triple -también llamada tripleta- que sigue la estructura sujeto-predicado-objeto. Por ejemplo:
-
-```
-<La ronda de noche> <fue creado por> <Rembrandt van Rijn> .
-
-```
-
-(Observa que, como toda buena oración, estas declaraciones terminan con un punto y final).
-
-En este ejemplo, el sujeto `<La ronda de noche>` y el objeto `<Rembrandt van Rijn>` pueden ser considerados como dos nodos de un grafo, donde el predicado `<fue creado por>` define la arista -o relación- entre ellos. (Técnicamente, `<fue creado por>` puede ser tratado en otras consultas como un objeto o un sujeto, pero esta cuestión escapa al alcance de este tutorial).
-
-Una seudobase de datos RDF podría contener declaraciones interrelacionadas entre sí, como las siguientes:
-
-```
-...
-<La ronda de noche> <fue creado por> <Rembrandt van Rijn> .
-<La ronda de noche> <fue creado en> <1642>.
-<La ronda de noche> <tiene medio> <óleo sobre lienzo>.
-<Rembrandt van Rijn> <nació en> <1606>.
-<Rembrandt van Rijn> <tiene nacionalidad> <holandesa> .
-<Johannes Vermeer> <tiene nacionalidad> <holandesa> .
-<La tasadora de perlas> <fue creado por> <Johannes Vermeer> .
-<La tasadora de perlas> <tiene medio> <óleo sobre lienzo>.
-...
-```
-
-Si visualizásemos estas declaraciones como nodos y aristas de un grafo o red, la representación sería como sigue:
-
-{% include figure.html caption="Visualización en red del seudoRDF mostrado más arriba. Las flechas indican la 'dirección' del predicado. Por ejemplo, que '*La tasadora de perlas* fue creada por Vermeer' y no al revés. Diagrama reconstruido por Nuria Rodríguez Ortega." 
filename="sparql-lod-01.png" %} - -Las tradicionales bases de datos relacionales pueden distribuir atributos sobre obras de arte y artistas en tablas separadas. En las bases de datos RDF u orientadas a grafos, todos estos datos pertenencen a un mismo mismo grafo interconectado, lo que permite a los usuarios una mayor flexibilidad a la hora de decidir cómo quieren interrogar estos recursos. - -### Buscando RDF con SPARQL - -SPARQL nos permite traducir datos en grafo, intensamente enlazados, en datos normalizados en formato tabular, esto es, distribuidos en filas y columnas, que se pueden abrir en programas como Excel o importar a programas de visualización, tales como [plot.ly](https://plot.ly/) o [Palladio](http://hdlab.stanford.edu/palladio/). - -Resulta útil pensar las consultas SPARQL como un [Mad Lib](https://en.wikipedia.org/wiki/Mad_Libs) -un conjunto de oraciones con espacios en blanco-. La base de datos tomará esta consulta y encontrará cada conjunto de oraciones que encaje correctamente en estos espacios en blanco, devolviéndonos los valores coincidentes como una tabla. Veamos esta consulta SPARQL: - -``` -SELECT ?pintura -WHERE { - ?pintura <óleo sobre lienzo> . -} -``` - -En este consulta, `?pintura` representa el nodo (o nodos) que la bases de datos nos devolverá. Una vez recibida la consulta, la base de datos buscará todos los valores para `?pintura` que adecuadamente complete la declaración RDF ` <óleo sobre lienzo>`. - -{% include figure.html caption="Visualización de lo que nuestra consulta está buscando. Diagrama reconstruido por Nuria Rodríguez Ortega." filename="sparql-lod-02.png" %} - - -Cuando la consulta interroga la base de datos completa, esta busca los sujetos, predicados y objetos que coinciden con esta declaración, exluyendo, al mismo tiempo, el resto de datos. - -{% include figure.html filename="sparql-lod-03.png" caption="Visualización de la consulta SPARQL con los elementos mencionados en naranja y los elementos seleccionados (aquellos que nos serán devueltos en los resultados) en rojo. Diagrama reconstruido por Nuria Rodríguez Ortega." %} - -Nuestros resultados podrían tener este aspecto: - -| **pinturas** | -| --------------------- | -| La ronda de noche | -| La tasadora de perlas | - -Ahora bien, lo que hace a RDF y a SPARQL herramientas tan potentes es su habilidad para crear consultas complejas que referencian múltiples variables al mismo tiempo. Por ejemplo, podríamos buscar en nuestra seudobase de datos RDF pinturas creadas por cualquier artista que fuese holandés: - -``` -SELECT ?artista ?pintura -WHERE { - ?artista . - ?pintura ?artista . - } -``` - -En este ejemplo, hemos introducido una segunda variable: `?artista`. La base de datos RDF devolverá todas las combinaciones conincidentes de `?artista` y `?pintura` que encajen en ambas declaraciones. - -{% include figure.html filename="sparql-lod-04.png" caption="Visualización de la consulta SPARQL con los elementos mencionados en naranja y los elementos seleccionados (aquellos que serán recuperados en los resultados en rojo). Diagrama reconstruido por Nuria Rodríguez Ortega." %} - -| artistas | pinturas | -| ------------------ | --------------------- | -| Rembrandt van Rijn | La ronda de noche | -| Johannes Vermeer | La tasadora de perlas | - -### URI y literales - -Hasta ahora, hemos visto una representación facticia de RDF que utiliza un texto fácil de leer. 
Sin embargo, RDF se almacena principalmente en formato URI (*Uniform Resource Identifiers*), que separa las entidades conceptuales de sus etiquetas lingüísticas. (Ten en cuenta que una URL, o *Uniform Resource Locator*, es una URI accesible desde la web). En RDF real, nuestra declaración original:
-
-```
-<La ronda de noche> <fue creado por> <Rembrandt van Rijn> .
-```
-
-sería más parecida a lo siguiente:
-
-```
-<http://data.rijksmuseum.nl/item/8909812347> <http://purl.org/dc/terms/creator> <http://dbpedia.org/resource/Rembrandt> .
-```
-
-*N.B. el Rijksmuseum todavía no ha desarrollado su propio sitio LOD, por lo que en esta consulta la URI responde únicamente a objetivos de demostración.*
-
-A fin de obtener una versión legible desde el punto de vista humano de la información representada por cada una de estas URI, lo que hacemos realmente es recuperar más declaraciones RDF. Incluso el predicado en esta declaración tiene su propia etiqueta literal:
-
-```
-<http://data.rijksmuseum.nl/item/8909812347> <http://purl.org/dc/terms/title> "La ronda de noche".
-<http://purl.org/dc/terms/creator> <http://www.w3.org/2000/01/rdf-schema#label> "fue creado por".
-<http://dbpedia.org/resource/Rembrandt> <http://xmlns.com/foaf/0.1/name> "Rembrandt van Rijn".
-```
-
-Como se puede observar, a diferencia de las URI que en esta consulta están enmarcadas por los signos `<>`, los *objetos* son cadenas de texto entrecomilladas. Esto es lo que se conoce como *literales* (*literals*). Los literales representan valores, mientras que las URI representan referencias. Por ejemplo, `<http://dbpedia.org/resource/Rembrandt>` representa una entidad que puede referenciar (y puede ser referenciada por) muchas otras declaraciones (fechas de nacimiento, discípulos, miembros de la familia, etc.), mientras que la cadena de texto `"Rembrandt van Rijn"` solo se representa a sí misma. Otros valores literales en RDF incluyen fechas y números.
-
-Fijémonos ahora en los predicados de estas declaraciones, con nombres de dominio como `purl.org`, `w3.org` y `xmlns.com`. Estos son algunos de los numerosos proveedores de ontologías que ayudan a estandarizar el modo en que describimos relaciones entre bits de información como "título", "etiqueta", "creador" o "nombre". Cuanto más trabajemos con RDF/LOD, más proveedores de este tipo encontraremos.
-
-Las URI pueden llegar a ser difíciles de manejar cuando se componen consultas SPARQL. Para simplificar este proceso se utilizan los *prefijos* (*prefixes*). Los prefijos son atajos que nos liberan de tener que escribir toda la larga cadena de caracteres que constituye una URI. Por ejemplo, recordemos el predicado para recuperar el título de *La ronda de noche*, `<http://purl.org/dc/terms/title>`. Con los siguientes prefijos, solo necesitamos escribir `dct:title` cuando queramos utilizar un predicado `purl.org`. `dct:` representa la cadena completa `http://purl.org/dc/terms/`, y `'title'` simplemente se agrega al final de este enlace.
-
-Por ejemplo, con el prefijo `PREFIX rkm: <http://data.rijksmuseum.nl/>`, que representa la cadena completa, agregado al inicio de nuestra consulta SPARQL, `<http://data.rijksmuseum.nl/item/8909812347>` se convierte en `rkm:item/8909812347`.
-
-Debemos ser conscientes de que los prefijos se pueden asignar arbitrariamente a cualquier abreviatura que queramos; así, diferentes puntos de entrada (*endpoints*) pueden utilizar prefijos ligeramente diferentes para el mismo espacio de nombre (*namespace*) (por ejemplo: `dct` vs. `dcterms` para `<http://purl.org/dc/terms/>`).
-
-### Términos para revisar
-
-* **SPARQL** - *Protocol and RDF Query Language* - El lenguaje utilizado para interrogar bases de datos RDF u orientadas a grafos.
-* **RDF** - *Resource Description Framework* - Un método para estructurar datos en forma de grafo o como una red de declaraciones conectadas más que como una serie de tablas.
-* **LOD** - *Linked Open Data* (datos abiertos enlazados) - LOD son datos RDF publicados *online* en formato URI de modo que los desarrolladores pueden referenciarlos de manera fiable y sin ambigüedad.
-* **declaración** - a veces denominada "tripleta", una declaración RDF es una unidad de conocimiento que comprende sujeto, predicado y objeto.
-* **URI** - *Uniform Resource Identifier* - una cadena de caracteres que identifica un recurso. Las declaraciones RDF utilizan URI para enlazar varios recursos. Una URL, o *Uniform Resource Locator*, es un tipo de URI que apunta a un determinado recurso en la web.
-* **literal** - En las declaraciones RDF, algunos objetos no referencian recursos con una URI sino que vehiculan un valor, que puede ser un texto (`"Rembrandt van Rijn"`), un número (`5`) o una fecha (`1606-06-15`). Estos objetos se conocen como literales.
-* **prefijo** - A fin de simplificar las consultas SPARQL, un usuario puede especificar prefijos que funcionan como abreviaturas de las URI completas. Estas abreviaturas, o **QNames**, se utilizan también en los espacios de nombre (*namespaces*) de los documentos XML.
-
-## Consultas basadas en casos reales
-
-### Todas las declaraciones para un objeto
-
-Vamos a empezar nuestra primera consulta utilizando el [punto de entrada SPARQL del British Museum](http://collection.britishmuseum.org/sparql). Un punto de entrada SPARQL es una dirección web que acepta consultas SPARQL y devuelve resultados. El punto de entrada del British Museum funciona como muchos otros: cuando accedemos a él a través de un navegador web, encontramos una caja de texto para componer las consultas.
-
-{% include figure.html filename="sparql-lod-05.png" caption="Web del punto de entrada SPARQL del British Museum. Para todas las consultas de este tutorial, hay que asegurarse de haber dejado las casillas 'Include inferred' y 'Expand results over equivalent URIs' sin marcar." %}
-
-
-Cuando empezamos a explorar una nueva base de datos RDF, resulta útil examinar, a modo de ejemplo, las relaciones que emanan de un [objeto en concreto](http://collection.britishmuseum.org/resource?uri=http://collection.britishmuseum.org/id/object/PPA82633).
-
-(Para cada una de las siguientes consultas, clica en el enlace "Run query" situado más abajo para ver los resultados. La puedes ejecutar tal y como está o modificarla antes. En este último caso, recuerda que es necesario dejar sin marcar la casilla "Include inferred" antes de ejecutar la consulta).
-
-```
-SELECT ?p ?o
-WHERE {
-  <http://collection.britishmuseum.org/id/object/PPA82633> ?p ?o .
-}
-```
-
-[Run query](http://collection.britishmuseum.org/sparql?query=SELECT+*%0D%0AWHERE+%7B%0D%0A++%3Chttp%3A%2F%2Fcollection.britishmuseum.org%2Fid%2Fobject%2FPPA82633%3E+%3Fp+%3Fo+.%0D%0A++%7D&_implicit=false&_equivalent=false&_form=%2Fsparql)
-
-Con la orden `SELECT ?p ?o`, le estamos diciendo a la base de datos que nos devuelva los valores de `?p` y `?o` descritos en el comando `WHERE {}`. Esta consulta devuelve cada declaración para la cual nuestra obra de arte seleccionada, `<http://collection.britishmuseum.org/id/object/PPA82633>`, es el sujeto. `?p` ocupa la posición central en la declaración RDF en el comando `WHERE {}`, por lo que esta devuelve cualquier predicado que coincide con la declaración, mientras que `?o`, en la posición final, devuelve todos los objetos. Aunque yo las he nombrado como `?p` y `?o`, en realidad, tal y como se puede ver en el ejemplo inferior, es posible nombrar estas variables del modo que nosotros queramos. De hecho, será útil darles nombres significativos para las consultas complejas que siguen a continuación.
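
A modo de esbozo mínimo, esta es la misma consulta con las variables renombradas (los nombres `?predicado` y `?objeto` son una elección ilustrativa nuestra, no parte del modelo de datos): las coincidencias devueltas son idénticas, solo cambian los encabezados de las columnas de resultados.

```
# Esbozo ilustrativo: los nombres ?predicado y ?objeto son una elección nuestra
SELECT ?predicado ?objeto
WHERE {
  <http://collection.britishmuseum.org/id/object/PPA82633> ?predicado ?objeto .
}
```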
-
-{% include figure.html filename="sparql-lod-06.png" caption="Listado inicial de todos los predicados y objetos asociados con una obra de arte en el British Museum." %}
-
-
-El punto de entrada del British Museum formatea la tabla de resultados con enlaces para cada una de las variables, que son, en realidad, nodos RDF, por lo que clicando en cada uno de estos enlaces podemos ver todos los predicados y objetos para cada uno de los nodos seleccionados. Advierte que el British Museum incluye automáticamente un amplio rango de prefijos SPARQL en sus consultas, por lo que encontraremos numerosos enlaces mostrados en su versión abreviada; si pasamos el ratón sobre ellos, podremos ver las URI sin abreviar.
-
-{% include figure.html filename="sparql-lod-07.png" caption="Visualización del conjunto de nodos recuperados a través de la primera consulta realizada a la base de datos del British Museum. Los elementos de este grafo coloreados en rojo se encuentran también en la tabla de resultados mostrada más arriba. Se han incluido niveles adicionales en la jerarquía para mostrar cómo esta obra en particular se encuentra conectada en el grafo general que constituye la base de datos del BM." %}
-
-
-Veamos ahora cómo se almacena la información de tipo objeto: busca el predicado `<bmo:PX_object_type>` (marcado en la tabla anterior) y clica en el enlace `thes:x8577` para acceder al nodo que describe el tipo de objeto "print" (grabado).
-
-{% include figure.html filename="sparql-lod-08.png" caption="Página del recurso `thes:x8577` ('print') en el conjunto de datos enlazados del British Museum." %}
-
-Como se puede observar, este nodo tiene una etiqueta (*label*) en texto plano, así como enlaces a nodos del tipo "objetos artísticos" con los que se relaciona en el conjunto de la base de datos.
-
-### Consultas complejas
-
-Para encontrar otros objetos del mismo tipo descritos con la etiqueta "print", podemos invocar esta consulta:
-
-```
-PREFIX bmo: <http://www.researchspace.org/ontology/>
-PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
-
-SELECT ?object
-WHERE {
-
-  # Busca todos los valores de ?object que tengan un "object type" dado
-  ?object bmo:PX_object_type ?object_type .
-
-  # El "object type" debería tener la etiqueta "print"
-  ?object_type skos:prefLabel "print" .
-}
-LIMIT 10
-```
-
-[Run query](https://collection.britishmuseum.org/sparql#query=PREFIX+bmo%3A+%3Chttp%3A%2F%2Fwww.researchspace.org%2Fontology%2F%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0A%0ASELECT+%3Fobject%0AWHERE+%7B%0A%0A++%23+Search+for+all+values+of+%3Fobject+that+have+a+given+%22object+type%22%0A++%3Fobject+bmo%3APX_object_type+%3Fobject_type+.%0A%0A++%23+That+object+type+should+have+the+label+%22print%22%0A++%3Fobject_type+skos%3AprefLabel+%22print%22+.%0A%7D%0ALIMIT+10) / [User-generated query](https://hypothes.is/a/AVLH7aAMvTW_3w8Ly19w)
-
-{% include figure.html filename="sparql-lod-09.png" caption="Tabla resultante de nuestra consulta para todos los objetos del tipo 'print'." %}
-
-Recuerda que, dado que `"print"` funciona aquí como un literal, lo escribimos entrecomillado en nuestra consulta. Cuando se incluyen literales en las consultas SPARQL, la base de datos solo devuelve coincidencias exactas para estos valores.
-
-Advierte también que, dado que `?object_type` no se encuentra presente en el comando `SELECT`, este no se mostrará en la tabla de resultados. Sin embargo, resulta esencial para estructurar nuestra consulta, porque es esto lo que permite conectar los puntos desde `?object` con la etiqueta `"print"`.
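
Como ilustración, un esbozo que sí incluye la etiqueta de tipo en el `SELECT` (el nombre `?type_label` es una elección ilustrativa nuestra), de forma que la etiqueta aparezca como columna adicional junto a cada objeto:

```
PREFIX bmo: <http://www.researchspace.org/ontology/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

# Esbozo ilustrativo: ?type_label es un nombre de variable elegido por nosotros
SELECT ?object ?type_label
WHERE {
  ?object bmo:PX_object_type ?object_type .
  ?object_type skos:prefLabel ?type_label .
}
LIMIT 10
```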
-
-### FILTER
-
-En los ejemplos anteriores, nuestra consulta SPARQL ha buscado una coincidencia exacta para el tipo de objeto con la etiqueta "print". Sin embargo, con frecuencia querremos encontrar valores literales que caen dentro de un determinado rango, como son las fechas. Para ello utilizaremos el comando `FILTER`.
-
-Para localizar las URI de todos los grabados presentes en la base de datos del British Museum creados entre 1580 y 1600, necesitaremos, en primer lugar, averiguar dónde se almacenan en la base de datos las fechas en relación con los objetos, y entonces añadir referencias a estas fechas en nuestra consulta. De manera similar al procedimiento que hemos seguido a través de un único enlace para determinar un tipo de objeto, debemos ahora movernos a través de diversos nodos para encontrar las fechas de producción asociadas a un objeto dado:
-
-{% include figure.html filename="sparql-lod-10.png" caption="Visualización de la parte del modelo de datos del British Museum donde las fechas de producción están conectadas a los objetos." %}
-
-```
-PREFIX bmo: <http://www.researchspace.org/ontology/>
-PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
-PREFIX ecrm: <http://www.cidoc-crm.org/cidoc-crm/>
-PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
-
-# Recupera enlaces de objetos y fechas de creación
-SELECT ?object ?date
-WHERE {
-
-  # Utilizaremos nuestro comando previo para buscar solo
-  # objetos del tipo "print"
-  ?object bmo:PX_object_type ?object_type .
-  ?object_type skos:prefLabel "print" .
-
-  # Necesitamos enlazar diversos nodos para encontrar la
-  # fecha de creación asociada con un objeto
-  ?object ecrm:P108i_was_produced_by ?production .
-  ?production ecrm:P9_consists_of ?date_node .
-  ?date_node ecrm:P4_has_time-span ?timespan .
-  ?timespan ecrm:P82a_begin_of_the_begin ?date .
-
-  # Como se ve, es necesario conectar unos cuantos puntos
-  # para llegar al nodo de la fecha. Ahora que lo tenemos, podemos
-  # filtrar nuestros resultados. Dado que estamos filtrando por fecha,
-  # debemos agregar la etiqueta ^^xsd:date después de nuestra cadena de fecha.
-  # Esta etiqueta le dice a la base de datos que interprete la cadena
-  # "1580-01-01" como la fecha 1 de enero de 1580.
-
-  FILTER(?date >= "1580-01-01"^^xsd:date &&
-         ?date <= "1600-01-01"^^xsd:date)
-}
-```
-
-[Run query](https://collection.britishmuseum.org/sparql#query=PREFIX+bmo%3A+%3Chttp%3A%2F%2Fwww.researchspace.org%2Fontology%2F%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+ecrm%3A+%3Chttp%3A%2F%2Fwww.cidoc-crm.org%2Fcidoc-crm%2F%3E%0APREFIX+xsd%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2001%2FXMLSchema%23%3E%0A%0A%23+Return+object+links+and+creation+date%0ASELECT+%3Fobject+%3Fdate%0AWHERE+%7B%0A%0A++%23+We'll+use+our+previous+command+to+search+only+for%0A++%23+objects+of+type+%22print%22%0A++%3Fobject+bmo%3APX_object_type+%3Fobject_type+.%0A++%3Fobject_type+skos%3AprefLabel+%22print%22+.%0A%0A++%23+We+need+to+link+though+several+nodes+to+find+the%0A++%23+creation+date+associated+with+an+object%0A++%3Fobject+ecrm%3AP108i_was_produced_by+%3Fproduction+.%0A++%3Fproduction+ecrm%3AP9_consists_of+%3Fdate_node+.%0A++%3Fdate_node+ecrm%3AP4_has_time-span+%3Ftimespan+.%0A++%3Ftimespan+ecrm%3AP82a_begin_of_the_begin+%3Fdate+.%0A%0A++%23+As+you+can+see%2C+we+need+to+connect+quite+a+few+dots%0A++%23+to+get+to+the+date+node!+Now+that+we+have+it%2C+we+can%0A++%23+filter+our+results.+Because+we+are+filtering+by+date%2C%0A++%23+we+must+attach+the+tag+%5E%5Exsd%3Adate+after+our+date+strings.%0A++%23+This+tag+tells+the+database+to+interpret+the+string%0A++%23+%221580-01-01%22+as+the+date+1+January+1580.%0A%0A++FILTER(%3Fdate+%3E%3D+%221580-01-01%22%5E%5Exsd%3Adate+%26%26%0A+++++++++%3Fdate+%3C%3D+%221600-01-01%22%5E%5Exsd%3Adate)%0A%7D)
-
-{% include figure.html filename="sparql-lod-11.png" caption="Todos los grabados del British Museum realizados entre 1580-1600." %}
-
-
-### Agregación
-
-Hasta ahora, solo hemos utilizado el comando `SELECT` para recuperar una tabla de objetos. Sin embargo, SPARQL nos permite realizar análisis mucho más avanzados, como agrupaciones, cálculos y clasificaciones.
-
-Pongamos por caso que estuviésemos interesados en examinar los objetos realizados entre 1580 y 1600, pero que asimismo quisiésemos conocer cuántos objetos de cada tipo tiene el British Museum en su colección. En vez de limitar nuestros resultados a los objetos del tipo "print", en este caso utilizaríamos el operador `COUNT` para sumar los resultados de nuestra búsqueda en función del tipo al que pertenezcan.
-
-```
-PREFIX bmo: <http://www.researchspace.org/ontology/>
-PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
-PREFIX ecrm: <http://www.cidoc-crm.org/cidoc-crm/>
-PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
-
-SELECT ?type (COUNT(?type) as ?n)
-WHERE {
-  # Es necesario que indiquemos la variable ?object_type,
-  # sin embargo, ahora no es necesario que esta coincida con el valor "print"
-
-  ?object bmo:PX_object_type ?object_type .
-  ?object_type skos:prefLabel ?type .
-
-  # De nuevo, filtraremos por fecha
-  ?object ecrm:P108i_was_produced_by ?production .
-  ?production ecrm:P9_consists_of ?date_node .
-  ?date_node ecrm:P4_has_time-span ?timespan .
-  ?timespan ecrm:P82a_begin_of_the_begin ?date .
-  FILTER(?date >= "1580-01-01"^^xsd:date &&
-         ?date <= "1600-01-01"^^xsd:date)
-}
-# El comando GROUP BY designa la variable que se sumará,
-# y el comando ORDER BY DESC() clasifica los resultados
-# en orden descendente.
- -GROUP BY ?type -ORDER BY DESC(?n) -``` - -[Run query](https://collection.britishmuseum.org/sparql#query=PREFIX+bmo%3A+%3Chttp%3A%2F%2Fwww.researchspace.org%2Fontology%2F%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+ecrm%3A+%3Chttp%3A%2F%2Fwww.cidoc-crm.org%2Fcidoc-crm%2F%3E%0APREFIX+xsd%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2001%2FXMLSchema%23%3E%0A%0ASELECT+%3Ftype+(COUNT(%3Ftype)+as+%3Fn)%0AWHERE+%7B%0A++%23+We+still+need+to+indicate+the+%3Fobject_type+variable%2C%0A++%23+however+we+will+not+require+it+to+match+%22print%22+this+time%0A%0A++%3Fobject+bmo%3APX_object_type+%3Fobject_type+.%0A++%3Fobject_type+skos%3AprefLabel+%3Ftype+.%0A%0A++%23+Once+again%2C+we+will+also+filter+by+date%0A++%3Fobject+ecrm%3AP108i_was_produced_by+%3Fproduction+.%0A++%3Fproduction+ecrm%3AP9_consists_of+%3Fdate_node+.%0A++%3Fdate_node+ecrm%3AP4_has_time-span+%3Ftimespan+.%0A++%3Ftimespan+ecrm%3AP82a_begin_of_the_begin+%3Fdate+.%0A++FILTER(%3Fdate+%3E%3D+%221580-01-01%22%5E%5Exsd%3Adate+%26%26%0A+++++++++%3Fdate+%3C%3D+%221600-01-01%22%5E%5Exsd%3Adate)%0A%7D%0A%23+The+GROUP+BY+command+designates+the+variable+to+tally+by%2C%0A%23+and+the+ORDER+BY+DESC()+command+sorts+the+results+by%0A%23+descending+number.%0AGROUP+BY+%3Ftype%0AORDER+BY+DESC(%3Fn)) - -{% include figure.html filename="sparql-lod-12.png" caption="Recuento de los objetos producidos entre 1580 y 1600 según el tipo al que pertenecen." %} - - -### Enlazando múltiples puntos de entrada SPARQL - -
    2018-06-13: Desafortunadamente, Europeana ha eliminado la opción de enlazar puntos de entrada externos por medio de consultas `SERVICE`, y, en consecuencia, esta sección ha dejado de funcionar. Mantenemos el texto que sigue porque creemos que puede tener valor como referencia y porque esperamos que el servicio de Europeana vuelva a estar operativo en el futuro.
    - -Hasta ahora, hemos construido consultas que buscan patrones en un único conjunto de datos. Sin embargo, el escenario ideal al que aspiran los partidarios de LOD viene dado por la posibilidad de enlazar múltiples bases de datos, lo que permitirá realizar consultas mucho más complejas al estar estas basadas en el conocimiento distribuido que es posible extraer de diversos espacios web. No obstante, esto resulta más fácil de decir que de hacer, y muchos puntos de entrada (incluido el del British Museum) todavía no referencian recursos de autoridad externos. - -Un punto de entrada que sí lo hace es el de [Europeana](http://sparql.europeana.eu/). Europeana ha creado enlaces entre los objetos de sus bases de datos y los registros de personas en [DBPedia](http://wiki.dbpedia.org/) y [VIAF](https://viaf.org/), los registros de lugares en [GeoNames](http://sws.geonames.org/), y los conceptos resgistrados el *Tesauro de Arte y Arquitectura* (AAT) del Getty Research Institute. SPARQL nos permite insertar declaraciones `SERVICE` que ordenan a la base de datos "llamar a un amigo" y ejecutar una porción de la consulta en una base de datos externa, utilizando estos resultados para completar la consulta en la base de datos local. Si bien esta lección no se dentendrá en los modelos de datos de Europeana y DBPedia en profundidad, la siguiente consulta nos permite ver cómo funciona la declaración `SELECT`. Cada uno de los lectores puede ejecutarla por sí mismo copiando y pegando el texto de la consulta en el punto de entrada de [Europeana](http://sparql.europeana.eu). (A fin de que la consulta funcione, en el punto de entrada de Europeana se debe configurar el menú "Sponging" para "Retrieve remote RDF data for all missing source graphs"). - -``` -PREFIX ore: -PREFIX edm: -PREFIX rdf: -PREFIX dbo: -PREFIX dbr: -PREFIX rdaGr2: - -# Encuentra todos los ?object relacionados por alguna ?property con un ?agent nacido en una -# ?dutch_city -SELECT ?object ?property ?agent ?dutch_city -WHERE { - ?proxy ?property ?agent . - ?proxy ore:proxyFor ?object . - - ?agent rdf:type edm:Agent . - ?agent rdaGr2:placeOfBirth ?dutch_city . - - # En DBPedia, ?dutch_city está definida por pertenecer al país "Netherlands" - # La declaración SERVICE pregunta a - # http://dbpdeia.org/sparql qué ciudades pertenecen al país - # "Netherlands". La respuesta obtenida de esta subconsulta se utilizará para - # completar nuestra consulta originaria sobre los objetos - # presentes en la base de datos de Europeana - - SERVICE { - ?dutch_city dbo:country dbr:Netherlands . - } -} -# Potencialmente, esta consulta puede devolvernos un elevado número de objetos, por lo que vamos -# a solicitar solo los cien primeros a fin de agilizar la búsqueda -LIMIT 100 -``` - -{% include figure.html filename="sparql-lod-13.png" caption="Visualización de la secuencia de la consulta de la solicitud SPARQL definida más arriba." %} - - -Una consulta interconectada como esta significa que podemos interrogar a Europeana sobre los objetos que cuentan con información geográfica (¿cuáles son las ciudades de Holanda?) sin necesidad de que Europeana tenga que almacenar y mantener esta información por sí misma. Es de esperar que, en el futuro, cada vez mayor cantidad de información LOD de carácter cultural esté enlazada con bases de datos autorizadas, como el ULAN (*Union List of Artist Names*) del [Getty Research Institute](http://www.getty.edu/research/). 
Esto permitirá, por ejemplo, que el British Museum "externalice" la información biográfica acudiendo a los recursos más completos del GRI. - -## Trabajando con resultados SPARQL - -Una vez que hemos construido y ejecutado una consulta, ¿qué hacemos ahora con estos resultados? Muchos puntos de entrada, como el del British Museum, ofrecen un navegador web que devuelve resultados legibles para los humanos. Sin embargo, el objetivo de los puntos de entrada SPARQL (y para eso están diseñados) es devolver datos estructurados para ser utilizados por otros programas. - -### Exportar resultados en formato CSV - -En la esquina superior derecha de la página de resultados del punto de entrada del BM, se encuentran enlaces para descargas en formato JSON y XML. Otros puntos de entrada también pueden ofrecer la opción de descargar los resultados en CSV/TSV; sin embargo, esta opción no siempre se encuentra disponible. Las salidas JSON y XML desde un punto de entrada SPARQL contienen no solo los valores devueltos por la declaración `SELECT`, sino también metadatos adicionales sobre tipos de variables e idiomas. - -El procesamiento de la versión XML de los resultados se puede realizar con herramientas tales como Beautiful Soup (véase la lección correspondiente en *[The Programming Historian](/lessons/intro-to-beautiful-soup.html)* u [OpenRefine](http://openrefine.org/)). Para convertir rápidamente los resultados JSON desde un punto de entrada SPARQL en un formato tabular, yo recomiendo la utilidad de la línea de comando gratuita [jg](https://stedolan.github.io/jq/download/). (Para un tutorial sobre cómo utilizar programas de línea de comando, véase ["Introduction to the Bash Command Line"](/lessons/intro-to-bash.html)). La siguiente consulta convertirá el formato especial JSON RDF en un fichero CSV, que podremos cargar en nuestro programa preferido para su posterior análisis y visualización: - -``` -jq -r '.head.vars as $fields | ($fields | @csv), (.results.bindings[] | [.[$fields[]].value] | @csv)' sparql.json > sparql.csv -``` - - -### Exportar resultados a Palladio - -La popular plataforma de análisis de datos [Palladio](http://hdlab.stanford.edu/palladio/) puede cargar directamente datos desde un punto de entrada SPARQL. En la parte inferior de la pantalla "Create a new project", el enlace "Load data from a SPARQL endpoint (beta)" nos proporciona un campo para escribir la dirección del punto de entrada y una caja para la consulta propiamente dicha. Dependiendo del punto de entrada, podemos necesitar especifidar el tipo de fichero de salida en la dirección del punto de entrada; por ejemplo, para cargar datos desde el punto de entrada del British Museum, debemos utilizar la dirección `http://collection.britishmuseum.org/sparql.json`. Trata de pegar la consulta de agregación que utilizamos más arriba para el recuento de obras de arte según su tipología y clica en "Run query". Palladio debería mostrar una tabla de previsualización como esta: - -{% include figure.html filename="sparql-lod-14.png" caption="Interfaz de Palladio para las consultas SPARQL." %} - - -Después de previsualizar los datos devueltos por el punto de entrada, clica en en botón "Load data" en la parte inferior de la pantalla para empezar a trabajar con ellos. (Véase esta lección de *[Programming Historian](/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas)* para un tutorial más detallado sobre Palladio). 
[Por ejemplo, podríamos realizar una consulta que devuelva enlaces a las imágenes de los grabados realizados entre 1580 y 1600](https://collection.britishmuseum.org/sparql?query=%23+Return+object+links+and+creation+date%0D%0APREFIX+bmo%3A+%3Chttp%3A%2F%2Fcollection.britishmuseum.org%2Fid%2Fontology%2F%3E%0D%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0D%0APREFIX+ecrm%3A+%3Chttp%3A%2F%2Ferlangen-crm.org%2Fcurrent%2F%3E%0D%0APREFIX+xsd%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2001%2FXMLSchema%23%3E%0D%0ASELECT+DISTINCT+%3Fobject+%3Fdate+%3Fimage%0D%0AWHERE+%7B%0D%0A%0D%0A++%23+We%27ll+use+our+previous+command+to+search+only+for+objects+of+type+%22print%22%0D%0A++%3Fobject+bmo%3APX_object_type+%3Fobject_type+.%0D%0A++%3Fobject_type+skos%3AprefLabel+%22print%22+.%0D%0A%0D%0A++%23+We+need+to+link+though+several+nodes+to+find+the+creation+date+associated%0D%0A++%23+with+an+object%0D%0A++%3Fobject+ecrm%3AP108i_was_produced_by+%3Fproduction+.%0D%0A++%3Fproduction+ecrm%3AP9_consists_of+%3Fdate_node+.%0D%0A++%3Fdate_node+ecrm%3AP4_has_time-span+%3Ftimespan+.%0D%0A++%3Ftimespan+ecrm%3AP82a_begin_of_the_begin+%3Fdate+.%0D%0A%0D%0A++%23+Yes%2C+we+need+to+connect+quite+a+few+dots+to+get+to+the+date+node%21+Now+that%0D%0A++%23+we+have+it%2C+we+can+filter+our+results.+Because+we+are+filtering+a+date%2C+we%0D%0A++%23+must+attach+the+xsd%3Adate+tag+to+our+date+strings+so+that+SPARQL+knows+how+to%0D%0A++%23+parse+them.%0D%0A%0D%0A++FILTER%28%3Fdate+%3E%3D+%221580-01-01%22%5E%5Exsd%3Adate+%26%26+%3Fdate+%3C%3D+%221600-01-01%22%5E%5Exsd%3Adate%29%0D%0A++%0D%0A++%3Fobject+bmo%3APX_has_main_representation+%3Fimage+.%0D%0A%7D%0D%0ALIMIT+100#query=%23+Return+object+links+and+creation+date%0APREFIX+bmo%3A+%3Chttp%3A%2F%2Fwww.researchspace.org%2Fontology%2F%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+xsd%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2001%2FXMLSchema%23%3E%0APREFIX+ecrm%3A+%3Chttp%3A%2F%2Fwww.cidoc-crm.org%2Fcidoc-crm%2F%3E%0ASELECT+DISTINCT+%3Fobject+%3Fdate+%3Fimage%0AWHERE+%7B%0A++%0A++%23+We'll+use+our+previous+command+to+search+only+for+objects+of+type+%22print%22%0A++%3Fobject+bmo%3APX_object_type+%3Fobject_type+.%0A++%3Fobject_type+skos%3AprefLabel+%22print%22+.%0A%0A++%23+We+need+to+link+though+several+nodes+to+find+the+creation+date+associated%0A++%23+with+an+object%0A++%3Fobject+ecrm%3AP108i_was_produced_by+%3Fproduction+.%0A++%3Fproduction+ecrm%3AP9_consists_of+%3Fdate_node+.%0A++%3Fdate_node+ecrm%3AP4_has_time-span+%3Ftimespan+.%0A++%3Ftimespan+ecrm%3AP82a_begin_of_the_begin+%3Fdate+.%0A%0A++%0A++%23+Yes%2C+we+need+to+connect+quite+a+few+dots+to+get+to+the+date+node!+Now+that%0A++%23+we+have+it%2C+we+can+filter+our+results.+Because+we+are+filtering+a+date%2C+we%0A++%23+must+attach+the+xsd%3Adate+tag+to+our+date+strings+so+that+SPARQL+knows+how+to%0A++%23+parse+them.%0A%0A++FILTER(%3Fdate+%3E%3D+%221580-01-01%22%5E%5Exsd%3Adate+%26%26+%3Fdate+%3C%3D+%221600-01-01%22%5E%5Exsd%3Adate)%0A++%0A++%3Fobject+bmo%3APX_has_main_representation+%3Fimage+.%0A%7D%0ALIMIT+100), y representar estos datos como una galería de imágenes clasificadas por fecha: - -{% include figure.html filename="sparql-lod-15.png" caption="Galería de imágenes con línea de tiempo de sus fechas de creación generada utilizando Palladio." 
---
title: |
  Uso de SPARQL para acceder a datos abiertos enlazados
authors:
- Matthew Lincoln
date: 2015-11-24
translation_date: 2017-05-20
editors:
- Fred Gibbs
reviewers:
- Patrick Murray-John
- Jason Heppler
- Will Hanley
- Fred Gibbs
translator:
- Nuria Rodríguez Ortega
translation-editor:
- Antonio Rojas Castro
translation-reviewer:
- Antonio Rojas Castro
- Juan Antonio Pastor Sánchez
review-ticket: https://github.com/programminghistorian/ph-submissions/issues/67
layout: lesson
original: graph-databases-and-SPARQL
redirect_from:
- /es/lessons/graph-databases-and-SPARQL
- /es/lecciones/sparql-datos-abiertos-enlazados
difficulty: 2
activity: acquiring
topics: [lod]
abstract: "Esta lección explica por qué numerosas instituciones culturales están adoptando bases de datos orientadas a grafos y cómo los investigadores pueden acceder a estos datos a través de consultas realizadas en el lenguaje llamado SPARQL."
retired: true
retirement-reason: |
  El Museo Británico no ha mantenido el acceso a su base de datos de colecciones de una manera consistente. Aunque la sintaxis y los comandos de SPARQL siguen siendo correctos, las URLs a las que intentan conectarse son ahora demasiado inconsistentes para su uso en una lección funcional.
avatar_alt: Grabado con dos peces unidos por una rama en sus bocas.
doi: 10.46430/phes0027
---

Objetivos de la lección
-----------------------

Esta lección explica por qué numerosas instituciones culturales están adoptando bases de datos orientadas a grafos (*graph databases*) y cómo los investigadores pueden acceder a estos datos a través de consultas realizadas en el lenguaje llamado SPARQL.
{% include toc.html %}

## Bases de datos orientadas a grafos, RDF y datos abiertos enlazados (Linked Open Data, LOD)

Actualmente, numerosas instituciones culturales están ofreciendo información sobre sus colecciones a través de las denominadas API ([*Application Programming Interfaces*](/lessons/intro-to-the-zotero-api.html)). Estas API son instrumentos muy eficaces para acceder de manera automatizada a registros individuales; sin embargo, no constituyen el procedimiento ideal cuando tratamos con datos culturales, debido a que las API están estructuradas para trabajar con un conjunto predeterminado de consultas (*queries*). Por ejemplo, un museo puede tener información sobre donantes, artistas, obras de arte, exposiciones, procedencia de sus obras (*provenance*), etc., pero su API puede ofrecer solo una recuperación orientada a objetos, haciendo difícil o imposible buscar datos relacionados con donantes, artistas, etc. Así pues, esta estructura es interesante si el objetivo es buscar información sobre objetos particulares; sin embargo, puede complicar la operación de agregar información sobre los artistas o donantes que también se encuentran registrados en la base de datos.

Las bases de datos RDF son muy apropiadas para expresar relaciones complejas entre múltiples entidades, como personas, lugares, eventos y conceptos ligados a objetos individuales. Estas bases de datos se denominan habitualmente bases de datos orientadas a grafos (*graph databases*) porque estructuran la información como un grafo o red, donde un conjunto de recursos o nodos están conectados entre sí mediante aristas (o enlaces) que describen las relaciones establecidas entre dichos recursos.

Dado que las bases de datos RDF admiten el uso de URL, estas pueden estar accesibles *online* y también pueden enlazarse a otras bases de datos, de ahí el término "datos abiertos enlazados" (*Linked Open Data*, LOD). Importantes colecciones artísticas, entre las que se incluyen las del [British Museum](https://collection.britishmuseum.org/), [Europeana](https://labs.europeana.eu/api/linked-open-data-introduction), el [Smithsonian American Art Museum](https://americanart.si.edu/) y el [Yale Center for British Art](https://britishart.yale.edu/collections/using-collections/technology/linked-open-data), han publicado sus colecciones de datos como LOD. El [Getty Vocabulary Program](https://vocab.getty.edu/) también ha publicado sus vocabularios controlados (TGN, ULAN y AAT) como LOD.

SPARQL es el lenguaje utilizado para interrogar este tipo de bases de datos. Este lenguaje es particularmente potente porque obvia las perspectivas que los usuarios transfieren a los datos. Una consulta sobre objetos y una consulta sobre donantes son prácticamente equivalentes en estas bases de datos. Lamentablemente, numerosos tutoriales sobre SPARQL utilizan modelos de datos tan extremadamente simplificados que no son operativos cuando se trata de utilizar las complejas bases de datos desarrolladas por las instituciones culturales. Este tutorial ofrece un curso intensivo sobre SPARQL utilizando un conjunto de datos (*dataset*) que un humanista podría realmente encontrar en Internet. En concreto, en este tutorial aprenderemos cómo interrogar la colección LOD del British Museum.

### RDF en pocas palabras

RDF representa la información en una declaración triple -también llamada tripleta- que sigue la estructura sujeto-predicado-objeto. Por ejemplo:

```
<La ronda de noche> <fue creado por> <Rembrandt van Rijn> .
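# (estructura: sujeto, predicado y objeto, en ese orden, cerrados con un punto)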
```

(Observa que, como toda buena oración, estas declaraciones terminan con un punto final).

En este ejemplo, el sujeto `<La ronda de noche>` y el objeto `<Rembrandt van Rijn>` pueden ser considerados como dos nodos de un grafo, donde el predicado `<fue creado por>` define la arista -o relación- entre ellos. (Técnicamente, `<fue creado por>` puede ser tratado en otras consultas como un objeto o un sujeto, pero esta cuestión escapa al alcance de este tutorial).

Una seudobase de datos RDF podría contener declaraciones interrelacionadas entre sí, como las siguientes:

```
...
<La ronda de noche> <fue creado por> <Rembrandt van Rijn> .
<La ronda de noche> <fue creado en> <1642> .
<La ronda de noche> <tiene como medio> <óleo sobre lienzo> .
<Rembrandt van Rijn> <nació en> <1606> .
<Rembrandt van Rijn> <tiene nacionalidad> <holandesa> .
<Johannes Vermeer> <tiene nacionalidad> <holandesa> .
<La tasadora de perlas> <fue creado por> <Johannes Vermeer> .
<La tasadora de perlas> <tiene como medio> <óleo sobre lienzo> .
...
```

Si visualizásemos estas declaraciones como nodos y aristas de un grafo o red, la representación sería como sigue:

{% include figure.html caption="Visualización en red del seudoRDF mostrado más arriba. Las flechas indican la 'dirección' del predicado. Por ejemplo, que '*La tasadora de perlas* fue creada por Vermeer' y no al revés. Diagrama reconstruido por Nuria Rodríguez Ortega." filename="sparql-lod-01.png" %}

Las tradicionales bases de datos relacionales pueden distribuir atributos sobre obras de arte y artistas en tablas separadas. En las bases de datos RDF u orientadas a grafos, todos estos datos pertenecen a un mismo grafo interconectado, lo que permite a los usuarios una mayor flexibilidad a la hora de decidir cómo quieren interrogar estos recursos.

### Buscando RDF con SPARQL

SPARQL nos permite traducir datos en grafo, intensamente enlazados, en datos normalizados en formato tabular, esto es, distribuidos en filas y columnas, que se pueden abrir en programas como Excel o importar a programas de visualización, tales como [plot.ly](https://plot.ly/) o [Palladio](https://hdlab.stanford.edu/palladio/).

Resulta útil pensar las consultas SPARQL como un [Mad Lib](https://en.wikipedia.org/wiki/Mad_Libs) -un conjunto de oraciones con espacios en blanco-. La base de datos tomará esta consulta y encontrará cada conjunto de oraciones que encaje correctamente en estos espacios en blanco, devolviéndonos los valores coincidentes como una tabla. Veamos esta consulta SPARQL:

```
SELECT ?pintura
WHERE {
  ?pintura <tiene como medio> <óleo sobre lienzo> .
}
```

En esta consulta, `?pintura` representa el nodo (o nodos) que la base de datos nos devolverá. Una vez recibida la consulta, la base de datos buscará todos los valores de `?pintura` que completen adecuadamente la declaración RDF `?pintura <tiene como medio> <óleo sobre lienzo> .`

{% include figure.html caption="Visualización de lo que nuestra consulta está buscando. Diagrama reconstruido por Nuria Rodríguez Ortega." filename="sparql-lod-02.png" %}


Cuando la consulta interroga la base de datos completa, esta busca los sujetos, predicados y objetos que coinciden con esta declaración, excluyendo, al mismo tiempo, el resto de datos.

{% include figure.html filename="sparql-lod-03.png" caption="Visualización de la consulta SPARQL con los elementos mencionados en naranja y los elementos seleccionados (aquellos que nos serán devueltos en los resultados) en rojo. Diagrama reconstruido por Nuria Rodríguez Ortega." %}

Nuestros resultados podrían tener este aspecto:

| **pinturas**          |
| --------------------- |
| La ronda de noche     |
| La tasadora de perlas |

Ahora bien, lo que hace a RDF y a SPARQL herramientas tan potentes es su habilidad para crear consultas complejas que referencian múltiples variables al mismo tiempo.
Por ejemplo, podríamos buscar en nuestra seudobase de datos RDF pinturas creadas por cualquier artista que fuese holandés:

```
SELECT ?artista ?pintura
WHERE {
  ?artista <tiene nacionalidad> <holandesa> .
  ?pintura <fue creado por> ?artista .
}
```

En este ejemplo, hemos introducido una segunda variable: `?artista`. La base de datos RDF devolverá todas las combinaciones coincidentes de `?artista` y `?pintura` que encajen en ambas declaraciones.

{% include figure.html filename="sparql-lod-04.png" caption="Visualización de la consulta SPARQL con los elementos mencionados en naranja y los elementos seleccionados (aquellos que serán recuperados en los resultados) en rojo. Diagrama reconstruido por Nuria Rodríguez Ortega." %}

| artistas           | pinturas              |
| ------------------ | --------------------- |
| Rembrandt van Rijn | La ronda de noche     |
| Johannes Vermeer   | La tasadora de perlas |

### URI y literales

Hasta ahora, hemos visto una representación facticia de RDF que utiliza un texto fácil de leer. Sin embargo, RDF se almacena principalmente en formato URI (*Uniform Resource Identifiers*), que separa las entidades conceptuales de sus etiquetas lingüísticas. (Ten en cuenta que una URL, o *Uniform Resource Locator*, es una URI accesible desde la web). En RDF real, nuestra declaración original:

```
<La ronda de noche> <fue creado por> <Rembrandt van Rijn> .
```

sería más parecida a lo siguiente:

```
<http://data.rijksmuseum.nl/item/8909812347> <http://purl.org/dc/terms/creator> <http://dbpedia.org/resource/Rembrandt> .
```

*N.B. el Rijksmuseum todavía no ha desarrollado su propio sitio LOD, por lo que en esta consulta la URI responde únicamente a objetivos de demostración.*

A fin de obtener una versión legible desde el punto de vista humano de la información representada por cada una de estas URI, lo que hacemos realmente es recuperar más declaraciones RDF. Incluso el predicado en esta declaración tiene su propia etiqueta literal:

```
<http://data.rijksmuseum.nl/item/8909812347> <http://purl.org/dc/terms/title> "La ronda de noche" .
<http://purl.org/dc/terms/creator> <http://www.w3.org/2000/01/rdf-schema#label> "fue creado por" .
<http://dbpedia.org/resource/Rembrandt> <http://xmlns.com/foaf/0.1/name> "Rembrandt van Rijn" .
```

Como se puede observar, a diferencia de las URI, que en estas declaraciones están enmarcadas por los signos `<>`, los *objetos* son cadenas de texto entrecomilladas. Esto es lo que se conoce como *literales* (*literals*). Los literales representan valores, mientras que las URI representan referencias. Por ejemplo, `<http://dbpedia.org/resource/Rembrandt>` representa una entidad que puede referenciar (y puede ser referenciada por) muchas otras declaraciones (fechas de nacimiento, discípulos, miembros de la familia, etc.), mientras que la cadena de texto `"Rembrandt van Rijn"` solo se representa a sí misma. Otros valores literales en RDF incluyen fechas y números.

Fijémonos ahora en los predicados de estas declaraciones, con nombres de dominio como `purl.org`, `w3.org` y `xmlns.com`. Estos son algunos de los numerosos proveedores de ontologías que ayudan a estandarizar el modo en que describimos relaciones entre bits de información como "título", "etiqueta", "creador" o "nombre". Cuanto más trabajemos con RDF/LOD, más proveedores de este tipo encontraremos.

Las URI pueden llegar a ser difíciles de manejar cuando se componen consultas SPARQL. Para simplificar este proceso se utilizan los *prefijos* (*prefixes*). Los prefijos son atajos que nos liberan de tener que escribir toda la larga cadena de caracteres que constituye una URI. Por ejemplo, recordemos el predicado para recuperar el título de *La ronda de noche*, `<http://purl.org/dc/terms/title>`. Con el prefijo `PREFIX dct: <http://purl.org/dc/terms/>` declarado al inicio de la consulta, solo necesitamos escribir `dct:title` cuando queramos utilizar un predicado de `purl.org`: `dct:` representa la cadena completa `http://purl.org/dc/terms/`, y `title` simplemente se agrega al final de este enlace.
Por ejemplo, con el prefijo `PREFIX rkm: <http://data.rijksmuseum.nl/>` agregado al inicio de nuestra consulta SPARQL, `<http://data.rijksmuseum.nl/item/8909812347>` se convierte en `rkm:item/8909812347`.

Debemos ser conscientes de que los prefijos se pueden asignar arbitrariamente a cualquier abreviatura que queramos; así, diferentes puntos de entrada (*endpoints*) pueden utilizar prefijos ligeramente diferentes para el mismo espacio de nombres (*namespace*) (por ejemplo, `dct` vs. `dcterms` para `<http://purl.org/dc/terms/>`).

### Términos para revisar

* **SPARQL** - *Protocol and RDF Query Language* - El lenguaje utilizado para interrogar bases de datos RDF u orientadas a grafos.
* **RDF** - *Resource Description Framework* - Un método para estructurar datos en forma de grafo o como una red de declaraciones conectadas más que como una serie de tablas.
* **LOD** - *Linked Open Data* (datos abiertos enlazados) - LOD son datos RDF publicados *online* en formato URI de modo que los desarrolladores pueden referenciarlos de manera fiable y sin ambigüedad.
* **declaración** - a veces denominada "tripleta", una declaración RDF es una unidad de conocimiento que comprende sujeto, predicado y objeto.
* **URI** - *Uniform Resource Identifier* - una cadena de caracteres que identifica un recurso. Las declaraciones RDF utilizan URI para enlazar varios recursos. Una URL, o *Uniform Resource Locator*, es un tipo de URI que apunta a un determinado recurso en la web.
* **literal** - En las declaraciones RDF, algunos objetos no referencian recursos con una URI sino que vehiculan un valor, que puede ser un texto (`"Rembrandt van Rijn"`), un número (`5`) o una fecha (`1606-06-15`). Estos objetos se conocen como literales.
* **prefijo** - A fin de simplificar las consultas SPARQL, un usuario puede especificar prefijos que funcionan como abreviaturas de las URI completas. Estas abreviaturas, o **QNames**, se utilizan también en los espacios de nombres (*namespaces*) de los documentos XML.

## Consultas basadas en casos reales

### Todas las declaraciones para un objeto

Vamos a empezar nuestra primera consulta utilizando el [punto de entrada SPARQL del British Museum](https://collection.britishmuseum.org/sparql). Un punto de entrada SPARQL es una dirección web que acepta consultas SPARQL y devuelve resultados. El punto de entrada del British Museum funciona como muchos otros: cuando accedemos a él a través de un navegador web, encontramos una caja de texto para componer las consultas.

{% include figure.html filename="sparql-lod-05.png" caption="Web del punto de entrada SPARQL del British Museum. Para todas las consultas de este tutorial, hay que asegurarse de haber dejado las casillas 'Include inferred' y 'Expand results over equivalent URIs' sin marcar." %}


Cuando empezamos a explorar una nueva base de datos RDF, resulta útil examinar, a modo de ejemplo, las relaciones que emanan de un [objeto en concreto](https://collection.britishmuseum.org/resource?uri=https://collection.britishmuseum.org/id/object/PPA82633).

(Para cada una de las siguientes consultas, clica en el enlace "Run query" situado más abajo para ver los resultados. La puedes ejecutar tal y como está o modificarla antes. En este último caso, recuerda que es necesario dejar sin marcar la casilla "Include inferred" antes de ejecutar la consulta).

```
SELECT ?p ?o
WHERE {
  <http://collection.britishmuseum.org/id/object/PPA82633> ?p ?o .
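  # ?p y ?o funcionan aquí como comodines: cualquier predicado y
  # cualquier objeto conectados a este sujeto completan el patrón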
}
```

[Run query](https://collection.britishmuseum.org/sparql?query=SELECT+*%0D%0AWHERE+%7B%0D%0A++%3Chttp%3A%2F%2Fcollection.britishmuseum.org%2Fid%2Fobject%2FPPA82633%3E+%3Fp+%3Fo+.%0D%0A++%7D&_implicit=false&_equivalent=false&_form=%2Fsparql)

Con la orden `SELECT ?p ?o`, le estamos diciendo a la base de datos que nos devuelva los valores de `?p` y `?o` descritos en el comando `WHERE {}`. Esta consulta devuelve cada declaración para la cual nuestra obra de arte seleccionada, `<http://collection.britishmuseum.org/id/object/PPA82633>`, es el sujeto. `?p` ocupa la posición central en la declaración RDF del comando `WHERE {}`, por lo que devuelve cualquier predicado que coincida con la declaración, mientras que `?o`, en la posición final, devuelve todos los objetos. Aunque yo las he nombrado como `?p` y `?o`, en realidad, tal y como se puede ver en el ejemplo inferior, es posible nombrar estas variables del modo que nosotros queramos. De hecho, será útil darles nombres significativos para las consultas complejas que siguen a continuación.

{% include figure.html filename="sparql-lod-06.png" caption="Listado inicial de todos los predicados y objetos asociados con una obra de arte en el British Museum." %}


El punto de entrada del British Museum formatea la tabla de resultados con enlaces para cada una de las variables que son, en realidad, nodos RDF, por lo que, clicando en cada uno de estos enlaces, podemos ver todos los predicados y objetos para cada uno de los nodos seleccionados. Advierte que el British Museum incluye automáticamente un amplio rango de prefijos SPARQL en sus consultas, por lo que encontraremos numerosos enlaces mostrados en su versión abreviada; si pasamos el ratón sobre ellos, podremos ver las URI sin abreviar.

{% include figure.html filename="sparql-lod-07.png" caption="Visualización del conjunto de nodos recuperados a través de la primera consulta realizada a la base de datos del British Museum. Los elementos de este grafo coloreados en rojo se encuentran también en la tabla de resultados mostrada más arriba. Se han incluido niveles adicionales en la jerarquía para mostrar cómo esta obra en particular se encuentra conectada en el grafo general que constituye la base de datos del BM." %}


Veamos ahora cómo se almacena la información del tipo de objeto: busca el predicado `<http://www.researchspace.org/ontology/PX_object_type>` (marcado en la tabla anterior) y clica en el enlace `thes:x8577` para acceder al nodo que describe el tipo de objeto "print" (grabado).

{% include figure.html filename="sparql-lod-08.png" caption="Página del recurso `thes:x8577` ('print') en el conjunto de datos enlazados del British Museum." %}

Como se puede observar, este nodo tiene una etiqueta (*label*) en texto plano, así como enlaces a nodos del tipo "objetos artísticos" con los que se relaciona en el conjunto de la base de datos.

### Consultas complejas

Para encontrar otros objetos del mismo tipo descritos con la etiqueta "print", podemos invocar esta consulta:

```
PREFIX bmo: <http://www.researchspace.org/ontology/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT ?object
WHERE {

  # Busca todos los valores de ?object que tengan un "object type" dado
  ?object bmo:PX_object_type ?object_type .

  # El "object type" debería tener la etiqueta "print"
  ?object_type skos:prefLabel "print" .
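  # La variable compartida ?object_type conecta los dos patrones:
  # solo se devuelven los objetos cuyo tipo lleva la etiqueta "print"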
}
LIMIT 10
```

[Run query](https://collection.britishmuseum.org/sparql#query=PREFIX+bmo%3A+%3Chttp%3A%2F%2Fwww.researchspace.org%2Fontology%2F%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0A%0ASELECT+%3Fobject%0AWHERE+%7B%0A%0A++%23+Search+for+all+values+of+%3Fobject+that+have+a+given+%22object+type%22%0A++%3Fobject+bmo%3APX_object_type+%3Fobject_type+.%0A%0A++%23+That+object+type+should+have+the+label+%22print%22%0A++%3Fobject_type+skos%3AprefLabel+%22print%22+.%0A%7D%0ALIMIT+10) / [User-generated query](https://hypothes.is/a/AVLH7aAMvTW_3w8Ly19w)

{% include figure.html filename="sparql-lod-09.png" caption="Tabla resultante de nuestra consulta para todos los objetos del tipo 'print'." %}

Recuerda que, dado que `"print"` funciona aquí como un literal, lo escribimos entrecomillado en nuestra consulta. Cuando se incluyen literales en las consultas SPARQL, la base de datos solo devuelve coincidencias exactas para estos valores.

Advierte también que, dado que `?object_type` no se encuentra presente en el comando `SELECT`, no se mostrará en la tabla de resultados. Sin embargo, resulta esencial para estructurar nuestra consulta, porque es lo que permite conectar los puntos desde `?object` hasta la etiqueta `"print"`.

### FILTER

En los ejemplos anteriores, nuestra consulta SPARQL ha buscado una coincidencia exacta para el tipo de objeto con la etiqueta "print". Sin embargo, con frecuencia querremos encontrar valores literales que caen dentro de un determinado rango, como son las fechas. Para ello utilizaremos el comando `FILTER`.

Para localizar las URI de todos los grabados presentes en la base de datos del British Museum creados entre 1580 y 1600, necesitaremos, en primer lugar, averiguar dónde se almacenan en la base de datos las fechas en relación con los objetos, y entonces añadir referencias a estas fechas en nuestra consulta. De manera similar al procedimiento que hemos seguido de un único enlace para determinar un tipo de objeto, debemos ahora movernos a través de diversos nodos para encontrar las fechas de producción asociadas a un objeto dado:

{% include figure.html filename="sparql-lod-10.png" caption="Visualización de la parte del modelo de datos del British Museum donde las fechas de producción están conectadas a los objetos." %}

```
PREFIX bmo: <http://www.researchspace.org/ontology/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX ecrm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

# Recupera enlaces de objetos y fechas de creación
SELECT ?object ?date
WHERE {

  # Utilizaremos nuestro comando previo para buscar solo
  # objetos del tipo "print"
  ?object bmo:PX_object_type ?object_type .
  ?object_type skos:prefLabel "print" .

  # Necesitamos enlazar diversos nodos para encontrar la
  # fecha de creación asociada con un objeto
  ?object ecrm:P108i_was_produced_by ?production .
  ?production ecrm:P9_consists_of ?date_node .
  ?date_node ecrm:P4_has_time-span ?timespan .
  ?timespan ecrm:P82a_begin_of_the_begin ?date .

  # Como se ve, es necesario conectar unos cuantos puntos
  # para llegar al nodo de la fecha. Ahora que lo tenemos, podemos
  # filtrar nuestros resultados. Dado que estamos filtrando por fecha,
  # debemos agregar la etiqueta ^^xsd:date después de nuestra cadena de fecha.
  # Esta etiqueta le dice a la base de datos que interprete la cadena
  # "1580-01-01" como la fecha 1 de enero de 1580.
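  # (Una vez tipadas como xsd:date, las fechas admiten los operadores
  # de comparación habituales: >=, <=, > y <.)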
  FILTER(?date >= "1580-01-01"^^xsd:date &&
         ?date <= "1600-01-01"^^xsd:date)
}
```

[Run query](https://collection.britishmuseum.org/sparql#query=PREFIX+bmo%3A+%3Chttp%3A%2F%2Fwww.researchspace.org%2Fontology%2F%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+ecrm%3A+%3Chttp%3A%2F%2Fwww.cidoc-crm.org%2Fcidoc-crm%2F%3E%0APREFIX+xsd%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2001%2FXMLSchema%23%3E%0A%0A%23+Return+object+links+and+creation+date%0ASELECT+%3Fobject+%3Fdate%0AWHERE+%7B%0A%0A++%23+We'll+use+our+previous+command+to+search+only+for%0A++%23+objects+of+type+%22print%22%0A++%3Fobject+bmo%3APX_object_type+%3Fobject_type+.%0A++%3Fobject_type+skos%3AprefLabel+%22print%22+.%0A%0A++%23+We+need+to+link+though+several+nodes+to+find+the%0A++%23+creation+date+associated+with+an+object%0A++%3Fobject+ecrm%3AP108i_was_produced_by+%3Fproduction+.%0A++%3Fproduction+ecrm%3AP9_consists_of+%3Fdate_node+.%0A++%3Fdate_node+ecrm%3AP4_has_time-span+%3Ftimespan+.%0A++%3Ftimespan+ecrm%3AP82a_begin_of_the_begin+%3Fdate+.%0A%0A++%23+As+you+can+see%2C+we+need+to+connect+quite+a+few+dots%0A++%23+to+get+to+the+date+node!+Now+that+we+have+it%2C+we+can%0A++%23+filter+our+results.+Because+we+are+filtering+by+date%2C%0A++%23+we+must+attach+the+tag+%5E%5Exsd%3Adate+after+our+date+strings.%0A++%23+This+tag+tells+the+database+to+interpret+the+string%0A++%23+%221580-01-01%22+as+the+date+1+January+1580.%0A%0A++FILTER(%3Fdate+%3E%3D+%221580-01-01%22%5E%5Exsd%3Adate+%26%26%0A+++++++++%3Fdate+%3C%3D+%221600-01-01%22%5E%5Exsd%3Adate)%0A%7D)

{% include figure.html filename="sparql-lod-11.png" caption="Todos los grabados del British Museum realizados entre 1580-1600." %}


### Agregación

Hasta ahora, solo hemos utilizado el comando `SELECT` para recuperar una tabla de objetos. Sin embargo, SPARQL nos permite realizar análisis mucho más avanzados, como agrupaciones, cálculos y clasificaciones.

Pongamos por caso que estuviésemos interesados en examinar los objetos realizados entre 1580 y 1600, pero que asimismo quisiésemos conocer cuántos objetos de cada tipo tiene el British Museum en su colección. En vez de limitar nuestros resultados a los objetos del tipo "print", en este caso utilizaríamos el operador `COUNT` para sumar los resultados de nuestra búsqueda en función del tipo al que pertenezcan.

```
PREFIX bmo: <http://www.researchspace.org/ontology/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX ecrm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?type (COUNT(?type) as ?n)
WHERE {
  # Es necesario que indiquemos la variable ?object_type,
  # sin embargo, ahora no es necesario que esta coincida con el valor "print"

  ?object bmo:PX_object_type ?object_type .
  ?object_type skos:prefLabel ?type .

  # De nuevo, filtraremos por fecha
  ?object ecrm:P108i_was_produced_by ?production .
  ?production ecrm:P9_consists_of ?date_node .
  ?date_node ecrm:P4_has_time-span ?timespan .
  ?timespan ecrm:P82a_begin_of_the_begin ?date .
  FILTER(?date >= "1580-01-01"^^xsd:date &&
         ?date <= "1600-01-01"^^xsd:date)
}
# El comando GROUP BY designa la variable que se sumará,
# y el comando ORDER BY DESC() clasifica los resultados
# en orden descendente.
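# (Tras GROUP BY podría añadirse HAVING para conservar solo los grupos
# que superen un umbral, por ejemplo HAVING(COUNT(?type) > 100).)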
GROUP BY ?type
ORDER BY DESC(?n)
```

[Run query](https://collection.britishmuseum.org/sparql#query=PREFIX+bmo%3A+%3Chttp%3A%2F%2Fwww.researchspace.org%2Fontology%2F%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+ecrm%3A+%3Chttp%3A%2F%2Fwww.cidoc-crm.org%2Fcidoc-crm%2F%3E%0APREFIX+xsd%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2001%2FXMLSchema%23%3E%0A%0ASELECT+%3Ftype+(COUNT(%3Ftype)+as+%3Fn)%0AWHERE+%7B%0A++%23+We+still+need+to+indicate+the+%3Fobject_type+variable%2C%0A++%23+however+we+will+not+require+it+to+match+%22print%22+this+time%0A%0A++%3Fobject+bmo%3APX_object_type+%3Fobject_type+.%0A++%3Fobject_type+skos%3AprefLabel+%3Ftype+.%0A%0A++%23+Once+again%2C+we+will+also+filter+by+date%0A++%3Fobject+ecrm%3AP108i_was_produced_by+%3Fproduction+.%0A++%3Fproduction+ecrm%3AP9_consists_of+%3Fdate_node+.%0A++%3Fdate_node+ecrm%3AP4_has_time-span+%3Ftimespan+.%0A++%3Ftimespan+ecrm%3AP82a_begin_of_the_begin+%3Fdate+.%0A++FILTER(%3Fdate+%3E%3D+%221580-01-01%22%5E%5Exsd%3Adate+%26%26%0A+++++++++%3Fdate+%3C%3D+%221600-01-01%22%5E%5Exsd%3Adate)%0A%7D%0A%23+The+GROUP+BY+command+designates+the+variable+to+tally+by%2C%0A%23+and+the+ORDER+BY+DESC()+command+sorts+the+results+by%0A%23+descending+number.%0AGROUP+BY+%3Ftype%0AORDER+BY+DESC(%3Fn))

{% include figure.html filename="sparql-lod-12.png" caption="Recuento de los objetos producidos entre 1580 y 1600 según el tipo al que pertenecen." %}


### Enlazando múltiples puntos de entrada SPARQL

<div class="alert alert-warning">
2018-06-13: Desafortunadamente, Europeana ha eliminado la opción de enlazar puntos de entrada externos por medio de consultas `SERVICE` y, en consecuencia, esta sección ha dejado de funcionar. Mantenemos el texto que sigue porque creemos que puede tener valor como referencia y porque esperamos que el servicio de Europeana vuelva a estar operativo en el futuro.
</div>

Hasta ahora, hemos construido consultas que buscan patrones en un único conjunto de datos. Sin embargo, el escenario ideal al que aspiran los partidarios de LOD viene dado por la posibilidad de enlazar múltiples bases de datos, lo que permite realizar consultas mucho más complejas, basadas en el conocimiento distribuido que es posible extraer de diversos espacios web. No obstante, esto resulta más fácil de decir que de hacer, y muchos puntos de entrada (incluido el del British Museum) todavía no referencian recursos de autoridad externos.

Un punto de entrada que sí lo hace es el de [Europeana](https://sparql.europeana.eu/). Europeana ha creado enlaces entre los objetos de sus bases de datos y los registros de personas en [DBPedia](https://wiki.dbpedia.org/) y [VIAF](https://viaf.org/), los registros de lugares en [GeoNames](https://sws.geonames.org/) y los conceptos registrados en el *Tesauro de Arte y Arquitectura* (AAT) del Getty Research Institute. SPARQL nos permite insertar declaraciones `SERVICE` que ordenan a la base de datos "llamar a un amigo" y ejecutar una porción de la consulta en una base de datos externa, utilizando estos resultados para completar la consulta en la base de datos local. Si bien esta lección no se detendrá en los modelos de datos de Europeana y DBPedia en profundidad, la siguiente consulta nos permite ver cómo funciona la declaración `SERVICE`. Cada uno de los lectores puede ejecutarla por sí mismo copiando y pegando el texto de la consulta en el punto de entrada de [Europeana](https://sparql.europeana.eu). (A fin de que la consulta funcione, en el punto de entrada de Europeana se debe configurar el menú "Sponging" con la opción "Retrieve remote RDF data for all missing source graphs").

```
PREFIX ore: <http://www.openarchives.org/ore/terms/>
PREFIX edm: <http://www.europeana.eu/schemas/edm/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX rdaGr2: <http://rdvocab.info/ElementsGr2/>

# Encuentra todos los ?object relacionados por alguna ?property con un ?agent nacido en una
# ?dutch_city
SELECT ?object ?property ?agent ?dutch_city
WHERE {
  ?proxy ?property ?agent .
  ?proxy ore:proxyFor ?object .

  ?agent rdf:type edm:Agent .
  ?agent rdaGr2:placeOfBirth ?dutch_city .

  # En DBPedia, ?dutch_city está definida por pertenecer al país "Netherlands"
  # La declaración SERVICE pregunta a
  # http://dbpedia.org/sparql qué ciudades pertenecen al país
  # "Netherlands". La respuesta obtenida de esta subconsulta se utilizará para
  # completar nuestra consulta originaria sobre los objetos
  # presentes en la base de datos de Europeana

  SERVICE <http://dbpedia.org/sparql> {
    ?dutch_city dbo:country dbr:Netherlands .
  }
}
# Potencialmente, esta consulta puede devolvernos un elevado número de objetos, por lo que vamos
# a solicitar solo los cien primeros a fin de agilizar la búsqueda
LIMIT 100
```

{% include figure.html filename="sparql-lod-13.png" caption="Visualización de la secuencia de la consulta de la solicitud SPARQL definida más arriba." %}


Una consulta interconectada como esta significa que podemos interrogar a Europeana sobre los objetos que cuentan con información geográfica (¿cuáles son las ciudades de Holanda?) sin necesidad de que Europeana tenga que almacenar y mantener esta información por sí misma. Es de esperar que, en el futuro, cada vez mayor cantidad de información LOD de carácter cultural esté enlazada con bases de datos autorizadas, como el ULAN (*Union List of Artist Names*) del [Getty Research Institute](https://www.getty.edu/research/).
Esto permitirá, por ejemplo, que el British Museum "externalice" la información biográfica acudiendo a los recursos más completos del GRI.

## Trabajando con resultados SPARQL

Una vez que hemos construido y ejecutado una consulta, ¿qué hacemos con estos resultados? Muchos puntos de entrada, como el del British Museum, ofrecen un navegador web que devuelve resultados legibles para los humanos. Sin embargo, el objetivo de los puntos de entrada SPARQL (y para eso están diseñados) es devolver datos estructurados para ser utilizados por otros programas.

### Exportar resultados en formato CSV

En la esquina superior derecha de la página de resultados del punto de entrada del BM, se encuentran enlaces para descargar los resultados en formato JSON y XML. Otros puntos de entrada también pueden ofrecer la opción de descargarlos en CSV/TSV; sin embargo, esta opción no siempre se encuentra disponible. Las salidas JSON y XML de un punto de entrada SPARQL contienen no solo los valores devueltos por la declaración `SELECT`, sino también metadatos adicionales sobre tipos de variables e idiomas.

El procesamiento de la versión XML de los resultados se puede realizar con herramientas tales como Beautiful Soup (véase la lección correspondiente en *[The Programming Historian](/lessons/intro-to-beautiful-soup.html)*) u [OpenRefine](https://openrefine.org/). Para convertir rápidamente los resultados JSON de un punto de entrada SPARQL a un formato tabular, recomiendo la utilidad gratuita de línea de comandos [jq](https://stedolan.github.io/jq/download/). (Para un tutorial sobre cómo utilizar programas de línea de comandos, véase ["Introduction to the Bash Command Line"](/lessons/intro-to-bash.html)). La siguiente orden convertirá el formato especial de resultados JSON de SPARQL en un fichero CSV, que podremos cargar en nuestro programa preferido para su posterior análisis y visualización:

```
jq -r '.head.vars as $fields | ($fields | @csv), (.results.bindings[] | [.[$fields[]].value] | @csv)' sparql.json > sparql.csv
```


### Exportar resultados a Palladio

La popular plataforma de análisis de datos [Palladio](https://hdlab.stanford.edu/palladio/) puede cargar directamente datos desde un punto de entrada SPARQL. En la parte inferior de la pantalla "Create a new project", el enlace "Load data from a SPARQL endpoint (beta)" nos proporciona un campo para escribir la dirección del punto de entrada y una caja para la consulta propiamente dicha. Dependiendo del punto de entrada, podemos necesitar especificar el tipo de fichero de salida en la dirección del punto de entrada; por ejemplo, para cargar datos desde el punto de entrada del British Museum, debemos utilizar la dirección `http://collection.britishmuseum.org/sparql.json`. Trata de pegar la consulta de agregación que utilizamos más arriba para el recuento de obras de arte según su tipología y clica en "Run query". Palladio debería mostrar una tabla de previsualización como esta:

{% include figure.html filename="sparql-lod-14.png" caption="Interfaz de Palladio para las consultas SPARQL." %}


Después de previsualizar los datos devueltos por el punto de entrada, clica en el botón "Load data" en la parte inferior de la pantalla para empezar a trabajar con ellos. (Véase esta lección de *[Programming Historian](/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas)* para un tutorial más detallado sobre Palladio).
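Si preferimos comprobar desde Python que el punto de entrada devuelve resultados JSON antes de cargarlos en Palladio, un esbozo mínimo podría ser el siguiente (asumimos que el punto de entrada sigue aceptando la consulta como parámetro `query`; los nombres de variables son ilustrativos y reutilizamos la consulta de agregación anterior):

```python
import requests

# Punto de entrada del British Museum con salida en formato JSON
ENDPOINT = "https://collection.britishmuseum.org/sparql.json"

CONSULTA = """
PREFIX bmo: <http://www.researchspace.org/ontology/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT ?type (COUNT(?type) as ?n)
WHERE {
  ?object bmo:PX_object_type ?object_type .
  ?object_type skos:prefLabel ?type .
}
GROUP BY ?type
ORDER BY DESC(?n)
"""

# Enviamos la consulta y convertimos la respuesta JSON en un diccionario
respuesta = requests.get(ENDPOINT, params={"query": CONSULTA})
datos = respuesta.json()

# Cada fila de resultados es un diccionario variable -> {"value": ...}
for fila in datos["results"]["bindings"]:
    print(fila["type"]["value"], fila["n"]["value"])
```

La estructura `head.vars` / `results.bindings` que recorre este esbozo es la misma que explota la orden de jq mostrada más arriba.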
[Por ejemplo, podríamos realizar una consulta que devuelva enlaces a las imágenes de los grabados realizados entre 1580 y 1600](https://collection.britishmuseum.org/sparql?query=%23+Return+object+links+and+creation+date%0D%0APREFIX+bmo%3A+%3Chttp%3A%2F%2Fcollection.britishmuseum.org%2Fid%2Fontology%2F%3E%0D%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0D%0APREFIX+ecrm%3A+%3Chttp%3A%2F%2Ferlangen-crm.org%2Fcurrent%2F%3E%0D%0APREFIX+xsd%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2001%2FXMLSchema%23%3E%0D%0ASELECT+DISTINCT+%3Fobject+%3Fdate+%3Fimage%0D%0AWHERE+%7B%0D%0A%0D%0A++%23+We%27ll+use+our+previous+command+to+search+only+for+objects+of+type+%22print%22%0D%0A++%3Fobject+bmo%3APX_object_type+%3Fobject_type+.%0D%0A++%3Fobject_type+skos%3AprefLabel+%22print%22+.%0D%0A%0D%0A++%23+We+need+to+link+though+several+nodes+to+find+the+creation+date+associated%0D%0A++%23+with+an+object%0D%0A++%3Fobject+ecrm%3AP108i_was_produced_by+%3Fproduction+.%0D%0A++%3Fproduction+ecrm%3AP9_consists_of+%3Fdate_node+.%0D%0A++%3Fdate_node+ecrm%3AP4_has_time-span+%3Ftimespan+.%0D%0A++%3Ftimespan+ecrm%3AP82a_begin_of_the_begin+%3Fdate+.%0D%0A%0D%0A++%23+Yes%2C+we+need+to+connect+quite+a+few+dots+to+get+to+the+date+node%21+Now+that%0D%0A++%23+we+have+it%2C+we+can+filter+our+results.+Because+we+are+filtering+a+date%2C+we%0D%0A++%23+must+attach+the+xsd%3Adate+tag+to+our+date+strings+so+that+SPARQL+knows+how+to%0D%0A++%23+parse+them.%0D%0A%0D%0A++FILTER%28%3Fdate+%3E%3D+%221580-01-01%22%5E%5Exsd%3Adate+%26%26+%3Fdate+%3C%3D+%221600-01-01%22%5E%5Exsd%3Adate%29%0D%0A++%0D%0A++%3Fobject+bmo%3APX_has_main_representation+%3Fimage+.%0D%0A%7D%0D%0ALIMIT+100#query=%23+Return+object+links+and+creation+date%0APREFIX+bmo%3A+%3Chttp%3A%2F%2Fwww.researchspace.org%2Fontology%2F%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+xsd%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2001%2FXMLSchema%23%3E%0APREFIX+ecrm%3A+%3Chttp%3A%2F%2Fwww.cidoc-crm.org%2Fcidoc-crm%2F%3E%0ASELECT+DISTINCT+%3Fobject+%3Fdate+%3Fimage%0AWHERE+%7B%0A++%0A++%23+We'll+use+our+previous+command+to+search+only+for+objects+of+type+%22print%22%0A++%3Fobject+bmo%3APX_object_type+%3Fobject_type+.%0A++%3Fobject_type+skos%3AprefLabel+%22print%22+.%0A%0A++%23+We+need+to+link+though+several+nodes+to+find+the+creation+date+associated%0A++%23+with+an+object%0A++%3Fobject+ecrm%3AP108i_was_produced_by+%3Fproduction+.%0A++%3Fproduction+ecrm%3AP9_consists_of+%3Fdate_node+.%0A++%3Fdate_node+ecrm%3AP4_has_time-span+%3Ftimespan+.%0A++%3Ftimespan+ecrm%3AP82a_begin_of_the_begin+%3Fdate+.%0A%0A++%0A++%23+Yes%2C+we+need+to+connect+quite+a+few+dots+to+get+to+the+date+node!+Now+that%0A++%23+we+have+it%2C+we+can+filter+our+results.+Because+we+are+filtering+a+date%2C+we%0A++%23+must+attach+the+xsd%3Adate+tag+to+our+date+strings+so+that+SPARQL+knows+how+to%0A++%23+parse+them.%0A%0A++FILTER(%3Fdate+%3E%3D+%221580-01-01%22%5E%5Exsd%3Adate+%26%26+%3Fdate+%3C%3D+%221600-01-01%22%5E%5Exsd%3Adate)%0A++%0A++%3Fobject+bmo%3APX_has_main_representation+%3Fimage+.%0A%7D%0ALIMIT+100), y representar estos datos como una galería de imágenes clasificadas por fecha: + +{% include figure.html filename="sparql-lod-15.png" caption="Galería de imágenes con línea de tiempo de sus fechas de creación generada utilizando Palladio." 
%}


Adviértase que Palladio está diseñado para funcionar con un conjunto relativamente pequeño de datos (del orden de cientos o miles de filas, no decenas de miles), por lo que pudiera ser necesario utilizar el comando LIMIT, que ya empleamos anteriormente en la consulta en el punto de entrada de Europeana, para reducir el número de resultados obtenidos y así evitar que el programa se quede bloqueado.

## Lecturas adicionales

En este tutorial hemos examinado la estructura de LOD y hemos realizado un ejemplo real de cómo escribir consultas SPARQL para la base de datos del British Museum. También hemos aprendido cómo utilizar comandos de agregación en SPARQL para agrupar, contar y clasificar resultados más allá de la simple operación de listarlos.

Con todo, existen otras muchas maneras de modificar estas consultas, tales como introducir operadores `OR` y `UNION` (para describir consultas condicionales) y declaraciones `CONSTRUCT` (para inferir nuevos enlaces basados en reglas definidas), realizar búsquedas de texto completo o llevar a cabo otras operaciones matemáticas más complejas que la del recuento. Para un informe más detallado de los comandos disponibles en SPARQL, véanse estos enlaces:

* [Wikibooks SPARQL tutorial](https://en.wikibooks.org/wiki/XQuery/SPARQL_Tutorial)
* [Full W3C Overview of SPARQL](https://www.w3.org/TR/sparql11-overview/)

Tanto la web de Europeana como la del Getty Vocabularies ofrecen ejemplos extensos y bastante complejos de consultas que pueden constituir buenos recursos para comprender cómo buscar en sus datos:

* [Europeana SPARQL how-to](https://labs.europeana.eu/api/linked-open-data-SPARQL-endpoint)
* [Getty Vocabularies Example Queries](https://vocab.getty.edu/queries#Finding_Subjects)
diff --git a/es/lecciones/reutilizando-colecciones-digitales-glam-labs.md b/es/lecciones/reutilizando-colecciones-digitales-glam-labs.md
index 5d425c9366..782196458a 100644
--- a/es/lecciones/reutilizando-colecciones-digitales-glam-labs.md
+++ b/es/lecciones/reutilizando-colecciones-digitales-glam-labs.md
@@ -43,11 +43,11 @@ Tradicionalmente las instituciones de patrimonio cultural conocidas como [GLAM](

El avance de las tecnologías ha favorecido un nuevo contexto en el que las colecciones digitales pueden ser utilizadas en investigación por medio de diferentes métodos, como visión por computador o técnicas de aprendizaje automático. Actualmente, las instituciones GLAM promueven e incentivan la reutilización de sus colecciones digitales a través de programas de colaboración directa con investigadores pero también con empresas e instituciones académicas. Las instituciones de patrimonio cultural han comenzado a experimentar de forma creativa e innovadora con las colecciones digitales, que tradicionalmente han puesto a disposición del público, lo que ha favorecido la creación de nuevos espacios en el seno de las instituciones, conocidos como "Labs".

-Uno de los primeros, líder en este ámbito, y que ha establecido las bases para el resto, es el de la [Biblioteca Británica](http://labs.bl.uk), financiado por la [Mellon Foundation](https://mellon.org/). Como resultado de dos encuentros de carácter internacional en la sede de la Biblioteca Británica y en la [Biblioteca Real de Dinamarca](https://www.kb.dk/en), en Copenhague, se creó la [Comunidad Internacional GLAM Labs](https://glamlabs.io) compuesta por numerosas instituciones, que se muestran en la Figura 1.
+Uno de los primeros, líder en este ámbito, y que ha establecido las bases para el resto, es el de la [Biblioteca Británica](https://labs.bl.uk), financiado por la [Mellon Foundation](https://mellon.org/). Como resultado de dos encuentros de carácter internacional en la sede de la Biblioteca Británica y en la [Biblioteca Real de Dinamarca](https://www.kb.dk/en), en Copenhague, se creó la [Comunidad Internacional GLAM Labs](https://glamlabs.io) compuesta por numerosas instituciones, que se muestran en la Figura 1. {% include figure.html filename="reutilizando-colecciones-digitales-glam-labs1.png" caption="Mapa que representa las instituciones de la Comunidad Internacional GLAM Labs" %} -En septiembre de 2019, dieciséis personas pertenecientes a dicha comunidad se reunieron en Doha, Catar, para escribir, a partir de la metodología [Book Sprint](https://www.booksprints.net/book/book-sprint-open-a-glam-lab/), el libro [Open a GLAM Lab](https://glamlabs.io/books/open-a-glam-lab/) que actualmente ha sido traducido a diversos idiomas, entre ellos [español](http://rua.ua.es/dspace/handle/10045/110281) y [árabe](https://qspace.qu.edu.qa/handle/10576/13484). +En septiembre de 2019, dieciséis personas pertenecientes a dicha comunidad se reunieron en Doha, Catar, para escribir, a partir de la metodología [Book Sprint](https://www.booksprints.net/book/book-sprint-open-a-glam-lab/), el libro [Open a GLAM Lab](https://glamlabs.io/books/open-a-glam-lab/) que actualmente ha sido traducido a diversos idiomas, entre ellos [español](https://rua.ua.es/dspace/handle/10045/110281) y [árabe](https://qspace.qu.edu.qa/handle/10576/13484). Una colección digital publicada por una institución GLAM puede estar formada por cualquier tipo de contenido incluyendo metadatos, textos, imágenes, mapas, videos o audios. En este sentido, reutilizar una colección digital consiste en analizar el contenido para adquirir nuevo conocimiento. El análisis puede constar de fases tales como extracción, transformación y enriquecimiento. Como resultado podemos obtener una nueva colección descrita con otro vocabulario más expresivo y rico, una visualización que facilite el descubrimiento de conocimiento, o una agregación de diferentes colecciones digitales basadas en un tema específico. @@ -55,7 +55,7 @@ A la hora de reutilizar una colección digital existen diferentes aspectos que d Recientemente se publicó el estudio *[Collections as data](https://collectionsasdata.github.io/)*, que proporciona un nuevo enfoque para publicar las colecciones digitales que facilitan el procesamiento por parte de las computadoras. Por ejemplo, es posible utilizar un corpus de miles de textos para identificar personas o lugares de forma automática. Las computadoras permiten la aplicación de métodos de investigación en Humanidades Digitales como [minería de textos](https://es.wikipedia.org/wiki/Miner%C3%ADa_de_textos), [visualización de datos](https://es.wikipedia.org/wiki/Visualizaci%C3%B3n_de_datos) o el uso de [Sistemas de Información Geográfica (SIG)](https://es.wikipedia.org/wiki/Sistema_de_informaci%C3%B3n_geogr%C3%A1fica), como también [procesamiento de lenguaje natural](https://es.wikipedia.org/wiki/Procesamiento_de_lenguajes_naturales), [inteligencia artificial](https://es.wikipedia.org/wiki/Inteligencia_artificial) y [visión por computador](https://es.wikipedia.org/wiki/Visi%C3%B3n_artificial). 
-La combinación de las colecciones digitales proporcionadas por las instituciones GLAM, junto a código y narrativa, proporcionan el marco ideal para la reproducción de los resultados de investigación. En este sentido, los Jupyter Notebooks permiten integrar estos tres elementos y se han convertido en un recurso muy popular tanto en la comunidad investigadora como en la educativa. Numerosos proyectos se centran en la publicación de colecciones de notebooks, como por ejemplo [GLAM Workbench](https://glam-workbench.github.io/) o [GLAM Jupyter Notebooks](http://data.cervantesvirtual.com/blog/notebooks/). Los Labs favorecen un espacio para poner de manifiesto estas nuevas tendencias para mejorar y mantener la relevancia de las instituciones de patrimonio cultural. +La combinación de las colecciones digitales proporcionadas por las instituciones GLAM, junto a código y narrativa, proporcionan el marco ideal para la reproducción de los resultados de investigación. En este sentido, los Jupyter Notebooks permiten integrar estos tres elementos y se han convertido en un recurso muy popular tanto en la comunidad investigadora como en la educativa. Numerosos proyectos se centran en la publicación de colecciones de notebooks, como por ejemplo [GLAM Workbench](https://glam-workbench.github.io/) o [GLAM Jupyter Notebooks](https://data.cervantesvirtual.com/blog/notebooks/). Los Labs favorecen un espacio para poner de manifiesto estas nuevas tendencias para mejorar y mantener la relevancia de las instituciones de patrimonio cultural. En esta lección se incluyen varias opciones para localizar colecciones digitales publicadas por instituciones GLAM para su reutilización. A continuación, se introducen dos ejemplos implementados como Jupyter Notebooks que muestran de forma reproducible cómo reutilizar las colecciones digitales a través de diferentes técnicas que se encuentran disponibles en [Zenodo](https://zenodo.org/record/5340157)[^1]. El último apartado corresponde a las conclusiones. @@ -65,10 +65,10 @@ Actualmente existen numerosos sitios web donde es posible localizar colecciones | Institución | Colección | URL | | ------------- | ------------- | ------------- | -| Bibliotèque Nationale de France | BnF API et jeux de données | [http://api.bnf.fr/](http://api.bnf.fr/) | +| Bibliotèque Nationale de France | BnF API et jeux de données | [https://api.bnf.fr/](https://api.bnf.fr/) | | Bibliothèque Nationale du Luxembourg | BnL Open Data | [https://data.bnl.lu/](https://data.bnl.lu/) | | British Library | BL Labs | [temporalmente no disponible] | -| Biblioteca Virtual Miguel de Cervantes | BVMC Labs | [http://data.cervantesvirtual.com/blog/labs](http://data.cervantesvirtual.com/blog/labs) | +| Biblioteca Virtual Miguel de Cervantes | BVMC Labs | [https://data.cervantesvirtual.com/blog/labs](https://data.cervantesvirtual.com/blog/labs) | | Det Kgl. 
Bibliotek | KB Labs | [https://labs.kb.dk/](https://labs.kb.dk/) | | Europeana | Europeana IIIF APIs | [https://pro.europeana.eu/page/iiif](https://pro.europeana.eu/page/iiif) | | History Trust of South Australia | Learn section | [https://history.sa.gov.au/](https://history.sa.gov.au/) | @@ -79,7 +79,7 @@ Actualmente existen numerosos sitios web donde es posible localizar colecciones | Staatsbibliothek zu Berlin | SBB Labs | [https://lab.sbb.berlin/?lang=en](https://lab.sbb.berlin/?lang=en)| | State Library New South Wales | DX Lab | [https://dxlab.sl.nsw.gov.au](https://dxlab.sl.nsw.gov.au)| -Las instituciones GLAM publican colecciones digitales en diferentes formatos. Tradicionalmente han publicado diversos tipos de materiales como imágenes, textos y mapas. Recientemente, han aparecido nuevas formas de publicación que utilizan tecnologías basadas en la [Web Semántica](https://es.wikipedia.org/wiki/Web_sem%C3%A1ntica). Estas técnicas permiten el enriquecimiento con repositorios externos a partir de la creación de enlaces. [Wikidata](https://www.wikidata.org) se ha convertido en un repositorio muy popular en el ámbito de las instituciones GLAM y muchas de ellas ya disponen de propiedades específicas para enlazar sus recursos como autores y obras. Por ejemplo, la [Biblioteca Virtual Miguel de Cervantes](http://www.cervantesvirtual.com/) dispone de la propiedad [P2799](https://www.wikidata.org/wiki/Property:P2799) para enlazar autores desde su repositorio de datos abiertos hacia Wikidata. +Las instituciones GLAM publican colecciones digitales en diferentes formatos. Tradicionalmente han publicado diversos tipos de materiales como imágenes, textos y mapas. Recientemente, han aparecido nuevas formas de publicación que utilizan tecnologías basadas en la [Web Semántica](https://es.wikipedia.org/wiki/Web_sem%C3%A1ntica). Estas técnicas permiten el enriquecimiento con repositorios externos a partir de la creación de enlaces. [Wikidata](https://www.wikidata.org) se ha convertido en un repositorio muy popular en el ámbito de las instituciones GLAM y muchas de ellas ya disponen de propiedades específicas para enlazar sus recursos como autores y obras. Por ejemplo, la [Biblioteca Virtual Miguel de Cervantes](https://www.cervantesvirtual.com/) dispone de la propiedad [P2799](https://www.wikidata.org/wiki/Property:P2799) para enlazar autores desde su repositorio de datos abiertos hacia Wikidata. @@ -233,7 +233,7 @@ Este ejemplo se basa en la recuperación de localizaciones geográficas relacion En este sentido, este ejemplo pretende introducir los pasos necesarios para reutilizar una colección digital publicada, siguiendo los principios de Linked Open Data que facilitan el establecimiento de enlaces a repositorios externos. Los repositorios semánticos publicados por instituciones GLAM son una fuente de información de gran valor que se encuentran a disposición de los investigadores sin ningún tipo de restricción para su uso. Sin embargo, su reutilización no es sencilla ya que requiere conocimientos avanzados en tecnologías como [RDF](https://es.wikipedia.org/wiki/Resource_Description_Framework) (del inglés Resource Description Framework) o SPARQL para poder realizar las consultas. -Este ejemplo utiliza los metadatos del repositorio que indican localizaciones, como por ejemplo las propiedades `blt:publication` y `blt:projectedPublication` que indican lugares de publicación. 
Gracias a que los registros están enlazados a GeoNames, vamos a poder acceder a Wikidata para recuperar las coordenadas geográficas de las localizaciones y mostrar los beneficios de Linked Open Data. El vocabulario utilizado por BNB Linked Data es [Bibliographic Ontology (BIBO)](http://bibliontology.com/) que es un vocabulario sencillo que permite describir los metadatos de un repositorio bibliográfico.
+Este ejemplo utiliza los metadatos del repositorio que indican localizaciones, como por ejemplo las propiedades `blt:publication` y `blt:projectedPublication` que indican lugares de publicación. Gracias a que los registros están enlazados a GeoNames, vamos a poder acceder a Wikidata para recuperar las coordenadas geográficas de las localizaciones y mostrar los beneficios de Linked Open Data. El vocabulario utilizado por BNB Linked Data es [Bibliographic Ontology (BIBO)](https://bibliontology.com/), que es un vocabulario sencillo que permite describir los metadatos de un repositorio bibliográfico.

En primer lugar, importamos las librerías necesarias para procesar esta colección: [folium](https://pypi.org/project/folium/0.1.4/)[^4] para visualizar información geográfica en un mapa; csv y json para el procesamiento de los formatos de entrada y salida; requests para la realización de peticiones HTTP; pandas para la gestión de datos tabulares con columnas de tipo heterogéneo y [matplotlib](https://matplotlib.org/)[^5] para la creación de gráficas.

@@ -455,7 +455,7 @@ De forma similar a como se ha creado el mapa en el ejemplo de Miguel de Cervante

En el primer ejemplo se han reutilizado dos colecciones digitales descritas con MARCXML. Aunque la mayoría del código es reutilizable para ambos casos, los campos utilizados para describir los metadatos en cada colección son diferentes y por lo tanto es necesario un análisis previo.

-En el caso de la BNB, y teniendo en cuenta la forma de representar los distintos roles que se pueden dar en un repositorio bibliográfico, la elección del vocabulario a utilizar puede ser crucial a la hora de dotar de suficiente expresividad a los metadatos. En este sentido, vocabularios ricos en términos semánticos como [Resource Description and Access (RDA)](http://www.rdaregistry.info) proporcionan un listado de elementos para representar numerosos roles con el objetivo de relacionar las obras con los autores como por ejemplo director, ilustrador, impresor o narrador. Además, es relevante resaltar que tan solo alrededor de un 50% de las obras se encuentran enlazadas a GeoNames y que el mapa que obtenemos como resultado no incluye el total de ubicaciones del repositorio.
+En el caso de la BNB, y teniendo en cuenta la forma de representar los distintos roles que se pueden dar en un repositorio bibliográfico, la elección del vocabulario a utilizar puede ser crucial a la hora de dotar de suficiente expresividad a los metadatos. En este sentido, vocabularios ricos en términos semánticos como [Resource Description and Access (RDA)](https://www.rdaregistry.info) proporcionan un listado de elementos para representar numerosos roles con el objetivo de relacionar las obras con los autores como por ejemplo director, ilustrador, impresor o narrador. Además, es relevante resaltar que tan solo alrededor de un 50% de las obras se encuentran enlazadas a GeoNames y que el mapa que obtenemos como resultado no incluye el total de ubicaciones del repositorio.
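El flujo descrito en los fragmentos anteriores (leer los lugares de publicación, recuperar sus coordenadas a través de Wikidata y dibujarlas con folium) puede resumirse en un esbozo mínimo. Se trata de una suposición nuestra: la consulta SPARQL, los identificadores `wd:Q84` y `wd:Q1492` y el nombre del archivo de salida no proceden de los notebooks publicados en Zenodo.

```python
# Esbozo hipotético: recuperar coordenadas desde Wikidata y situarlas en un mapa.
import requests
import folium

consulta = """
SELECT ?lugarLabel ?coord WHERE {
  VALUES ?lugar { wd:Q84 wd:Q1492 }   # Londres y Barcelona, solo como ejemplo
  ?lugar wdt:P625 ?coord .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "es". }
}
"""

respuesta = requests.get(
    "https://query.wikidata.org/sparql",
    params={"query": consulta, "format": "json"},
    headers={"User-Agent": "ejemplo-didactico/0.1"},  # WDQS pide identificarse
)
filas = respuesta.json()["results"]["bindings"]

mapa = folium.Map(location=[45.0, 0.0], zoom_start=4)
for fila in filas:
    # wdt:P625 devuelve un literal con la forma "Point(longitud latitud)"
    longitud, latitud = fila["coord"]["value"].strip("Point()").split()
    folium.Marker(
        [float(latitud), float(longitud)],
        popup=fila["lugarLabel"]["value"],
    ).add_to(mapa)

mapa.save("mapa_lugares.html")  # genera un mapa interactivo en HTML
```

El resultado es un archivo HTML con un marcador por cada lugar; en los notebooks reales las coordenadas provienen de los registros de la BNB enlazados a GeoNames, no de una lista fija.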
## Conclusiones diff --git a/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.md b/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.md index 3d22b79828..d64d57c4da 100644 --- a/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.md +++ b/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.md @@ -72,7 +72,7 @@ Hay que tener en cuenta que cuando alguien se refiere a un "sitio web de Jekyll" Dado que los sitios estáticos no son más que archivos de texto (sin una base de datos que complique las cosas), es posible *versionarlos* fácilmente, es decir, usar una herramienta para llevar un registro de las diferentes versiones del sitio a lo largo del tiempo rastreando los cambios en los archivos de texto que lo componen. El control de versiones es muy útil cuando deseamos combinar ambas versiones (por ejemplo, dos estudiantes escriben una publicación de blog juntos y deseamos combinar sus dos versiones) o cuando queremos comparar archivos para buscar diferencias entre ellos (por ejemplo, "¿Cómo se describía el proyecto en la página 'Acerca de' original?"). El control de versiones es muy útil cuando se trabaja en equipo (por ejemplo, permite combinar y rastrear el trabajo de diferentes personas), pero también es útil al crear o ejecutar un sitio web por nuestra propia cuenta. -Puedes leer más acerca de [Jekyll](http://jekyllrb.com/docs/home/) o [generadores de sitios estáticos](https://davidwalsh.name/introduction-static-site-generators) (en inglés). +Puedes leer más acerca de [Jekyll](https://jekyllrb.com/docs/home/) o [generadores de sitios estáticos](https://davidwalsh.name/introduction-static-site-generators) (en inglés). ### GitHub & GitHub Pages @@ -132,7 +132,7 @@ La cuenta de usuario de GitHub nos permite alojar nuestro sitio web (ponerlo a d 1\. Visita [GitHub.com](https://github.com/) y haz clic en el botón verde "Sign up" (Registrarse). -2\. En la página siguiente, ingresa el nombre de usuario deseado. El nombre de usuario es visible para otros usuarios, nos identifica en GitHub y también es parte de la URL de nuestro sitio. Por ejemplo, si el nombre de usuario de GitHub es *hdcaicyt*, la URL del sitio será http://hdcaicyt.github.io/. (Ten en cuenta que uno también puede comprar su propio nombre de dominio y usarlo para este sitio, pero eso no se tratará en este tutorial). Escribe una dirección de correo electrónico de uso habitual y añade una contraseña que contenga al menos un número y una letra minúscula. +2\. En la página siguiente, ingresa el nombre de usuario deseado. El nombre de usuario es visible para otros usuarios, nos identifica en GitHub y también es parte de la URL de nuestro sitio. Por ejemplo, si el nombre de usuario de GitHub es *hdcaicyt*, la URL del sitio será https://hdcaicyt.github.io/. (Ten en cuenta que uno también puede comprar su propio nombre de dominio y usarlo para este sitio, pero eso no se tratará en este tutorial). Escribe una dirección de correo electrónico de uso habitual y añade una contraseña que contenga al menos un número y una letra minúscula. 3\. En el recuadro "Verify your account", presiona el botón "Verify" (Verificar). Usa las flechas para poner la imagen en el sentido correcto. Finalmente, haz clic en "Select a plan" (Seleccionar un plan). @@ -164,7 +164,7 @@ La aplicación GitHub Desktop facilita la actualización del sitio web luego de ### Editor de texto -Es necesario descargar e instalar un editor de texto para realizar pequeñas personalizaciones al código de nuestro sitio Jekyll. 
Algunas buenas opciones gratuitas incluyen [jEdit](https://www.jedit.org), [Atom](https://atom.io/), [SublimeText](https://www.sublimetext.com/3), [Notepad ++](https://notepad-plus-plus.org/) para Windows o [BBedit](http://www.barebones.com/products/bbedit) para Mac. Los procesadores de texto, como Microsoft Word o WordPad, no son una buena opción porque es fácil olvidar cómo formatear y guardar el archivo; es posible agregar accidentalmente formatos y caracteres extra y/o invisibles que pueden generar problemas en el sitio. Por eso es mejor usar programas que puedan guardar lo que escribimos como texto plano (por ejemplo, HTML o Markdown). +Es necesario descargar e instalar un editor de texto para realizar pequeñas personalizaciones al código de nuestro sitio Jekyll. Algunas buenas opciones gratuitas incluyen [jEdit](https://www.jedit.org), [Atom](https://atom.io/), [SublimeText](https://www.sublimetext.com/3), [Notepad ++](https://notepad-plus-plus.org/) para Windows o [BBedit](https://www.barebones.com/products/bbedit) para Mac. Los procesadores de texto, como Microsoft Word o WordPad, no son una buena opción porque es fácil olvidar cómo formatear y guardar el archivo; es posible agregar accidentalmente formatos y caracteres extra y/o invisibles que pueden generar problemas en el sitio. Por eso es mejor usar programas que puedan guardar lo que escribimos como texto plano (por ejemplo, HTML o Markdown). *Opcional:* Consulta la sección ["Creación en Markdown"](#section5-2) más abajo, para más información sobre un programa de edición específico en Markdown, que también puedes instalar cuando ya estemos en la etapa de crear páginas web y/o publicaciones (posts) de blog. @@ -214,7 +214,7 @@ Abre una ventana de línea de comandos (*Aplicaciones > Utilidades > Terminal*) ### Herramientas de línea de comandos -Primero vamos a instalar las "herramientas de línea de comandos" de Mac para poder usar [Homebrew](http://brew.sh/) (que instalaremos a continuación). Homebrew permite descargar e instalar desde la línea de comandos software de código abierto (es un "administrador de paquetes"), lo que facilitará la instalación de Ruby (el lenguaje en el que se basa Jekyll). +Primero vamos a instalar las "herramientas de línea de comandos" de Mac para poder usar [Homebrew](https://brew.sh/) (que instalaremos a continuación). Homebrew permite descargar e instalar desde la línea de comandos software de código abierto (es un "administrador de paquetes"), lo que facilitará la instalación de Ruby (el lenguaje en el que se basa Jekyll). En el Terminal, pega el siguiente código y presiona Enter: @@ -235,7 +235,7 @@ Una vez que termine la instalación, va a aparecer un mensaje de instalación ex ### Homebrew -Al terminar la instalación de las herramientas de la línea de comandos, regresa a la ventana de la línea de comandos y copia el siguiente texto para instalar [Homebrew](http://brew.sh/): +Al terminar la instalación de las herramientas de la línea de comandos, regresa a la ventana de la línea de comandos y copia el siguiente texto para instalar [Homebrew](https://brew.sh/): ``` /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" @@ -340,7 +340,7 @@ Haz clic derecho en la carpeta "GitHub" y elige "Copiar GitHub". La ruta de la c Es necesario esperar a que vuelva a aparecer el prompt para continuar con el siguiente paso. -4\. 
La URL pública de tu sitio tendrá la siguiente forma: [http://amandavisconti.github.io/JekyllDemo/](http://amandavisconti.github.io/JekyllDemo/) (*amandavisconti* es el usuario de GitHub de la autora y *JekyllDemo* el nombre del sitio que ingresamos en este paso (*es posible pagar y usar tu propia [URL personalizada](#section7-2), pero no lo cubriremos en este tutorial*). **Los sitios en mayúsculas y minúsculas *no* dirigen al mismo sitio web**, así que a diferencia del ejemplo **JekyllDemo** es recomendable elegir un nombre todo en minúsculas para asegurarse de que la gente lo escriba correctamente.
+4\. La URL pública de tu sitio tendrá la siguiente forma: [https://amandavisconti.github.io/JekyllDemo/](https://amandavisconti.github.io/JekyllDemo/) (*amandavisconti* es el usuario de GitHub de la autora y *JekyllDemo* el nombre del sitio que ingresamos en este paso; *es posible pagar y usar tu propia [URL personalizada](#section7-2), pero no lo cubriremos en este tutorial*). **Los sitios en mayúsculas y minúsculas *no* dirigen al mismo sitio web**, así que a diferencia del ejemplo **JekyllDemo** es recomendable elegir un nombre todo en minúsculas para asegurarse de que la gente lo escriba correctamente.

En la línea de comandos, escribe lo siguiente (reemplaza *JekyllDemo* con el nombre que desees para tu sitio):

@@ -428,7 +428,7 @@ Ya tenemos un sitio web básico privado, accesible únicamente en nuestra comput

- **email**: tu dirección de email.
- **description**: la descripción del sitio web que será usada por los motores de búsqueda y que será utilizada por RSS.
- **baseurl**: completa entre las comillas con una barra oblicua **/** seguida del nombre de la carpeta de tu sitio web (por ej., "/JekyllDemo") para que el sitio tome la URL correcta. Asegúrate de que tu carpeta está en el mismo repositorio de GitHub con el mismo nombre y termina con la barra oblicua ("/"). Esto se requiere para publicarlo en GitHub Pages.
  - **url**: reemplaza "http://yourdomain.com" por "localhost:4000" para que el navegador tome la versión local de tu sitio en la URL correcta.
+ - **url**: reemplaza "https://yourdomain.com" por "localhost:4000" para que el navegador tome la versión local de tu sitio en la URL correcta.
- **twitter_username**: tu nombre de usuario de Twitter (no incluir @).
- **github_username**: tu nombre de usuario de GitHub.

@@ -472,11 +472,11 @@ Esta sección describirá cómo crear páginas o entradas de blog en tu sitio we

Markdown es un lenguaje de marcado para dar formato a tus escritos para que puedan ser leídos en la web: es un conjunto de símbolos, fáciles de recordar, que muestran dónde debe añadirse el formato del texto (por ejemplo, un # delante del texto significa que se le da formato como encabezado, mientras que un * significa que tendrá formato como elemento de lista con viñetas). Para Jekyll en particular, Markdown permite escribir páginas web y entradas de blog de una manera cómoda para los autores (por ejemplo, no es necesario buscar/añadir etiquetas HTML mientras se intenta escribir un ensayo), y que el escrito aparezca con un buen formato en la web (es decir, convertido de texto a HTML).

-En esta lección no cubriremos Markdown; si no estás familiarizado con él, puedes crear entradas y páginas sin formato (es decir, sin negrita / cursiva, encabezados, listas enumeradas o viñetas). 
Pero es sencillo aprender a agregarlos: aquí hay una guía de [referencias](http://kramdown.gettalong.org/quickref.html) de markdown en inglés, también puedes consultar esta guía en [español](https://docs.github.com/es/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax), así como la lección en [Programming Historian de Sarah Simpkin sobre el cómo y porque escribir con Markdown](/es/lecciones/introduccion-a-markdown). Consulta estos enlaces si quieres dar formato al texto (cursiva, negrita, encabezados, listas enumeradas o viñetas), añadir hipervínculos, incrustar imágenes u otros archivos.
+En esta lección no cubriremos Markdown; si no estás familiarizado con él, puedes crear entradas y páginas sin formato (es decir, sin negrita / cursiva, encabezados, listas enumeradas o viñetas). Pero es sencillo aprender a agregarlos: aquí hay una guía de [referencias](https://kramdown.gettalong.org/quickref.html) de markdown en inglés, también puedes consultar esta guía en [español](https://docs.github.com/es/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax), así como la lección en [Programming Historian de Sarah Simpkin sobre el cómo y por qué escribir con Markdown](/es/lecciones/introduccion-a-markdown). Consulta estos enlaces si quieres dar formato al texto (cursiva, negrita, encabezados, listas enumeradas o viñetas), añadir hipervínculos, incrustar imágenes u otros archivos.

-Asegúrate que la guía de referencias de Markdown que consultes sea similar a "[kramdown](http://kramdown.gettalong.org/quickref.html)", porque es lo que admite GitHub Pages (donde alojaremos nuestro sitio web). (Hay [varios "tipos" de Markdown](https://github.com/jgm/CommonMark/wiki/Markdown-Flavors) con sutiles diferencias en lo que respecta a símbolos, pero en su mayoría los que se usan más frecuentemente, como los que crean el formato de encabezados, son iguales. Por lo tanto, puedes utilizar una hoja de referencia Markdown que no especifique que se trate de kramdown, pero si recibes errores en tu sitio web usando símbolos que no están incluidos en kramdown, este podría ser el motivo).
+Asegúrate de que la guía de referencias de Markdown que consultes sea similar a "[kramdown](https://kramdown.gettalong.org/quickref.html)", porque es lo que admite GitHub Pages (donde alojaremos nuestro sitio web). (Hay [varios "tipos" de Markdown](https://github.com/jgm/CommonMark/wiki/Markdown-Flavors) con sutiles diferencias en lo que respecta a símbolos, pero en su mayoría los que se usan más frecuentemente, como los que crean el formato de encabezados, son iguales. Por lo tanto, puedes utilizar una hoja de referencia Markdown que no especifique que se trate de kramdown, pero si recibes errores en tu sitio web usando símbolos que no están incluidos en kramdown, este podría ser el motivo).

-Si te interesa un editor de Markdown, puedes utilizar uno como [Typora](http://www.typora.io/) (OS X y Windows; de descarga gratuita), que te permitirá utilizar atajos de teclado (por ejemplo, resaltar texto y presionar cmd-B o Ctrl-B para ponerlo en negrita) y/o hacer que se muestre tal y cómo se verá en la web (ver los encabezados con el estilo de los encabezados, en lugar del texto normal con un # delante de ellos). 
+Si te interesa un editor de Markdown, puedes utilizar uno como [Typora](https://www.typora.io/) (OS X y Windows; de descarga gratuita), que te permitirá utilizar atajos de teclado (por ejemplo, resaltar texto y presionar cmd-B o Ctrl-B para ponerlo en negrita) y/o hacer que se muestre tal y como se verá en la web (ver los encabezados con el estilo de los encabezados, en lugar del texto normal con un # delante de ellos).

### Creación de páginas

@@ -497,7 +497,7 @@ Si te interesa un editor de Markdown, puedes utilizar uno como [Typora](http://w

{% include figure.html filename="building-static-sites-with-jekyll-github-pages-22.png" caption="La nueva página en nuestro sitio aparece en el menú" %}

-Como referencia, puedes consultar [un ejemplo de página](http://amandavisconti.github.io/JekyllDemo/resume/) en mi sitio de demostración, o ver [el archivo que está detrás de esa página](https://raw.githubusercontent.com/amandavisconti/JekyllDemo/gh-pages/resume.md).
+Como referencia, puedes consultar [un ejemplo de página](https://amandavisconti.github.io/JekyllDemo/resume/) en mi sitio de demostración, o ver [el archivo que está detrás de esa página](https://raw.githubusercontent.com/amandavisconti/JekyllDemo/gh-pages/resume.md).

### Creación de entradas

@@ -531,7 +531,7 @@ Ten en cuenta que **la URL de la publicación** es la URL de tu sitio web local

**Para crear nuevos posts**, duplica un archivo existente. Recuerda cambiar el texto preliminar, el contenido dentro de la entrada, así como el nombre del archivo (fecha y título).

-Como referencia, puedes consultar [el siguiente ejemplo de entrada](https://amandavisconti.github.io/JekyllDemo/2016/11/12/a-post-about-my-research.html) en mi sitio de demostración, o acceder al [código que ejecuta esa entrada](http://raw.githubusercontent.com/amandavisconti/JekyllDemo/gh-pages/_posts/2016-02-29-a-post-about-my-research.markdown).
+Como referencia, puedes consultar [el siguiente ejemplo de entrada](https://amandavisconti.github.io/JekyllDemo/2016/11/12/a-post-about-my-research.html) en mi sitio de demostración, o acceder al [código que ejecuta esa entrada](https://raw.githubusercontent.com/amandavisconti/JekyllDemo/gh-pages/_posts/2016-02-29-a-post-about-my-research.markdown).

## "Hosting" en GitHub Pages

@@ -610,18 +610,18 @@ Puedes personalizar el tema de tu sitio realizando cambios en los archivos que s

- Tema ["Ed" para ediciones digitales mínimas](https://github.com/minicomp/ed/), de Alex Gil (gratis)
- Tema ["Digital Edition"](https://github.com/emory-libraries-ecds/digitaledition-jekylltheme), de Rebecca Sutton Koeser (gratis)
-- El directorio de [Jekyll Themes](http://jekyllthemes.org/) (gratis)
-[JekyllThemes.io](http://jekyllthemes.io/) (gratis y pago)
+- El directorio de [Jekyll Themes](https://jekyllthemes.org/) (gratis)
+[JekyllThemes.io](https://jekyllthemes.io/) (gratis y pago)

### Funcionalidad

-- Los [plugins de Jekyll](http://jekyllrb.com/docs/plugins/) te permiten añadir pequeños segmentos de código que permiten sumar funcionalidades a tu sitio, tales como [realizar búsquedas de texto](https://github.com/PascalW/jekyll_indextank), [permitir el uso de emojis](https://github.com/yihangho/emoji-for-jekyll), o [crear nubes de palabras](https://gist.github.com/ilkka/710577). 
+- Los [plugins de Jekyll](https://jekyllrb.com/docs/plugins/) te permiten añadir pequeños segmentos de código que permiten sumar funcionalidades a tu sitio, tales como [realizar búsquedas de texto](https://github.com/PascalW/jekyll_indextank), [permitir el uso de emojis](https://github.com/yihangho/emoji-for-jekyll), o [crear nubes de palabras](https://gist.github.com/ilkka/710577). - Si deseas alojar tu sitio en GitHub Pages, como lo hicimos en esta lección, solo podrás utilizar los plugins de Jekyll que ya están incluidos en las _gems_ de GitHub Pages que instalamos (aquí tienes una [lista completa de lo que hemos instalado](https://pages.github.com/versions/), cuando añadimos la _gem_ de GitHub Pages a nuestro Gemfile). -- Si decides alojar tu sitio de Jekyll en otro servidor que no sea GitHub Pages, puedes utilizar cualquier plugin de Jekyll (las instrucciones para alojar tu sitio varían entre diferentes proveedores de hosting web y no las desarrollaremos en esta lección, pero [aquí](http://jekyllrb.com/docs/plugins/) tienes una página que explica cómo instalar plugins, una vez que poseas tu sitio con hosting propio). Puedes realizar una búsqueda utilizando “Jekyll plugin” y añadir la funcionalidad que necesites para explorar si hay una herramienta apropiada disponible, o revisar la [documentación sobre plugins](http://jekyllrb.com/docs/plugins/) en el sitio oficial de Jekyll. +- Si decides alojar tu sitio de Jekyll en otro servidor que no sea GitHub Pages, puedes utilizar cualquier plugin de Jekyll (las instrucciones para alojar tu sitio varían entre diferentes proveedores de hosting web y no las desarrollaremos en esta lección, pero [aquí](https://jekyllrb.com/docs/plugins/) tienes una página que explica cómo instalar plugins, una vez que poseas tu sitio con hosting propio). Puedes realizar una búsqueda utilizando “Jekyll plugin” y añadir la funcionalidad que necesites para explorar si hay una herramienta apropiada disponible, o revisar la [documentación sobre plugins](https://jekyllrb.com/docs/plugins/) en el sitio oficial de Jekyll. -- También puedes mantener GitHub Pages como hosting gratuito para tu sitio, pero darle un **nombre de dominio personalizado** (los dominios pueden ser adquiridos por un costo razonable -que suele rondar los 10 dólares anuales- a través de un registrador de dominios como [NearlyFreeSpeech.net](https://www.nearlyfreespeech.net/services/domains)). Por ejemplo, el blog de la autora de este tutorial, [LiteratureGeek.com](http://literaturegeek.com/), fue hecho con Jekyll y está alojado en GitHub Pages, al igual que el sitio que creaste en esta lección, pero utiliza un dominio personalizado que la autora compró y configuró para que condujera a su sitio web. Las instrucciones para establecer un dominio personalizado pueden ser encontradas [aquí](https://help.github.com/articles/using-a-custom-domain-with-github-pages/). +- También puedes mantener GitHub Pages como hosting gratuito para tu sitio, pero darle un **nombre de dominio personalizado** (los dominios pueden ser adquiridos por un costo razonable -que suele rondar los 10 dólares anuales- a través de un registrador de dominios como [NearlyFreeSpeech.net](https://www.nearlyfreespeech.net/services/domains)). 
Por ejemplo, el blog de la autora de este tutorial, [LiteratureGeek.com](https://literaturegeek.com/), fue hecho con Jekyll y está alojado en GitHub Pages, al igual que el sitio que creaste en esta lección, pero utiliza un dominio personalizado que la autora compró y configuró para que condujera a su sitio web. Las instrucciones para establecer un dominio personalizado pueden ser encontradas [aquí](https://help.github.com/articles/using-a-custom-domain-with-github-pages/).
    Si configuras un dominio personalizado para tu sitio web alojado en GitHub Pages, asegúrate de leer y seguir los pasos de la documentación de GitHub para verificar tu dominio y evitar el uso de registros DNS comodín, para prevenir así un fallo de seguridad conocido. @@ -649,7 +649,7 @@ Si configuras un dominio personalizado para tu sitio web alojado en GitHub Pages ### Ayuda -Si encuentras algún problema, [Jekyll tiene una página para problemas, conocidos como troubleshooting](https://jekyllrb.com/docs/troubleshooting/), que te puede ayudar. Si estás trabajando en la línea de comandos y recibes un mensaje de error, no te olvides de buscar más acerca del error en la web. Más allá de los motores de búsqueda tradicionales, [el sitio StackExchange](http://stackexchange.com/) es un buen lugar para encontrar preguntas y respuestas de gente que tuvo este tipo de problemas. +Si encuentras algún problema, [Jekyll tiene una página para problemas, conocidos como troubleshooting](https://jekyllrb.com/docs/troubleshooting/), que te puede ayudar. Si estás trabajando en la línea de comandos y recibes un mensaje de error, no te olvides de buscar más acerca del error en la web. Más allá de los motores de búsqueda tradicionales, [el sitio StackExchange](https://stackexchange.com/) es un buen lugar para encontrar preguntas y respuestas de gente que tuvo este tipo de problemas. ### Creditos @@ -660,7 +660,7 @@ Gracias a Fred Gibbs, editor del *Programming Historian* por editar, debatir y r Puedes visitar estos sitios para más documentación, inspiración y para aprender más sobre Jekyll: * [Documentación oficial de Jekyll](https://jekyllrb.com/docs/home/) -* Jekyll tiene links a recursos "no oficiales" sobre su funcionamiento en Windows: [https://jekyll-windows.juthilo.com/](http://jekyll-windows.juthilo.com/) y [https://davidburela.wordpress.com/2015/11/28/easily-install-jekyll-on-windows-with-3-command-prompt-entries-and-chocolatey/](https://davidburela.wordpress.com/2015/11/28/easily-install-jekyll-on-windows-with-3-command-prompt-entries-and-chocolatey/) +* Jekyll tiene links a recursos "no oficiales" sobre su funcionamiento en Windows: [https://jekyll-windows.juthilo.com/](https://jekyll-windows.juthilo.com/) y [https://davidburela.wordpress.com/2015/11/28/easily-install-jekyll-on-windows-with-3-command-prompt-entries-and-chocolatey/](https://davidburela.wordpress.com/2015/11/28/easily-install-jekyll-on-windows-with-3-command-prompt-entries-and-chocolatey/) * [https://help.github.com/articles/using-jekyll-with-pages/](https://help.github.com/articles/using-jekyll-with-pages/) * Amanda Visconti, ["Introducing Static Sites for Digital Humanities Projects (why & what are Jekyll, GitHub, etc.?)"](https://literaturegeek.com/2015/12/08/WhyJekyllGitHub) * Alex Gil, ["How (and Why) to Generate a Static Website Using Jekyll, Part 1"](https://chronicle.com/blogs/profhacker/jekyll1/60913) diff --git a/es/lecciones/topic-modeling-y-mallet.md b/es/lecciones/topic-modeling-y-mallet.md index 8d2ca90717..de925d9fd4 100644 --- a/es/lecciones/topic-modeling-y-mallet.md +++ b/es/lecciones/topic-modeling-y-mallet.md @@ -44,16 +44,16 @@ En esta lección, primero aprenderás qué es *topic modeling*[^1] y por qué po Aplicaremos el modelador de tópicos a algunos archivos de ejemplo y veremos los tipos de *output* que genera MALLET. 
Esto nos dará una buena idea de cómo se puede aplicar *topic modeling* a un corpus de textos para identificar tópicos o temas que se encuentran en los documentos, sin tener que leerlos individualmente.

-Por favor, remítete a la [lista de discusión](http://mallet.cs.umass.edu/mailinglist.php) de los usuarios de MALLET para aprender más sobre todo lo que se pueda hacer con este programa.
+Por favor, remítete a la [lista de discusión](https://mallet.cs.umass.edu/mailinglist.php) de los usuarios de MALLET para aprender más sobre todo lo que se pueda hacer con este programa.

(Queremos agradecer a Robert Nelson y Elijah Meeks por consejos y sugerencias sobre cómo empezar a utilizar MALLET por primera vez y por sus ejemplos de lo que se puede hacer con esta herramienta.)

¿Qué es *Topic Modeling* y para quién es útil?
----------------------------------------------

-Una herramienta de *topic modeling* toma un texto individual (o un corpus) y busca patrones en el uso de las palabras; es un intento de encontrar significado semántico en el vocabulario de ese texto (o corpus). Antes de empezar con *topic modeling* deberías preguntarte si es o no útil para tu proyecto. Para empezar a entender en qué circunstancias una técnica como esta es la más efectiva, te recomendamos *[Distant Reading](http://www.cs.umbc.edu/~hillol/NGDM07/abstracts/talks/MKirschenbaum.pdf)* de Matthew Kirschenbaum (una charla dada en el simposio de la Fundación Nacional de Ciencias de los Estados Unidos en 2009, sobre la próxima generación de extracción de datos y descubrimiento cibernético para la inovación) y *[Reading Machines](http://www.worldcat.org/title/reading-machines-toward-an-algorithmic-criticism/oclc/708761605&referer=brief_results)* de Stephen Ramsay.
+Una herramienta de *topic modeling* toma un texto individual (o un corpus) y busca patrones en el uso de las palabras; es un intento de encontrar significado semántico en el vocabulario de ese texto (o corpus). Antes de empezar con *topic modeling* deberías preguntarte si es o no útil para tu proyecto. Para empezar a entender en qué circunstancias una técnica como esta es la más efectiva, te recomendamos *[Distant Reading](https://www.cs.umbc.edu/~hillol/NGDM07/abstracts/talks/MKirschenbaum.pdf)* de Matthew Kirschenbaum (una charla dada en el simposio de la Fundación Nacional de Ciencias de los Estados Unidos en 2009, sobre la próxima generación de extracción de datos y descubrimiento cibernético para la innovación) y *[Reading Machines](https://www.worldcat.org/title/reading-machines-toward-an-algorithmic-criticism/oclc/708761605&referer=brief_results)* de Stephen Ramsay.

-Como toda herramienta, el hecho de que se pueda utilizar no significa que deberías hacerlo. Si trabajas con pocos documentos (o incluso con un solo documento) puede ser que cálculos de frecuencia sean suficientes, en cuyo caso algo como las [herramientas Voyant](http://voyant-tools.org/) quizá serían convenientes.
+Como toda herramienta, el hecho de que se pueda utilizar no significa que deberías hacerlo. Si trabajas con pocos documentos (o incluso con un solo documento) puede ser que cálculos de frecuencia sean suficientes, en cuyo caso algo como las [herramientas Voyant](https://voyant-tools.org/) quizá serían convenientes. 
Si, en cambio, tienes cientos de documentos procedentes de un archivo y quieres comprender qué contiene el archivo, pero sin necesariamente leer cada documento, entonces *topic modeling* podría ser una buena opción. Los modelos de tópicos son una familia de programas informáticos que extraen *tópicos* de *textos*. Para la computadora, un *tópico* es una lista de palabras que se presenta de manera que sea estadísticamente significativa. Un *texto* puede ser un email, una entrada de blog, un capítulo de libro, un artículo periodístico, una entrada de diario – es decir, todo tipo de texto no estructurado. No estructurado quiere decir que no haya anotaciones legibles por la computadora que indiquen el significado semántico de las palabras del texto. @@ -69,20 +69,20 @@ Hay muchos programas diferentes para *topic modeling*; esta lección utiliza uno Examinando las palabras clave podemos ver que el político que dio los discursos se refirió a la economía, los empleos, el Medio Oriente, las próximas elecciones, etc. -Como advierte Scott Weingart, quienes utilizan *topic modeling* sin entenderlo completamente enfrentan muchos [peligros](https://web.archive.org/web/20240602215348/https://www.scottbot.net/HIAL/index.html@p=16713.html). Por ejemplo, podría interesarnos el uso de las palabras como un indicador para la ubicación en un espectro político. *Topic modeling* sin duda podría ayudar con eso, pero hay que recordar que el indicador no es en sí lo que queremos comprender - como lo muestra Andrew Gelman en su [estudio de maqueta sobre zombis, utilizando Google Trends](http://arxiv.org/abs/1003.6087/). Ted Underwood y Lisa Rhody (véase Lecturas adicionales) sostienen que para nosotros como historiadores sería mejor considerar estas categorías como discursos; sin embargo, para nuestros objetivos, continuaremos utilizando la palabra: tópico. +Como advierte Scott Weingart, quienes utilizan *topic modeling* sin entenderlo completamente enfrentan muchos [peligros](https://web.archive.org/web/20240602215348/https://www.scottbot.net/HIAL/index.html@p=16713.html). Por ejemplo, podría interesarnos el uso de las palabras como un indicador para la ubicación en un espectro político. *Topic modeling* sin duda podría ayudar con eso, pero hay que recordar que el indicador no es en sí lo que queremos comprender - como lo muestra Andrew Gelman en su [estudio de maqueta sobre zombis, utilizando Google Trends](https://arxiv.org/abs/1003.6087/). Ted Underwood y Lisa Rhody (véase Lecturas adicionales) sostienen que para nosotros como historiadores sería mejor considerar estas categorías como discursos; sin embargo, para nuestros objetivos, continuaremos utilizando la palabra: tópico. Nota: En la bibliografía sobre *topic modeling*, a veces encontrarás el término "*LDA*". Muchas veces, LDA y *topic modeling* se usan como sinónimos, pero la técnica LDA es, en realidad, un caso especial de *topic modeling* desarrollado por [David Blei y amigos](https://es.wikipedia.org/wiki/Latent_Dirichlet_Allocation) en 2002. No fue la primera técnica considerada como *topic modeling* pero es la más popular. Las innumerables variaciones de *topic modeling* han resultado en una sopa de letras de técnicas y programas para implementarlas, lo cual puede ser desconcertante o agobiante para los no iniciados en la materia y por esto no nos detendremos en ellos por ahora. Todos los algoritmos trabajan casi del mismo modo y MALLET en particular utiliza LDA. 
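Aunque esta lección trabaja con MALLET desde la línea de comandos, la intuición de LDA puede ilustrarse en unas pocas líneas de Python. El siguiente esbozo es una suposición nuestra y no sustituye el método de la lección: utiliza la librería [gensim](https://radimrehurek.com/gensim/) y un corpus de juguete inventado.

```python
# Esbozo hipotético: la misma idea de LDA que aplica MALLET, con gensim.
from gensim import corpora, models

# Corpus de juguete: documentos ya tokenizados y sin palabras vacías
documentos = [
    ["economía", "empleo", "impuestos", "presupuesto"],
    ["elecciones", "votos", "campaña", "candidato"],
    ["economía", "elecciones", "empleo", "campaña"],
]

diccionario = corpora.Dictionary(documentos)
corpus = [diccionario.doc2bow(doc) for doc in documentos]

# Entrenamos un modelo con dos tópicos; como en MALLET, hay un
# componente aleatorio y cada ejecución puede variar ligeramente
lda = models.LdaModel(corpus, num_topics=2, id2word=diccionario, passes=10)

for topico in lda.print_topics(num_words=4):
    print(topico)  # cada tópico es una lista ponderada de palabras
```

Igual que con MALLET, el resultado contiene un elemento aleatorio: dos ejecuciones no producen exactamente los mismos tópicos.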
### Ejemplos de modelos de tópicos usados por historiadores:

-- Rob Nelson, *[Mining the Dispatch](http://dsl.richmond.edu/dispatch/)*
+- Rob Nelson, *[Mining the Dispatch](https://dsl.richmond.edu/dispatch/)*
- Cameron Blevins, "[Topic Modeling Martha Ballard's Diary](https://perma.cc/39CG-MNLH)" *Historying*, April 1, 2010.
- David J Newman y Sharon Block, "Probabilistic topic decomposition of an eighteenth century American newspaper," *Journal of the American Society for Information Science and Technology* vol. 57, no. 6 (April 1, 2006): 753-767.[^2]

Instalar MALLET
---------------

-Hay muchas herramientas que se podrían utilizar para crear modelos de tópicos, pero al momento de escribir estas líneas (en el verano de 2007) la herramienta más sencilla es MALLET.[^3] [MALLET](http://mallet.cs.umass.edu/index.php) utiliza una implementación del [*Muestreo de Gibbs*](https://es.wikipedia.org/wiki/Muestreo_de_Gibbs), una técnica estadística destinada a construir rápidamente una distribución de muestras, para luego crear los modelos de tópicos correspondientes. Para utilizar MALLET es necesario trabajar en la línea de comandos – hablaremos más de esto en un instante. Lo bueno es que normalmente los mismos comandos se usan repetidamente.
+Hay muchas herramientas que se podrían utilizar para crear modelos de tópicos, pero al momento de escribir estas líneas (en el verano de 2017) la herramienta más sencilla es MALLET.[^3] [MALLET](https://mallet.cs.umass.edu/index.php) utiliza una implementación del [*Muestreo de Gibbs*](https://es.wikipedia.org/wiki/Muestreo_de_Gibbs), una técnica estadística destinada a construir rápidamente una distribución de muestras, para luego crear los modelos de tópicos correspondientes. Para utilizar MALLET es necesario trabajar en la línea de comandos – hablaremos más de esto en un instante. Lo bueno es que normalmente los mismos comandos se usan repetidamente.

Las instrucciones de instalación son diferentes para Windows y Mac. Sigue las instrucciones apropiadas para ti:

@@ -91,8 +91,8 @@ Las instrucciones de instalación son diferentes para Windows y Mac. Sigue las i

### Instrucciones para Windows

-1. Ve a la página del proyecto [MALLET](http://mallet.cs.umass.edu/index.php). Puedes [descargar MALLET aquí](http://mallet.cs.umass.edu/download.php).
-2. También necesitarás el [Kit de desarrollo de Java (JDK)](http://www.oracle.com/technetwork/java/javase/downloads/index.html) - esto es, no el Java normal que se encuentra en cada computadora sino el que permite programar cosas. Instala este en tu computadora.
+1. Ve a la página del proyecto [MALLET](https://mallet.cs.umass.edu/index.php). Puedes [descargar MALLET aquí](https://mallet.cs.umass.edu/download.php).
+2. También necesitarás el [Kit de desarrollo de Java (JDK)](https://www.oracle.com/technetwork/java/javase/downloads/index.html) - esto es, no el Java normal que se encuentra en cada computadora sino el que permite programar cosas. Instala este en tu computadora.
3. Descomprime MALLET en tu directorio `C:`. Esto es importante: no puede ser en ningún otro lugar. Tendrás un directorio llamado `C:\mallet-2.0.8` o parecido. Para simplificar, cambia el nombre simplemente a `mallet`.
4. MALLET utiliza una *variable de entorno* para indicar a la computadora donde encontrar todos los componentes necesarios para sus procesos en el momento de ejecutarse. Es como un atajo para el programa. Un(a) programador(a) no puede saber exactamente donde cada usuario instala un programa. 
Por eso, él o ella crea una variable en el código que representa el lugar de instalación en cada momento. Por medio de la variable de entorno indicamos a la computadora donde se encuentra ese lugar. Si mueves el programa a otro lugar tendrás que cambiar esa variable. @@ -130,8 +130,8 @@ Ahora estás preparado para avanzar a la próxima sección. Muchas de las instrucciones para la instalación en OS X se parecen a las instrucciones para Windows, con pocas excepciones. En realidad, es un poco más fácil ejecutar comandos de MALLET en Mac. -1. Descarga e [instala MALLET](http://mallet.cs.umass.edu/download.php). -2. Descarga el [Kit de desarrollo de Java (JDK)](http://www.oracle.com/technetwork/java/javase/downloads/index.html). +1. Descarga e [instala MALLET](https://mallet.cs.umass.edu/download.php). +2. Descarga el [Kit de desarrollo de Java (JDK)](https://www.oracle.com/technetwork/java/javase/downloads/index.html). Descomprime MALLET en un directorio en tu sistema (para seguir esta lección con facilidad, escoge tu directorio `/User/`, aunque otro lugar funcionará igualmente). Cuando esté descomprimido, abre tu ventana Terminal (dentro del directorio `Aplicaciones` en tu Finder). Usando la Terminal, navega al directorio donde descomprimiste MALLET (será `mallet-2.0.8` o `mallet` si cambiaste el nombre de la carpeta para simplificarlo. Si descomprimiste MALLET en tu directorio `/User/` como se sugiere en esta lección, puedes navegar al directorio correcto tecleando `cd mallet-2.0.8` o bien `cd mallet`). `cd` es la abreviatura para "cambiar directorio" cuando se trabaja en la Terminal. @@ -297,32 +297,32 @@ Puede resultar difícil leer estos datos. Los tópicos comienzan en la tercera c A partir de esto, se puede ver que en el documento número 0 (es decir, el primer documento cargado en MALLET), `a-aprender-en-las-haciendas.txt`, el tópico 0 tiene un porcentaje de 0.33% (columna C). Si buscamos el valor más alto en esta fila, podemos ver que el tópico 3 es el más importante en este documento, con un porcentaje de 69.24%. Dada la naturaleza de MALLET, tus propios tópicos pueden tener valores diferentes. -Si tienes un corpus de archivos de texto que están organizados en orden cronológico (por ejemplo que `1.txt` sea anterior a `2.txt`), podrías generar un gráfico en tu programa de hoja de cálculo y empezar a ver cambios con el tiempo, tal como lo hizo Robert Nelson en [Mining the Dispatch](http://dsl.richmond.edu/dispatch/). +Si tienes un corpus de archivos de texto que están organizados en orden cronológico (por ejemplo que `1.txt` sea anterior a `2.txt`), podrías generar un gráfico en tu programa de hoja de cálculo y empezar a ver cambios con el tiempo, tal como lo hizo Robert Nelson en [Mining the Dispatch](https://dsl.richmond.edu/dispatch/). ¿Cómo puedes saber cuál es la cantidad adecuada de tópicos? ¿Hay una cantidad *natural* de tópicos? Hemos descubierto que hay que ejecutar `train-topics` varias veces con distintas cantidades de tópicos para ver cómo la distribución de los tópicos en los documentos cambia. Si encontramos que la mayoría de los textos están dominados por muy pocos tópicos, lo interpretamos como una señal de necesitar aumentar la cantidad de tópicos; las preferencias fueron demasiado amplias. Hay maneras de buscar la mejor configuración automáticamente, por ejemplo mediante el comando `hlda` de MALLET, pero para los lectores de esta lección probablemente es más rápido realizar algunas iteraciones (para más información consulta Griffiths, T. L., & Steyvers, M. (2004). 
*Finding scientific topics.* Proceedings of the National Academy of Science, 101, 5228-5235). ### Analizar tus propios textos con MALLET -La carpeta `sample data` en el directorio de MALLET (`C:\mallet\sample-data`) te puede servir como guía para saber cómo organizar tus textos. Pon todo lo que deseas en una sola carpeta, por ejemplo `C:\mis-datos`. Tus archivos deben contener texto llano y estar en el formato `.txt` (puedes crearlos en un procesador de textos como Notepad, [Sublime Text](https://www.sublimetext.com/) o [Atom](https://atom.io/), por ejemplo, y guardarlos como `Texto (*.txt)` o `Texto sin formato`). Tienes que tomar algunas decisiones. ¿Quieres explorar los tópicos a nivel de párrafos? Entonces cada archivo `.txt` debería contener solo un párrafo. En los nombres de los archivos puedes agregar información como el número de la página u otros identificadores, por ejemplo: `pag32_parr1.txt`. Si trabajas con un diario, cada archivo de texto podría ser una entrada de diario, por ejemplo: `abril_25_1887.txt`. (Nota que es importante no dejar espacios en los nombres de carpetas y archivos). Si los textos que te interesan están en la red, podrías [automatizar](http://electricarchaeology.ca/2012/07/09/mining-a-day-of-archaeology/) este proceso. +La carpeta `sample data` en el directorio de MALLET (`C:\mallet\sample-data`) te puede servir como guía para saber cómo organizar tus textos. Pon todo lo que deseas en una sola carpeta, por ejemplo `C:\mis-datos`. Tus archivos deben contener texto llano y estar en el formato `.txt` (puedes crearlos en un procesador de textos como Notepad, [Sublime Text](https://www.sublimetext.com/) o [Atom](https://atom.io/), por ejemplo, y guardarlos como `Texto (*.txt)` o `Texto sin formato`). Tienes que tomar algunas decisiones. ¿Quieres explorar los tópicos a nivel de párrafos? Entonces cada archivo `.txt` debería contener solo un párrafo. En los nombres de los archivos puedes agregar información como el número de la página u otros identificadores, por ejemplo: `pag32_parr1.txt`. Si trabajas con un diario, cada archivo de texto podría ser una entrada de diario, por ejemplo: `abril_25_1887.txt`. (Nota que es importante no dejar espacios en los nombres de carpetas y archivos). Si los textos que te interesan están en la red, podrías [automatizar](https://electricarchaeology.ca/2012/07/09/mining-a-day-of-archaeology/) este proceso. ### Lecturas adicionales sobre *Topic Modeling* Para ver un ejemplo desarrollado de *topic modeling* basado en materiales obtenidos de páginas web, véase [Mining the Open Web with Looted -Heritage Draft](http://electricarchaeology.ca/2012/06/08/mining-the-open-web-with-looted-heritage-draft/). +Heritage Draft](https://electricarchaeology.ca/2012/06/08/mining-the-open-web-with-looted-heritage-draft/). Puedes reutilizar los datos tomándolos de [Figshare.com](https://ndownloader.figshare.com/files/90972) donde están incluidos algunos archivos `.txt`. Cada uno de los ficheros `.txt` contiene una noticia individual. - Para amplia información adicional y una bibliografía sobre *topic modeling* podrías empezar con el [Guided Tour to Topic Modeling](https://web.archive.org/web/20240520155820/https://www.scottbot.net/HIAL/index.html@p=19113.html) de Scott Weingart. -- Una discusión importante sobre la interpretación del significado de los tópicos es '[Topic modeling made just simple enough](http://tedunderwood.wordpress.com/2012/04/07/topic-modeling-made-just-simple-enough/)' de Ted Underwood. 
-- El artículo de blog '[Some Assembly Required](http://web.archive.org/web/20160704150726/http://www.lisarhody.com:80/some-assembly-required/)' *Lisa @ Work* 22 de agosto de 2012 escrito por Lisa Rhody también es muy revelador. -- Clay Templeton, '[Topic Modeling in the Humanities: An Overview](https://web.archive.org/web/20130116223500/http://mith.umd.edu/topic-modeling-in-the-humanities-an-overview/)', Maryland Institute for Technology in the Humanities, n.d. -- David Blei, Andrew Ng, and Michael Jordan, '[Latent dirichlet allocation](http://dl.acm.org/citation.cfm?id=944937)', The Journal of Machine Learning Research 3 (2003). -- Finalmente, te recomendamos que consultes la [bibliografía de artículos sobre *topic modeling*](http://mimno.infosci.cornell.edu/topics.html) de David Mimno. Están clasificados por temas para facilitar encontrar el artículo más adecuado para una aplicación determinada. También puedes echar un vistazo a su reciente artículo sobre [Historiografía Computacional](http://www.perseus.tufts.edu/publications/02-jocch-mimno.pdf) en la revista *ACM Transactions on Computational Logic* en el que analiza revistas científicas de los Clásicos a lo largo de cien años para aprender algo sobre este campo. Mientras el artículo debe leerse como un buen ejemplo de *topic modeling*, su sección sobre 'métodos' es especialmente relevante porque incluye una discusión sobre cómo preparar los textos para un análisis de ese tipo.[^13] +- Una discusión importante sobre la interpretación del significado de los tópicos es '[Topic modeling made just simple enough](https://tedunderwood.wordpress.com/2012/04/07/topic-modeling-made-just-simple-enough/)' de Ted Underwood. +- El artículo de blog '[Some Assembly Required](https://web.archive.org/web/20160704150726/https://www.lisarhody.com:80/some-assembly-required/)' *Lisa @ Work* 22 de agosto de 2012 escrito por Lisa Rhody también es muy revelador. +- Clay Templeton, '[Topic Modeling in the Humanities: An Overview](https://web.archive.org/web/20130116223500/https://mith.umd.edu/topic-modeling-in-the-humanities-an-overview/)', Maryland Institute for Technology in the Humanities, n.d. +- David Blei, Andrew Ng, and Michael Jordan, '[Latent dirichlet allocation](https://dl.acm.org/citation.cfm?id=944937)', The Journal of Machine Learning Research 3 (2003). +- Finalmente, te recomendamos que consultes la [bibliografía de artículos sobre *topic modeling*](https://mimno.infosci.cornell.edu/topics.html) de David Mimno. Están clasificados por temas para facilitar encontrar el artículo más adecuado para una aplicación determinada. También puedes echar un vistazo a su reciente artículo sobre [Historiografía Computacional](https://www.perseus.tufts.edu/publications/02-jocch-mimno.pdf) en la revista *ACM Transactions on Computational Logic* en el que analiza revistas científicas de los Clásicos a lo largo de cien años para aprender algo sobre este campo. Mientras el artículo debe leerse como un buen ejemplo de *topic modeling*, su sección sobre 'métodos' es especialmente relevante porque incluye una discusión sobre cómo preparar los textos para un análisis de ese tipo.[^13] ## Notas de traducción [^1]: En esta traducción se utiliza la expresión *topic modeling* en inglés porque en la literatura publicada sobre el tema en español es lo más común. Por supuesto sería posible traducir *topic modeling* por modelaje de tópicos o algo parecido, pero hasta ahora no es habitual. 
Por otro lado, se ha optado por traducir todas las demás palabras relacionadas al método para estimular su uso en español, por ejemplo *topic* por tópico o *topic model* por modelo de tópicos. -[^2]: También hay algunos ejemplos de modelos de tópicos creados a partir de textos (literarios) en español. Por ejemplo: Borja Navarro-Colorado, *[On Poetic Topic Modeling: Extracting Themes and Motifs From a Corpus of Spanish Poetry](https://www.frontiersin.org/articles/10.3389/fdigh.2018.00015/full)*, frontiers in Digital Humanities, 20 de junio de 2018, [https://doi.org/10.3389/fdigh.2018.00015](https://doi.org/10.3389/fdigh.2018.00015); Borja Navarro-Colorado y David Tomás, *[A fully unsupervised Topic Modeling approach to metaphor identification / Una aproximación no supervisada a la detección de metáforas basada en Topic Modeling](https://www.dlsi.ua.es//~borja/NavarroTomas_PosterSEPLN2015.pdf)*, Actas del XXXI Congreso de la Sociedad Española para el Procesamiento del Lenguaje Natural, 2015; Christof Schöch, Ulrike Henny, José Calvo Tello, Daniel Schlör, Stefanie Popp, *[Topic, Genre, Text. Topics im Textverlauf von Untergattungen des spanischen und hispanoamerikanischen Romans (1880-1930)](https://web.archive.org/web/20180828160609/http://www.dhd2016.de/abstracts/vortr%C3%A4ge-055.html)*, DHd 2016. Modellierung, Vernetzung, Visualisierung. Die Digital Humanities als fächerübergreifendes Forschungsparadigma. Universität Leipzig, 7.-12. März 2016. +[^2]: También hay algunos ejemplos de modelos de tópicos creados a partir de textos (literarios) en español. Por ejemplo: Borja Navarro-Colorado, *[On Poetic Topic Modeling: Extracting Themes and Motifs From a Corpus of Spanish Poetry](https://www.frontiersin.org/articles/10.3389/fdigh.2018.00015/full)*, frontiers in Digital Humanities, 20 de junio de 2018, [https://doi.org/10.3389/fdigh.2018.00015](https://doi.org/10.3389/fdigh.2018.00015); Borja Navarro-Colorado y David Tomás, *[A fully unsupervised Topic Modeling approach to metaphor identification / Una aproximación no supervisada a la detección de metáforas basada en Topic Modeling](https://www.dlsi.ua.es//~borja/NavarroTomas_PosterSEPLN2015.pdf)*, Actas del XXXI Congreso de la Sociedad Española para el Procesamiento del Lenguaje Natural, 2015; Christof Schöch, Ulrike Henny, José Calvo Tello, Daniel Schlör, Stefanie Popp, *[Topic, Genre, Text. Topics im Textverlauf von Untergattungen des spanischen und hispanoamerikanischen Romans (1880-1930)](https://web.archive.org/web/20180828160609/https://www.dhd2016.de/abstracts/vortr%C3%A4ge-055.html)*, DHd 2016. Modellierung, Vernetzung, Visualisierung. Die Digital Humanities als fächerübergreifendes Forschungsparadigma. Universität Leipzig, 7.-12. März 2016. [^3]: En esta traducción, las instrucciones para la instalación de MALLET fueron actualizadas para ajustarse a Windows 10. En el original inglés las instrucciones se refieren a Windows 7. Las capturas de pantalla fueron sustituidas para que el idioma de la pantalla sea español. [^4]: En todos los ejemplos de esta lección en los que aparece la palabra `User`, deberás sustituirla con tu propio nombre de usuario. [^5]: Al final de un comando escrito en la línea de comandos siempre se teclea Entrar para confirmar el comando y ejecutarlo. En adelante no lo mencionaremos más. @@ -333,4 +333,4 @@ Puedes reutilizar los datos tomándolos de [Figshare.com](https://ndownloader.fi [^10]: Nótese que MALLET no reconoce palabras compuestas como `New York` y las trata como dos palabras separadas. 
Para evitar eso, sería necesario preprocesar el texto y conectar las varias partes de la palabra compuesta con un símbolo, por ejemplo una barra baja (`New_York`) para que MALLET las reconozca como tales. [^11]: Si comparas los tópicos en la figura 10 con los de la figura 9, puedes ver el efecto del elemento aleatorio del *topic modeling*. Esas dos listas de tópicos son los resultados de dos pasadas diferentes y aunque los tópicos se parezcan no son exactamente iguales. [^12]: Como en la línea de comandos, también en el programa de hoja de cálculo puede ser necesario cambiar la codificación de caracteres a UTF-8 para que las letras con acento o ñ salgan correctamente. Esto se puede hacer durante el proceso de importar los datos o ajustando las preferencias del programa. -[^13]: Para introducciones a *topic modeling* escritas en español, véanse la entrada de blog de José Calvo Tello *[Topic modeling: ¿qué, cómo, cuándo?](http://www.morethanbooks.eu/topic-modeling-introduccion/)* y la presentación *[Text Mining con Topic Modeling](https://web.archive.org/web/20180831094856/http://www.dlsi.ua.es/~borja/riilua/6.TopicModeling_v02.pdf)* de Borja Navarro-Colorado. +[^13]: Para introducciones a *topic modeling* escritas en español, véanse la entrada de blog de José Calvo Tello *[Topic modeling: ¿qué, cómo, cuándo?](https://www.morethanbooks.eu/topic-modeling-introduccion/)* y la presentación *[Text Mining con Topic Modeling](https://web.archive.org/web/20180831094856/https://www.dlsi.ua.es/~borja/riilua/6.TopicModeling_v02.pdf)* de Borja Navarro-Colorado. diff --git a/es/lecciones/trabajar-con-archivos-de-texto.md b/es/lecciones/trabajar-con-archivos-de-texto.md index 7ed26ea182..8378f4ba1e 100644 --- a/es/lecciones/trabajar-con-archivos-de-texto.md +++ b/es/lecciones/trabajar-con-archivos-de-texto.md @@ -179,4 +179,4 @@ Lecturas recomendadas [print]: https://docs.python.org/2/reference/simple_stmts.html#the-print-statement [palabra reservada]: https://docs.python.org/release/2.5.4/ref/keywords.html [file objects]: https://docs.python.org/2/library/stdtypes.html#bltin-file-objects -[Non-Programmer’s Tutorial for Python 2.6/Hello, World]: http://en.wikibooks.org/wiki/Non-Programmer%27s_Tutorial_for_Python_2.6/Hello,_World +[Non-Programmer’s Tutorial for Python 2.6/Hello, World]: https://en.wikibooks.org/wiki/Non-Programmer%27s_Tutorial_for_Python_2.6/Hello,_World diff --git a/es/lecciones/trabajar-con-paginas-web.md b/es/lecciones/trabajar-con-paginas-web.md index 558e81c019..aaf4f5071c 100644 --- a/es/lecciones/trabajar-con-paginas-web.md +++ b/es/lecciones/trabajar-con-paginas-web.md @@ -177,12 +177,12 @@ Para seguir a lo largo de las lecciones futuras es importante que tengas los arc - python-es-lecciones1.zip ([zip][]) -[The Old Bailey Online]: http://oldbaileyonline.org +[The Old Bailey Online]: https://oldbaileyonline.org [*cadena de consulta*]: https://es.wikipedia.org/wiki/Query_string [Descarga de registros múltiples usando cadenas de consulta]: /lessons/downloading-multiple-records-using-query-strings -[revueltas de Gordon]: http://en.wikipedia.org/wiki/Gordon_Riots -[View as XML]: http://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes +[revueltas de Gordon]: https://en.wikipedia.org/wiki/Gordon_Riots +[View as XML]: https://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes [XML]: https://es.wikipedia.org/wiki/Extensible_Markup_Language -[documento original]: 
http://www.oldbaileyonline.org/images.jsp?doc=178006280084 +[documento original]: https://www.oldbaileyonline.org/images.jsp?doc=178006280084 [urllib]: https://docs.python.org/3/library/urllib.html [zip]: /assets/python-es-lecciones1.zip diff --git a/es/lecciones/transformacion-datos-xml-xsl.md b/es/lecciones/transformacion-datos-xml-xsl.md index 1ad9abcf35..4b0c77d42a 100644 --- a/es/lecciones/transformacion-datos-xml-xsl.md +++ b/es/lecciones/transformacion-datos-xml-xsl.md @@ -256,7 +256,7 @@ Los ejemplos de código de línea de comandos que mostraremos aquí presupondrá Para empezar a transformar un documento XML, primero es necesario obtener un archivo *bien formado*.[^2] Muchas bases de datos históricas disponibles en línea están modeladas en XML y, a veces, ofrecen sus datos en abierto. -Para realizar este tutorial utilizaremos la base de datos [Scissors and Paste](http://scissors-and-paste.net). +Para realizar este tutorial utilizaremos la base de datos [Scissors and Paste](https://scissors-and-paste.net). La base de datos *Scissors and Paste* es una colección colaborativa, en continuo crecimiento, que contiene noticias procedentes de periódicos británicos e imperiales de los siglos XVIII y XIX. Los dos objetivos originales del proyecto eran facilitar la comparación de reediciones aparecidas en distintos periódicos y detectar temas similares en distintas publicaciones inglesas. Como muchas bases de datos XML, *Scissors and Paste* contiene datos (el texto), información sobre el formato (como las cursivas o las justificación de los párrafos) y metadatos.[^3] Los metadatos recogen la paginación de la noticia, la fecha de impresión, algunos detalles adicionales sobre el periódico, los temas principales y una lista con las personas y lugares mencionados. @@ -336,7 +336,7 @@ La primeras tres líneas de tu archivo XSL serán las siguientes: La primera línea declara que este es un documento XML versión 1.0, codificado como UTF-8. (¡Nota que un documento XSL es en últimas un tipo especial de documento XML!) -La segunda línea declara que se trata de la versión 1.0 de XSL y que el uso del [espacio de nombres](https://es.wikipedia.org/wiki/Espacio_de_nombres_XML) (*namespace*, en inglés) es el estándar establecido por el [Consorcio World Wide Web](http://www.w3.org/), cuya URI (*Uniform Resource Identifier*) figura en la instrucción. +La segunda línea declara que se trata de la versión 1.0 de XSL y que el uso del [espacio de nombres](https://es.wikipedia.org/wiki/Espacio_de_nombres_XML) (*namespace*, en inglés) es el estándar establecido por el [Consorcio World Wide Web](https://www.w3.org/), cuya URI (*Uniform Resource Identifier*) figura en la instrucción. Finalmente, la tercera línea le indica al procesador XSL que queremos generar un archivo de texto plano. (También podrías haber puesto `xml` o `html`, en lugar de `text`, para generar un documento XML o uno HTML, respectivamente.) diff --git a/es/lecciones/uso-las-colecciones-hathitrust-mineria-textual-R.md b/es/lecciones/uso-las-colecciones-hathitrust-mineria-textual-R.md index 23ac12a50d..fa35909065 100644 --- a/es/lecciones/uso-las-colecciones-hathitrust-mineria-textual-R.md +++ b/es/lecciones/uso-las-colecciones-hathitrust-mineria-textual-R.md @@ -78,7 +78,7 @@ library(stringr) ## Obtener los atributos extraídos de un volumen -Cada libro o volumen en HathiTrust posee un número de identificación único (o el "htid"), el cual permite que obtengamos datos sobre el volumen. 
Cuando el libro no está limitado por los derechos de autor, puedes verlo añadiendo su número de identificación a un URL de la siguiente manera: `http://hdl.handle.net/2027/{número de identificación}`. Por ejemplo, el número que identifica una de las primeras ediciones de la clásica novela colombiana, *María* de Jorge Isaacs, es `uc1.31175010656638` y al visitar el enlace [http://hdl.handle.net/2027/uc1.31175010656638](http://hdl.handle.net/2027/uc1.31175010656638) accedemos a una copia de la obra. +Cada libro o volumen en HathiTrust posee un número de identificación único (o el "htid"), el cual permite que obtengamos datos sobre el volumen. Cuando el libro no está limitado por los derechos de autor, puedes verlo añadiendo su número de identificación a un URL de la siguiente manera: `https://hdl.handle.net/2027/{número de identificación}`. Por ejemplo, el número que identifica una de las primeras ediciones de la clásica novela colombiana, *María* de Jorge Isaacs, es `uc1.31175010656638` y al visitar el enlace [https://hdl.handle.net/2027/uc1.31175010656638](https://hdl.handle.net/2027/uc1.31175010656638) accedemos a una copia de la obra. {% include figure.html filename="es-or-uso-las-colecciones-hathitrust-mineria-textual-R-01.png" alt="Ejemplar de la novela María en el sitio HathiTrust" caption="Figura 1. 'María' de Jorge Isaacs" %} diff --git a/es/lecciones/ver-archivos-html.md b/es/lecciones/ver-archivos-html.md index 0ae4594a40..d8958f1eea 100644 --- a/es/lecciones/ver-archivos-html.md +++ b/es/lecciones/ver-archivos-html.md @@ -112,7 +112,7 @@ Sugerencia de lecturas para aprender HTML: - [W3Schools HTML Tutorial] - [W3Schools HTML5 Tutorial] -[la anterior de la serie]: http://es.programminghistorian.org/lecciones/introduccion-e-instalacion/"> -[tutorial de HTML ofrecido por W3Schools]: http://www.w3schools.com/html/default.asp -[W3Schools HTML Tutorial]: http://www.w3schools.com/html/default.asp -[W3Schools HTML5 Tutorial]: http://www.w3schools.com/html/html5_intro.asp +[la anterior de la serie]: https://es.programminghistorian.org/lecciones/introduccion-e-instalacion/ +[tutorial de HTML ofrecido por W3Schools]: https://www.w3schools.com/html/default.asp +[W3Schools HTML Tutorial]: https://www.w3schools.com/html/default.asp +[W3Schools HTML5 Tutorial]: https://www.w3schools.com/html/html5_intro.asp diff --git a/es/pia.md b/es/pia.md index 0222dcb148..f27d9b252c 100644 --- a/es/pia.md +++ b/es/pia.md @@ -50,7 +50,7 @@ Al unirte al Programa de Instituciones Asociadas recibirás los siguientes benef
    - + diff --git a/fr/README.md b/fr/README.md index d6351b7a22..3f933212e0 100644 --- a/fr/README.md +++ b/fr/README.md @@ -1,10 +1,10 @@ -Ceci est le dépôt principal pour le _Programming Historian en français_ (<http://programminghistorian.org/fr>) où nous stockons les fichiers du site web. +Ceci est le dépôt principal pour le _Programming Historian en français_ (<https://programminghistorian.org/fr>) où nous stockons les fichiers du site web. Pour les tutoriels soumis, voir: [_Programming Historian Submissions_](https://github.com/programminghistorian/ph-submissions). SI vous avez des suggestions à faire sur le site ou le projet, merci de cliquer sur [Issues](https://github.com/programminghistorian/jekyll/issues) en haut et créer un nouveau ticket en cliquant sur [New Issue](https://github.com/programminghistorian/jekyll/issues/new) pour décrire votre idée. Merci de noter que le ticket sera public. Si vous souhaitez échanger avec nous en privé, merci de contacter [Jessica Parr](mailto:jarr1129@gmail.com). -Si vous souhaitez contribuer au projet, vous pouvez trouver des consignes précis aux auteur(e)s, aux évaluateurs et évaluatrices et aux rédacteurs et rédactrices sur la [page contributions](http://programminghistorian.org/fr/contribuer) du site web. +Si vous souhaitez contribuer au projet, vous pouvez trouver des consignes précises aux auteur(e)s, aux évaluateurs et évaluatrices et aux rédacteurs et rédactrices sur la [page contributions](https://programminghistorian.org/fr/contribuer) du site web. Pour apporter des contributions techniques au projet ou pour vous renseigner sur notre politique concernant de nouvelles sous-équipes linguistiques qui s'intéressent à créer une version traduite, merci de lire le [wiki du projet](https://github.com/programminghistorian/jekyll/wiki). diff --git a/fr/apropos.md b/fr/apropos.md index 4f5579c76d..ca5d7b650a 100644 --- a/fr/apropos.md +++ b/fr/apropos.md @@ -14,7 +14,7 @@ Notre procédure d'évaluation par les pairs est un peu différente de celle qui ## Open Source (code source ouvert) -Le _Programming Historian en français_ adhère aux principes de l'open source. Toutes les leçons utilisent dans la mesure du possible des langages de programmation et des logiciels ouverts. Cette politique vise à minimiser les coûts pour toutes les parties concernées et à encourager la plus large participation possible.
C'est notre conviction que tout un chacun devrait avoir la possibilité de profiter de ces tutoriels, non pas seulement ceux et celles disposant de budgets de recherche considérables qui permettent l'acquisition de logiciels propriétaires coûteux. Depuis 2016, une version citable du projet _Programming Historian_ est déposée sur [Zenodo](https://zenodo.org/). Le dépôt de l'année 2022 est accessible sur [doi.org/10.5281/zenodo.7313045](https://doi.org/10.5281/zenodo.7313045). Depuis 2018, le robot d’indexation du [UK Web Archive](https://www.webarchive.org.uk/) parcourt régulièrement les pages du _Programming Historian_. Celles-ci sont archivées et mises à la disposition du public [via leur site web](https://www.webarchive.org.uk/wayback/en/archive/*/https://programminghistorian.org/). ## Voie diamant (Diamond Open Access) @@ -26,7 +26,7 @@ Le _Programming Historian_ (ISSN 2397-2068) est recensé dans le répertoire de ## Récompenses -Le _Programming Historian_ a gagné plusieurs prix qui reconnaissent et célèbrent nos réussites dans les domaines de la publication en libre accès et des humanités numériques. En 2016, la version anglaise de la revue fut la grande gagnante du [Digital Humanities Awards](http://dhawards.org/dhawards2016/results/) dans la catégorie des Meilleures Collections d'Articles, puis l'année suivante, en 2017, _Programming Historian en espagnol_ [remporta la même distinction](http://dhawards.org/dhawards2017/results/). En 2018, _Programming Historian en espagnol_ était le vainqueur de 'Mejor iniciativa formativa desarrollada durante el año 2018', [Humanidades Digitales Hispánicas Association](http://humanidadesdigitaleshispanicas.es/). Nous avons remporté le [Canadian Social Knowledge Institute's Open Scholarship Award](https://etcl.uvic.ca/events-activities/open-scholarship-awards/) en 2020 et en 2021 nous avons été récompensés d'un [Coko Foundation's Open Publishing Award](https://openpublishingawards.org/results/2021/index.html) dans la catégorie Contenu Ouvert. En 2022, nous avons remporté la catégorie "Meilleur support de formation DH" des [Digital Humanities Awards](http://dhawards.org/dhawards2022/results/). +Le _Programming Historian_ a gagné plusieurs prix qui reconnaissent et célèbrent nos réussites dans les domaines de la publication en libre accès et des humanités numériques. En 2016, la version anglaise de la revue fut la grande gagnante du [Digital Humanities Awards](https://dhawards.org/dhawards2016/results/) dans la catégorie des Meilleures Collections d'Articles, puis l'année suivante, en 2017, _Programming Historian en espagnol_ [remporta la même distinction](https://dhawards.org/dhawards2017/results/). En 2018, _Programming Historian en espagnol_ était le vainqueur de 'Mejor iniciativa formativa desarrollada durante el año 2018', [Humanidades Digitales Hispánicas Association](https://humanidadesdigitaleshispanicas.es/). Nous avons remporté le [Canadian Social Knowledge Institute's Open Scholarship Award](https://etcl.uvic.ca/events-activities/open-scholarship-awards/) en 2020 et en 2021 nous avons été récompensés d'un [Coko Foundation's Open Publishing Award](https://openpublishingawards.org/results/2021/index.html) dans la catégorie Contenu Ouvert. En 2022, nous avons remporté la catégorie "Meilleur support de formation DH" des [Digital Humanities Awards](https://dhawards.org/dhawards2022/results/). 
## Politique de diversité @@ -40,4 +40,4 @@ Une liste de nos donateurs et des soutiens reçus est consultable sur la page qu ## Histoire du projet -Le *Programming Historian* a été fondé en 2008 par William J. Turkel et Alan MacEachern. À l'époque, Turkel avait publié un [billet de blog](http://digitalhistoryhacks.blogspot.com/2008/01/programming-historian.html), exposant leurs intentions pour le projet. Il s'est focalisé principalement sur Python et il a été publié en libre accès, en tant que projet d'infrastructure numérique de *NiCHE* (*Network in Canadian History & Environment* / *Nouvelle initiative canadienne en histoire de l’environnement*). En 2012, *The Programming Historian* a élargi son équipe éditoriale pour être lancé en tant que revue académique évaluée par les pairs, en libre accès, spécialisée aux questions méthodologiques pour des historiens et historiennes numériques. En 2016, nous avons ajouté une publication hispanophone à la publication anglophone initiale puis, en 2017, nous avons commencé à publier des leçons traduites sous le titre *[Programming Historian en español]({{site.baseurl}}/es)*. En 2018, [nous avons organisé notre premier atelier d'écriture hispanophone](/posts/bogota-workshop-report) et lancé un appel à contributions pour [de nouvelles leçons en espagnol](/posts/convocatoria-de-tutoriales). Dans la même année, nous avons ajouté une publication francophone et le *Programming Historian en français* a été lancé en 2019. Un an plus tard, nous avons été rejoints par une équipe lusophone et avons lancé *[Programming Historian em português]({{site.baseurl}}/pt)* début 2021. +Le *Programming Historian* a été fondé en 2008 par William J. Turkel et Alan MacEachern. À l'époque, Turkel avait publié un [billet de blog](https://digitalhistoryhacks.blogspot.com/2008/01/programming-historian.html), exposant leurs intentions pour le projet. Il s'est focalisé principalement sur Python et il a été publié en libre accès, en tant que projet d'infrastructure numérique de *NiCHE* (*Network in Canadian History & Environment* / *Nouvelle initiative canadienne en histoire de l’environnement*). En 2012, *The Programming Historian* a élargi son équipe éditoriale pour être lancé en tant que revue académique évaluée par les pairs, en libre accès, spécialisée aux questions méthodologiques pour des historiens et historiennes numériques. En 2016, nous avons ajouté une publication hispanophone à la publication anglophone initiale puis, en 2017, nous avons commencé à publier des leçons traduites sous le titre *[Programming Historian en español]({{site.baseurl}}/es)*. En 2018, [nous avons organisé notre premier atelier d'écriture hispanophone](/posts/bogota-workshop-report) et lancé un appel à contributions pour [de nouvelles leçons en espagnol](/posts/convocatoria-de-tutoriales). Dans la même année, nous avons ajouté une publication francophone et le *Programming Historian en français* a été lancé en 2019. Un an plus tard, nous avons été rejoints par une équipe lusophone et avons lancé *[Programming Historian em português]({{site.baseurl}}/pt)* début 2021. 
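The hunks before and after this point all apply the same mechanical rewrite: an `http://` link target becomes `https://` while the surrounding text is untouched. The patch does not say whether these edits were made by hand or scripted, but as a rough sketch, a bulk rewrite of this kind could be automated along the following lines (the `SAFE_HOSTS` list and the `fr/lecons` path are illustrative assumptions; a real run would first need to verify that each host actually serves HTTPS):

```python
import re
from pathlib import Path

# Illustrative only: the patch does not state how its http -> https changes
# were produced. Hosts are rewritten only from an explicit allow-list, since
# not every linked site has migrated to HTTPS.
SAFE_HOSTS = ["www.w3schools.com", "en.wikipedia.org", "archive.org"]  # assumed examples

pattern = re.compile(r"http://(" + "|".join(map(re.escape, SAFE_HOSTS)) + r")")

for path in Path("fr/lecons").glob("*.md"):
    text = path.read_text(encoding="utf-8")
    new_text = pattern.sub(r"https://\1", text)
    if new_text != text:
        path.write_text(new_text, encoding="utf-8")
```

Restricting the substitution to an explicit host list is the conservative choice: a blanket `http://` to `https://` replacement would also rewrite hosts that never migrated, or URLs whose scheme is part of an archived identifier, as with the Wayback Machine query strings that appear elsewhere in this patch.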
diff --git a/fr/consignes-redacteurs.md b/fr/consignes-redacteurs.md index 0a83259b1c..b9aa2a5787 100644 --- a/fr/consignes-redacteurs.md +++ b/fr/consignes-redacteurs.md @@ -279,7 +279,7 @@ Ci-dessous quelques sites pour chercher des images: - The [Virtual Manuscript Library of Switzerland](https://www.flickr.com/photos/e-codices) - The [British Library](https://www.flickr.com/photos/britishlibrary) - The [Internet Archive Book Images](https://archive.org/details/bookimages) -  - The [Library of Congress Maps](http://www.loc.gov/maps/collections) +  - The [Library of Congress Maps](https://www.loc.gov/maps/collections) Merci de prendre soin de trouver une image dont le style est proche de celles déjà utilisées, donc pas de photographie, mais plutôt une image d'illustration de livre, de taille minimale de 200x200 pixels, sans restriction de droits de copyright. Merci de faire attention à ce que les images ne heurtent pas les sensibilités et, en conformité à notre [engagement en faveur de la diversité](/posts/PH-commitment-to-diversity), qu'elles ne reproduisent pas de stéréotypes sexistes ou raciaux. diff --git a/fr/contribuer.md b/fr/contribuer.md index b546a5f0f5..a894b7cd22 100644 --- a/fr/contribuer.md +++ b/fr/contribuer.md @@ -56,9 +56,9 @@ Nous apprécions tout particulièrement les informations reçues sur les liens c Ce projet est notre démonstration de ce que l'édition scientifique en accès libre peut et doit être. Merci de nous aider à disséminer le message et à fournir le plus large accès possible à cette ressource en demandant à votre bibliothèque d'enregister le projet dans son catalogue. -Le _Programming Historian_ a sa notice dans WorldCat [en français](https://uva.worldcat.org/title/programming-historian-en-franais/oclc/1104391842), [en anglais](http://www.worldcat.org/title/programming-historian/oclc/951537099), [en espagnol](https://www.worldcat.org/title/programming-historian-en-espanol/oclc/1061292935&referer=brief_results) et [en portugais](https://search.worldcat.org/title/1332987197). +Le _Programming Historian_ a sa notice dans WorldCat [en français](https://uva.worldcat.org/title/programming-historian-en-franais/oclc/1104391842), [en anglais](https://www.worldcat.org/title/programming-historian/oclc/951537099), [en espagnol](https://www.worldcat.org/title/programming-historian-en-espanol/oclc/1061292935&referer=brief_results) et [en portugais](https://search.worldcat.org/title/1332987197). -Toute notre gratitude à la [bibliothèque de l'Université de Purdue](http://purdue-primo-prod.hosted.exlibrisgroup.com/primo_library/libweb/action/dlDisplay.do?vid=PURDUE&search_scope=everything&docId=PURDUE_ALMA51671812890001081&fn=permalink), à Amanda Visconti, et à la [bibliothèque de l'Université de Virginia]). +Toute notre gratitude à la [bibliothèque de l'Université de Purdue](https://purdue-primo-prod.hosted.exlibrisgroup.com/primo_library/libweb/action/dlDisplay.do?vid=PURDUE&search_scope=everything&docId=PURDUE_ALMA51671812890001081&fn=permalink), à Amanda Visconti, et à la bibliothèque de l'Université de Virginia. L'édition anglais est aussi indexé dans le [Directory of Open Access Journals](https://doaj.org/toc/2397-2068). diff --git a/fr/dons.md b/fr/dons.md index 092f938123..7af10f35f9 100644 --- a/fr/dons.md +++ b/fr/dons.md @@ -19,7 +19,7 @@ Votre soutien contribue directement à l'infrastructure qui assure la cohésion
    - + diff --git a/fr/lecons/analyse-corpus-antconc.md b/fr/lecons/analyse-corpus-antconc.md index 85c10e1275..dd038bb6dd 100644 --- a/fr/lecons/analyse-corpus-antconc.md +++ b/fr/lecons/analyse-corpus-antconc.md @@ -55,13 +55,13 @@ Vous avez déjà fait ce genre de choses auparavant, si vous avez déjà... * utilisé [Voyant Tools][48] pour analyser des schémas dans un texte * suivi les tutoriels d'introduction à Python du [Programming Historian][51] -En quelque sorte, [Voyant Tools](http://voyant-tools.org/) est une passerelle vers la réalisation d'analyses plus sophistiquées et reproductibles, car l'esthétique de bricolage des scripts Python ou R peut ne pas convenir à tout le monde. [AntConc](http://www.laurenceanthony.net/software/antconc/) comble ce vide en étant un logiciel autonome d'analyse linguistique de textes, disponible gratuitement pour Windows, Mac OS et Linux. Par ailleurs, il est régulièrement mis à jour par son créateur, [Laurence Anthony](http://www.laurenceanthony.net/). Il existe d'autres logiciels de concordance, mais AntConc est librement disponible sur toutes les plateformes et très bien maintenu. Voir la [bibliographie][56] pour d'autres ressources. +En quelque sorte, [Voyant Tools](https://voyant-tools.org/) est une passerelle vers la réalisation d'analyses plus sophistiquées et reproductibles, car l'esthétique de bricolage des scripts Python ou R peut ne pas convenir à tout le monde. [AntConc](https://www.laurenceanthony.net/software/antconc/) comble ce vide en étant un logiciel autonome d'analyse linguistique de textes, disponible gratuitement pour Windows, Mac OS et Linux. Par ailleurs, il est régulièrement mis à jour par son créateur, [Laurence Anthony](https://www.laurenceanthony.net/). Il existe d'autres logiciels de concordance, mais AntConc est librement disponible sur toutes les plateformes et très bien maintenu. Voir la [bibliographie][56] pour d'autres ressources. Ce tutoriel explore différentes façons d'aborder un corpus de textes. Il est important de noter que les approches issues de la linguistique de corpus sont rarement, voire jamais, l'unique possibilité. Ainsi, à chaque étape, il vaut la peine de réfléchir à ce que vous faites et comment cela peut vous aider à répondre à une question spécifique avec vos données. Bien que je présente dans ce tutoriel une approche modulaire qui explique 'comment faire ceci puis cela pour atteindre x', il n'est pas toujours nécessaire de suivre l'ordre exact décrit ici. Cette leçon donne un aperçu de certaines des méthodes disponibles, plutôt qu'une recette du succès. ### Téléchargements nécessaires -1. Logiciel : [AntConc](http://www.laurenceanthony.net/software/antconc/). -Dézippez le fichier si nécessaire, et lancez l'application. Les captures d'écran ci-dessous peuvent varier légèrement par rapport à la version que vous avez (et selon le système d'exploitation, bien sûr), mais les procédures sont plus ou moins les mêmes sur les plateformes et les versions récentes d'AntConc. Ce tutoriel a recours à une version plus ancienne d'AntConc, car je la trouve plus facile à utiliser dans un contexte d'introduction. Vous pouvez utiliser la version la plus récente, mais si vous souhaitez suivre avec les captures d'écran fournies, vous pouvez télécharger la version utilisée ici, [version 3.2.4](http://www.laurenceanthony.net/software/antconc/releases/AntConc324/). +1. Logiciel : [AntConc](https://www.laurenceanthony.net/software/antconc/). +Dézippez le fichier si nécessaire, et lancez l'application. 
Les captures d'écran ci-dessous peuvent varier légèrement par rapport à la version que vous avez (et selon le système d'exploitation, bien sûr), mais les procédures sont plus ou moins les mêmes sur les plateformes et les versions récentes d'AntConc. Ce tutoriel a recours à une version plus ancienne d'AntConc, car je la trouve plus facile à utiliser dans un contexte d'introduction. Vous pouvez utiliser la version la plus récente, mais si vous souhaitez suivre avec les captures d'écran fournies, vous pouvez télécharger la version utilisée ici, [version 3.2.4](https://www.laurenceanthony.net/software/antconc/releases/AntConc324/). 2. Corpus test : Téléchargez un [fichier zip de critiques de films](/assets/corpus-analysis-with-antconc/antconc_corpus_files.zip). @@ -94,10 +94,10 @@ Allez sur votre bureau et vérifiez que vous pouvez trouver votre fichier texte. Répétez la procédure plusieurs fois et c'est ainsi que vous construirez un corpus de fichiers texte brut. Ce processus s'appelle la construction de corpus, ce qui implique très souvent d'aborder des questions d'échantillonnage, de représentativité et d'organisation. Rappelez-vous, *chaque fichier que vous voulez utiliser dans votre corpus __doit__ être un fichier texte brut pour qu'AntConc puisse l'utiliser.* Il est d'usage de nommer les fichiers avec le suffixe .txt pour que vous sachiez de quel type de fichier il s'agit. Comme vous pouvez l'imaginer, il peut être assez fastidieux de constituer un corpus substantiel un fichier à la fois, surtout si vous avez l'intention de traiter un ensemble important de documents. Il est donc très courant d'utiliser l'extraction de contenus (_webscraping_) (en utilisant un petit programme pour télécharger automatiquement les fichiers sur le web pour vous) pour construire votre corpus. Pour en savoir plus sur les concepts et les techniques d'extraction, consultez les tutoriels du _Programming Historian_ sur l'[extraction avec Beautiful Soup][50] et le [téléchargement automatique avec wget][51] (en anglais). -Plutôt que de construire un corpus un document à la fois, nous allons utiliser un corpus préparé de critiques de films positives et négatives, emprunté au [_Natural Language Processing Toolkit_](http://www.nltk.org/). Le corpus des critiques de films de la NLTK compte 2000 critiques, organisées par résultats positifs et négatifs ; aujourd'hui, nous allons aborder un petit sous-ensemble d'entre eux (200 positifs, 200 négatifs). +Plutôt que de construire un corpus un document à la fois, nous allons utiliser un corpus préparé de critiques de films positives et négatives, emprunté au [_Natural Language Processing Toolkit_](https://www.nltk.org/). Le corpus des critiques de films de la NLTK compte 2000 critiques, organisées par résultats positifs et négatifs ; aujourd'hui, nous allons aborder un petit sous-ensemble d'entre eux (200 positifs, 200 négatifs). -La construction de corpus est un sous-domaine à part entière. Voir "[_Representativeness in Corpus Design_](https://academic.oup.com/dsh/article-abstract/8/4/243/928942)", _Literary and Linguistic Computing_, 8 (4) : 243-257 et [_Developing Linguistic Corpora : a Guide to Good Practice_](http://www.amazon.com/Developing-Linguistic-Corpora-Practice-Guides/dp/1842172050/ref=sr_1_1_1) pour plus d'informations. +La construction de corpus est un sous-domaine à part entière. 
Voir "[_Representativeness in Corpus Design_](https://academic.oup.com/dsh/article-abstract/8/4/243/928942)", _Literary and Linguistic Computing_, 8 (4) : 243-257 et [_Developing Linguistic Corpora : a Guide to Good Practice_](https://www.amazon.com/Developing-Linguistic-Corpora-Practice-Guides/dp/1842172050/ref=sr_1_1_1) pour plus d'informations. @@ -279,7 +279,7 @@ Vous pouvez également opter pour l'échange de corpus de référence et de fich Dans _Keyword List_ (Liste des mots-clés), appuyez simplement sur "_Start_" (Démarrer) (sans rien taper dans le champ de recherche). Si vous venez de changer le corpus de référence et les fichiers cibles, il se peut qu'on vous demande de créer une nouvelle liste de mots avant qu'AntConc ne calcule les mots-clés. Nous voyons une liste de mots-clés qui ont des mots qui sont beaucoup plus "inhabituels" - plus statistiquement inattendus - dans le corpus que nous regardons en comparaison avec le corpus de référence. -> Keyness (spécificité) : c'est la fréquence d'un mot dans le texte par rapport à sa fréquence dans un corpus de référence, "telle que la probabilité statistique calculée par une procédure appropriée soit inférieure ou égale à une valeur p indiquée par l'utilisateur" (tiré d'[ici][41]). Pour ceux et celles qui s'intéressent aux détails statistiques, voir la section sur la spécificité (Keyness) à la page 7 du [fichier read me](http://www.laurenceanthony.net/software/antconc/releases/AntConc335/help.pdf) de Laurence Anthony. +> Keyness (spécificité) : c'est la fréquence d'un mot dans le texte par rapport à sa fréquence dans un corpus de référence, "telle que la probabilité statistique calculée par une procédure appropriée soit inférieure ou égale à une valeur p indiquée par l'utilisateur" (tiré d'[ici][41]). Pour ceux et celles qui s'intéressent aux détails statistiques, voir la section sur la spécificité (Keyness) à la page 7 du [fichier read me](https://www.laurenceanthony.net/software/antconc/releases/AntConc335/help.pdf) de Laurence Anthony. @@ -327,29 +327,29 @@ En résumé, il vaut la peine de réfléchir : ## Ressources supplémentaires pour ce tutoriel #### En anglais [Une courte bibliographie sur la linguistique des corpus][43]. -[Une version plus détaillée de ce tutoriel, en supposant que vous n'avez aucune connaissance en informatique.](http://hfroehli.ch/workshops/getting-started-with-antconc/) +[Une version plus détaillée de ce tutoriel, en supposant que vous n'avez aucune connaissance en informatique.](https://hfroehli.ch/workshops/getting-started-with-antconc/) #### En français (notes de la version traduite) -[Page AntConc de EduTech Wiki de l'UNIGE](http://edutechwiki.unige.ch/fr/AntConc#) -[Page AntConc sur le site Exploration de corpus : outils et pratiques](http://explorationdecorpus.corpusecrits.huma-num.fr/antconc/) -[Tutoriel AntConc du CID-ENS Lyon](http://cid.ens-lyon.fr/ac_article.asp?fic=antconc.asp) +[Page AntConc de EduTech Wiki de l'UNIGE](https://edutechwiki.unige.ch/fr/AntConc#) +[Page AntConc sur le site Exploration de corpus : outils et pratiques](https://explorationdecorpus.corpusecrits.huma-num.fr/antconc/) +[Tutoriel AntConc du CID-ENS Lyon](https://cid.ens-lyon.fr/ac_article.asp?fic=antconc.asp) -En France, des outils similaires à AntConc ont été dévéloppés dans le cadre de la textométrie, de la lexicométrie, et de la logométrie, souvent par des historien(ne)s. 
On peut nommer notamment [Hyperbase](http://ancilla.unice.fr/), [Iramuteq](http://iramuteq.org/), [Lexico](http://www.lexi-co.com/) ou [TXM](http://textometrie.ens-lyon.fr/?lang=fr). Merci de consulter également: Bénédicte Pincemin, ["Sept logiciels de textométrie"](https://halshs.archives-ouvertes.fr/halshs-01843695/document), 2018. +En France, des outils similaires à AntConc ont été développés dans le cadre de la textométrie, de la lexicométrie, et de la logométrie, souvent par des historien(ne)s. On peut nommer notamment [Hyperbase](https://ancilla.unice.fr/), [Iramuteq](https://iramuteq.org/), [Lexico](https://www.lexi-co.com/) ou [TXM](https://textometrie.ens-lyon.fr/?lang=fr). Merci de consulter également: Bénédicte Pincemin, ["Sept logiciels de textométrie"](https://halshs.archives-ouvertes.fr/halshs-01843695/document), 2018. #### Bibliographie non-exhaustive -Ludovic Lebart et André Salem, [*Statistique textuelle*](http://lexicometrica.univ-paris3.fr/livre/st94/st94-tdm.html), 1994. +Ludovic Lebart et André Salem, [*Statistique textuelle*](https://lexicometrica.univ-paris3.fr/livre/st94/st94-tdm.html), 1994. Damon Mayaffre, ["L’entrelacement lexical des textes. Cooccurrences et lexicométrie"](https://hal.archives-ouvertes.fr/hal-00553808), _Journées de linguistique de corpus_, 2008, p. 91-102. [La cooccurrence, du fait statistique au fait textuel](https://journals.openedition.org/corpus/2183), _Corpus_, 11, 2012, numéro coordonné par Damon Mayaffre et Jean-Marie Viprey. -[41]: http://www.lexically.net/downloads/version6/HTML/index.html?keyness_definition.htm -[43]: http://hfroehlich.wordpress.com/2014/05/11/intro-bibliography-corpus-linguistics/ -[47]: http://hfroehli.ch/workshops/getting-started-with-antconc/ -[48]: http://voyant-tools.org/ [50]: /en/lessons/intro-to-beautiful-soup [51]: /en/lessons/automated-downloading-with-wget -[52]: http://www.antlab.sci.waseda.ac.jp/ -[53]: http://notepad-plus-plus.org/ -[54]: http://www.barebones.com/products/textwrangler/ -[55]: http://www.wordfrequency.info/free.asp -[56]: http://hfroehli.ch/2014/05/11/intro-bibliography-corpus-linguistics/ +[41]: https://www.lexically.net/downloads/version6/HTML/index.html?keyness_definition.htm +[43]: https://hfroehlich.wordpress.com/2014/05/11/intro-bibliography-corpus-linguistics/ +[47]: https://hfroehli.ch/workshops/getting-started-with-antconc/ +[48]: https://voyant-tools.org/ [50]: /en/lessons/intro-to-beautiful-soup [51]: /en/lessons/automated-downloading-with-wget +[52]: https://www.antlab.sci.waseda.ac.jp/ +[53]: https://notepad-plus-plus.org/ +[54]: https://www.barebones.com/products/textwrangler/ +[55]: https://www.wordfrequency.info/free.asp +[56]: https://hfroehli.ch/2014/05/11/intro-bibliography-corpus-linguistics/ diff --git a/fr/lecons/analyse-de-documents-avec-tfidf.md b/fr/lecons/analyse-de-documents-avec-tfidf.md index 3f0f196297..67355b4fba 100644 --- a/fr/lecons/analyse-de-documents-avec-tfidf.md +++ b/fr/lecons/analyse-de-documents-avec-tfidf.md @@ -42,7 +42,7 @@ En étudiant **tf-idf**, vous découvrirez une méthode d'analyse textuelle que ## Connaissances préalables recommandées -- Être familiarisé(e) avec Python ou un langage de programmation similaire. Le code de cette leçon a été programmé en Python 3.6, mais vous pouvez exécuter **tf-idf** dans toutes les versions courantes de Python, en utilisant l'un des divers modules appropriés, ainsi que dans plusieurs autres langages de programmation. Le niveau de compétence en programmation requis est difficile à évaluer, mais vous devrez au moins être à l'aise avec les types de données et les opérations élémentaires.
Pour tirer profit de cette leçon, il serait aussi souhaitable de suivre un cours comme celui proposé par Antoine Rozo sur [zestedesavoir.com](https://perma.cc/7WJ4-WD3P) ou d'avoir suivi certaines des [leçons d'introduction à la programmation en Python](/fr/lecons/introduction-et-installation) du _Programming Historian_. Si vous avez accès à une bibliothèque, n'hésitez pas à consulter le livre d'Émilien Schultz et de Matthias Bussonnier [*Python pour les sciences humaines et sociales*](http://www.worldcat.org/oclc/1232233436). +- Être familiarisé(e) avec Python ou un langage de programmation similaire. Le code de cette leçon a été programmé en Python 3.6, mais vous pouvez exécuter **tf-idf** dans toutes les versions courantes de Python, en utilisant l'un des divers modules appropriés, ainsi que dans plusieurs autres langages de programmation. Le niveau de compétence en programmation requis est difficile à évaluer, mais vous devrez au moins être à l'aise avec les types de données et les opérations élémentaires. Pour tirer profit de cette leçon, il serait aussi souhaitable de suivre un cours comme celui proposé par Antoine Rozo sur [zestedesavoir.com](https://perma.cc/7WJ4-WD3P) ou d'avoir suivi certaines des [leçons d'introduction à la programmation en Python](/fr/lecons/introduction-et-installation) du _Programming Historian_. Si vous avez accès à une bibliothèque, n'hésitez pas à consulter le livre d'Émilien Schultz et de Matthias Bussonnier [*Python pour les sciences humaines et sociales*](https://www.worldcat.org/oclc/1232233436). - À défaut de pouvoir suivre la recommandation précédente, vous pourriez [réviser les bases de Python](https://perma.cc/YDT4-9JJ6), dont les types de données élémentaires (chaînes de caractères, nombres entiers, nombres réels, tuples, listes et dictionnaires), les variables, les boucles, les classes d'objets et leurs instances. - La maîtrise des bases d'Excel ou d'un autre tableur pourrait être utile si vous souhaitez examiner les feuilles de calcul au format CSV liées à cette leçon de plus près. Vous pouvez aussi employer le module Pandas du langage Python pour lire ces fichiers CSV. @@ -438,19 +438,19 @@ Le résumé automatique est une autre manière d'explorer un corpus. Rada Mihalc - C.D. Manning, P. Raghavan et H. Schütze, _Introduction to Information Retrieval_. Cambridge: Cambridge University Press, 2008. -- Rada Mihalcea et Paul Tarau. « Textrank: Bringing order into text », _Proceedings of the 2004 Conference on Empirical Methods in Natural Language Processing_, Barcelone, Espagne, 2004. [http://www.aclweb.org/anthology/W04-3252](https://perma.cc/SMV5-7MYY) +- Rada Mihalcea et Paul Tarau. « Textrank: Bringing order into text », _Proceedings of the 2004 Conference on Empirical Methods in Natural Language Processing_, Barcelone, Espagne, 2004. [https://www.aclweb.org/anthology/W04-3252](https://perma.cc/SMV5-7MYY) - « Nellie Bly, Journalist, Dies of Pneumonia », [_The New York Times_, 28 janvier 1922](https://perma.cc/LA5B-65HL). - G. Salton et M.J. McGill, _Introduction to Modern Information Retrieval_. New York: McGraw-Hill, 1983. -- Ben Schmidt, « Do Digital Humanists Need to Understand Algorithms? », _Debates in the Digital Humanities 2016_. Édition en ligne. Minneapois: University of Minnesota Press. [http://dhdebates.gc.cuny.edu/debates/text/99](https://perma.cc/95WD-SDM5). +- Ben Schmidt, « Do Digital Humanists Need to Understand Algorithms? », _Debates in the Digital Humanities 2016_. Édition en ligne. Minneapolis: University of Minnesota Press.
[https://dhdebates.gc.cuny.edu/debates/text/99](https://perma.cc/95WD-SDM5). -- Ben Schmidt, « Words Alone: Dismantling Topic Models in the Humanities », _Journal of Digital Humanities_. Vol. 2, No. 1 (2012): n.p. [http://journalofdigitalhumanities.org/2-1/words-alone-by-benjamin-m-schmidt/](https://perma.cc/LT4N-X4MZ). +- Ben Schmidt, « Words Alone: Dismantling Topic Models in the Humanities », _Journal of Digital Humanities_. Vol. 2, No. 1 (2012): n.p. [https://journalofdigitalhumanities.org/2-1/words-alone-by-benjamin-m-schmidt/](https://perma.cc/LT4N-X4MZ). - Karen Spärck Jones, « A Statistical Interpretation of Term Specificity and Its Application in Retrieval. », _Journal of Documentation_ 28, no. 1 (1972): 11–21. -- Jonathan Stray et Julian Burgess. « A Full-text Visualization of the Iraq War Logs », 10 décembre 2010 (dernière mise à jour en avril 2012), [http://jonathanstray.com/a-full-text-visualization-of-the-iraq-war-logs](https://perma.cc/QBZ4-DKTE). +- Jonathan Stray et Julian Burgess. « A Full-text Visualization of the Iraq War Logs », 10 décembre 2010 (dernière mise à jour en avril 2012), [https://jonathanstray.com/a-full-text-visualization-of-the-iraq-war-logs](https://perma.cc/QBZ4-DKTE). - Ted Underwood, « Identifying diction that characterizes an author or genre: why Dunning's may not be the best method », _The Stone and the Shell_, 9 novembre 2011, [https://tedunderwood.com/2011/11/09/identifying-the-terms-that-characterize-an-author-or-genre-why-dunnings-may-not-be-the-best-method/](https://perma.cc/SY25-UXK3). @@ -470,7 +470,7 @@ Si vous n'utilisez pas Anaconda, il faudra vous assurer de disposer des outils p 1. Une installation de Python 3 (préférablement Python 3.6 ou une version plus récente) 2. Idéalement, un environnement virtuel dans lequel installer et exécuter le Python -3. Le module Scikit-Learn et ses dépendances (voir [http://scikit-learn.org/stable/install.html](http://scikit-learn.org/stable/install.html)) +3. Le module Scikit-Learn et ses dépendances (voir [https://scikit-learn.org/stable/install.html](https://scikit-learn.org/stable/install.html)) 4. Jupyter Notebook et ses dépendances # Notes @@ -487,13 +487,13 @@ Si vous n'utilisez pas Anaconda, il faudra vous assurer de disposer des outils p [^6]: Scikit-Learn Developers, « TfidfVectorizer » (en anglais), consulté le 9 juin 2022, [https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html](https://perma.cc/JUN8-39Z6). -[^7]: Ben Schmidt, « Do Digital Humanists Need to Understand Algorithms? », _Debates in the Digital Humanities 2016_. Édition en ligne. Minneapolis: University of Minnesota Press. [http://dhdebates.gc.cuny.edu/debates/text/99](https://perma.cc/95WD-SDM5). +[^7]: Ben Schmidt, « Do Digital Humanists Need to Understand Algorithms? », _Debates in the Digital Humanities 2016_. Édition en ligne. Minneapolis: University of Minnesota Press. [https://dhdebates.gc.cuny.edu/debates/text/99](https://perma.cc/95WD-SDM5). [^8]: Guido van Rossum, Barry Warsaw et Nick Coghlan. « PEP 8 - Style Guide for Python Code », 5 juillet 2001 (mise à jour août 2013), [https://www.python.org/dev/peps/pep-0008/](https://perma.cc/P2ZM-VPQM). [^9]: « Ida M. Tarbell, 86, Dies in Bridgeport », [_The New York Times_, 17 janvier 1944](https://perma.cc/NBV6-S2XM); « W. E. B. 
DuBois Dies in Ghana; Negro Leader and Author, 95 », [_The New York Times_, 28 août 1963](https://perma.cc/W5NX-XZRV); Alden Whitman, « Upton Sinclair, Author, Dead; Crusader for Social Justice, 90 », [_The New York Times_, 26 novembre 1968](https://perma.cc/E4N7-2KD6); « Willa Cather Dies; Noted Novelist, 70 », [_The New York Times_, 25 avril 1947](https://perma.cc/2L7H-WGKN). -[^10]: Jonathan Stray et Julian Burgess. « A Full-text Visualization of the Iraq War Logs », 10 décembre 2010 (dernière mise à jour en avril 2012), [http://jonathanstray.com/a-full-text-visualization-of-the-iraq-war-logs](https://perma.cc/QBZ4-DKTE). +[^10]: Jonathan Stray et Julian Burgess. « A Full-text Visualization of the Iraq War Logs », 10 décembre 2010 (dernière mise à jour en avril 2012), [https://jonathanstray.com/a-full-text-visualization-of-the-iraq-war-logs](https://perma.cc/QBZ4-DKTE). [^11]: C.D. Manning, P. Raghavan et H. Schütze, _Introduction to Information Retrieval_ (Cambridge: Cambridge University Press, 2008), 118-120. @@ -503,6 +503,6 @@ Si vous n'utilisez pas Anaconda, il faudra vous assurer de disposer des outils p [^14]: Il n'est habituellement pas recommandé d'appliquer **tf-idf** comme prétraitement avant de produire un modèle thématique. Voir : [https://datascience.stackexchange.com/questions/21950/why-we-should-not-feed-lda-with-tfidf](https://perma.cc/N5W9-TYX7). -[^15]: Ben Schmidt, « Words Alone: Dismantling Topic Models in the Humanities », _Journal of Digital Humanities_. Vol. 2, No. 1 (2012): n.p., [http://journalofdigitalhumanities.org/2-1/words-alone-by-benjamin-m-schmidt/](https://perma.cc/LT4N-X4MZ). +[^15]: Ben Schmidt, « Words Alone: Dismantling Topic Models in the Humanities », _Journal of Digital Humanities_. Vol. 2, No. 1 (2012): n.p., [https://journalofdigitalhumanities.org/2-1/words-alone-by-benjamin-m-schmidt/](https://perma.cc/LT4N-X4MZ). -[^16]: Rada Mihalcea et Paul Tarau. « Textrank: Bringing order into text », _Proceedings of the 2004 Conference on Empirical Methods in Natural Language Processing_, Barcelone, Espagne, 2004, [http://www.aclweb.org/anthology/W04-3252](https://perma.cc/SMV5-7MYY). +[^16]: Rada Mihalcea et Paul Tarau. « Textrank: Bringing order into text », _Proceedings of the 2004 Conference on Empirical Methods in Natural Language Processing_, Barcelone, Espagne, 2004, [https://www.aclweb.org/anthology/W04-3252](https://perma.cc/SMV5-7MYY). diff --git a/fr/lecons/analyse-donnees-tabulaires-R.md b/fr/lecons/analyse-donnees-tabulaires-R.md index 4a59c7fb0d..16eb0a0a05 100644 --- a/fr/lecons/analyse-donnees-tabulaires-R.md +++ b/fr/lecons/analyse-donnees-tabulaires-R.md @@ -578,7 +578,7 @@ Pour en savoir plus sur R, consultez le [manuel de R](https://cran.r-project.org Vous trouverez en ligne de nombreux tutoriels sur R. Nous vous conseillons : -* [R: A self-learn tutorial](http://web.archive.org/web/20191015004305/https://www.nceas.ucsb.edu/files/scicomp/Dloads/RProgramming/BestFirstRTutorial.pdf) (en anglais). Ce tutoriel passe en revue plusieurs fonctions et propose des exercices pour s’entrainer. +* [R: A self-learn tutorial](https://web.archive.org/web/20191015004305/https://www.nceas.ucsb.edu/files/scicomp/Dloads/RProgramming/BestFirstRTutorial.pdf) (en anglais). Ce tutoriel passe en revue plusieurs fonctions et propose des exercices pour s’entrainer. * [Introduction à R](https://www.datacamp.com/courses/introduction-a-r). 
Cours proposé par le site Datacamp qui vous permet de vous entrainer en ligne (gratuit, mais il faut s’inscrire pour y accéder). Les exercices interactifs permettent d’identifier vos erreurs et d’apprendre à écrire du code plus efficacement. * [R pour les débutants](https://r.developpez.com/tutoriels/r/debutants/#Lno-I). Écrit par Emmanuel Paradis, il s’agit d’un des premiers manuels francophones d’introduction à R. * L’ouvrage [Computational Historical Thinking](https://dh-r.lincolnmullen.com/). Écrit par Lincoln A. Mullen, c’est une ressource précieuse pour les historiennes et historiens qui souhaitent utiliser R pour faire leurs travaux de recherche. diff --git a/fr/lecons/analyse-reseau-python.md b/fr/lecons/analyse-reseau-python.md index eb5b65aa2a..d6ea90160b 100644 --- a/fr/lecons/analyse-reseau-python.md +++ b/fr/lecons/analyse-reseau-python.md @@ -73,7 +73,7 @@ Cette leçon peut vous aider à répondre à des questions telles que : Bien avant qu'il n'y ait des ami·es sur Facebook, il y avait la «  Société des Amis  », connue sous le nom de quakers. Fondés en Angleterre au milieu du XVIIe siècle, les quakers étaient des chrétien·nes protestantes qui s'opposaient à l'Église officielle d'Angleterre et prônaient une large tolérance religieuse, préférant la « lumière intérieure » et la conscience des chrétien·nes à l'orthodoxie imposée par l'État. Le nombre de quakers a augmenté rapidement entre le milieu et la fin du XVIIe siècle et leurs membres se sont répandus dans les iles britanniques, en Europe et dans les colonies du Nouveau Monde - en particulier en Pennsylvanie, colonie fondée par le leader quaker William Penn et où vivent les quatre auteurs et autrices de cette leçon. -Les universitaires ayant depuis longtemps lié la croissance des effectifs et la pérennité des quakers à l'efficacité de leurs réseaux, les données utilisées dans cette leçon sont une liste de noms et de relations parmi les premiers quakers du XVIIe siècle. Ce jeu de données est issu du *[Oxford Dictionary of National Biography](http://www.oxforddnb.com)* et du projet *[Six Degrees of Francis Bacon](https://perma.cc/Q63S-UZTU)* qui reconstruit les réseaux sociaux du début de la Grande-Bretagne moderne (1500-1700). +Les universitaires ayant depuis longtemps lié la croissance des effectifs et la pérennité des quakers à l'efficacité de leurs réseaux, les données utilisées dans cette leçon sont une liste de noms et de relations parmi les premiers quakers du XVIIe siècle. Ce jeu de données est issu du *[Oxford Dictionary of National Biography](https://www.oxforddnb.com)* et du projet *[Six Degrees of Francis Bacon](https://perma.cc/Q63S-UZTU)* qui reconstruit les réseaux sociaux du début de la Grande-Bretagne moderne (1500-1700). ## Préparation des données et installation de NetworkX @@ -535,7 +535,7 @@ Travailler avec NetworkX permet d'en apprendre beaucoup sur les classes de modul ## Exporter les données -NetworkX prend en charge un très grand nombre de formats de fichiers pour [exporter les données](https://perma.cc/Z7H3-UMKD). Si vous voulez exporter une liste de liens en format texte à charger dans Palladio, il existe un [outil adapté](https://perma.cc/DWK2-J389). Fréquemment, dans le projet *Six Degrees of Francis Bacon*, nous exportons les données NetworkX en [format JSON d3](https://perma.cc/2STT-F466) pour les visualiser dans un navigateur. 
Vous pouvez aussi [exporter](https://perma.cc/7UCP-YBX4) votre graphe en tant que [tableau de données Pandas](http://pandas.pydata.org/) si vous souhaitez effectuer des manipulations statistiques plus avancées. Il existe de nombreuses options et, si vous avez ajouté toutes vos mesures dans votre objet `Graph` en tant qu’attributs, toutes vos données seront exportées simultanément. +NetworkX prend en charge un très grand nombre de formats de fichiers pour [exporter les données](https://perma.cc/Z7H3-UMKD). Si vous voulez exporter une liste de liens en format texte à charger dans Palladio, il existe un [outil adapté](https://perma.cc/DWK2-J389). Fréquemment, dans le projet *Six Degrees of Francis Bacon*, nous exportons les données NetworkX en [format JSON d3](https://perma.cc/2STT-F466) pour les visualiser dans un navigateur. Vous pouvez aussi [exporter](https://perma.cc/7UCP-YBX4) votre graphe en tant que [tableau de données Pandas](https://pandas.pydata.org/) si vous souhaitez effectuer des manipulations statistiques plus avancées. Il existe de nombreuses options et, si vous avez ajouté toutes vos mesures dans votre objet `Graph` en tant qu’attributs, toutes vos données seront exportées simultanément. La plupart des options d’exportation fonctionnent à peu près de la même manière. Dans cette leçon, vous apprendrez comment exporter vos données au format GEXF de Gephi. Une fois le fichier exporté, vous pouvez le charger [directement dans Gephi](https://perma.cc/46UZ-F6PU) pour le visualiser. diff --git a/fr/lecons/calibration-radiocarbone-avec-r.md b/fr/lecons/calibration-radiocarbone-avec-r.md index 3b5f9d230b..0a0ab87f3f 100644 --- a/fr/lecons/calibration-radiocarbone-avec-r.md +++ b/fr/lecons/calibration-radiocarbone-avec-r.md @@ -127,7 +127,7 @@ On comprend ainsi que ces particularités, si elles sont mal comprises, peuvent ## Applications avec R -De nombreux outils sont aujourd'hui disponibles pour calibrer des âges radiocarbone. [OxCal](https://c14.arch.ox.ac.uk/oxcal/), [CALIB](http://calib.org) et [ChronoModel](https://chronomodel.com) offrent cette possibilité, mais sont plutôt destinés à traiter des problèmes de [modélisation bayésienne](https://fr.wikipedia.org/wiki/Statistique_bay%C3%A9sienne) de séquences chronologiques. Le langage R offre une alternative intéressante. Distribué sous licence libre, il favorise la reproductibilité et permet d'intégrer le traitement d'âges radiocarbone à des études plus larges (analyse spatiale etc.). +De nombreux outils sont aujourd'hui disponibles pour calibrer des âges radiocarbone. [OxCal](https://c14.arch.ox.ac.uk/oxcal/), [CALIB](https://calib.org) et [ChronoModel](https://chronomodel.com) offrent cette possibilité, mais sont plutôt destinés à traiter des problèmes de [modélisation bayésienne](https://fr.wikipedia.org/wiki/Statistique_bay%C3%A9sienne) de séquences chronologiques. Le langage R offre une alternative intéressante. Distribué sous licence libre, il favorise la reproductibilité et permet d'intégrer le traitement d'âges radiocarbone à des études plus larges (analyse spatiale etc.). Plusieurs packages R permettent de réaliser des calibrations d'âges radiocarbone ([Bchron](https://cran.r-project.org/package=Bchron), [oxcAAR](https://cran.r-project.org/package=oxcAAR)...) et sont souvent orientés vers la modélisation (construction de chronologies, modèles âges-profondeur, etc.). La solution retenue ici est [rcarbon](https://cran.r-project.org/package=rcarbon) (Bevan et Crema 2020). 
Ce package permet de calibrer simplement et d'analyser des âges radiocarbone. @@ -501,7 +501,7 @@ Dean, J. S. "Independent Dating in Archaeological Analysis". In *Advances in Arc Hyndman, R. J. 1996. "Computing and Graphing Highest Density Regions." *The American Statistician* 50 (2): 120-26. https://doi.org/10.2307/2684423. -Libby, W. F. "Radiocarbon Dating". *Nobel Lecture*. Stockholm, 12 décembre 1960. http://www.nobelprize.org/nobel_prizes/chemistry/laureates/1960/libby-lecture.html. +Libby, W. F. "Radiocarbon Dating". *Nobel Lecture*. Stockholm, 12 décembre 1960. https://www.nobelprize.org/nobel_prizes/chemistry/laureates/1960/libby-lecture.html. Millard, A. R. 2014. "Conventions for Reporting Radiocarbon Determinations." *Radiocarbon* 56 (2): 555-59. https://doi.org/10.2458/56.17455. diff --git a/fr/lecons/comprendre-les-expressions-regulieres.md b/fr/lecons/comprendre-les-expressions-regulieres.md index fb20b2367a..ea6f02fdd6 100644 --- a/fr/lecons/comprendre-les-expressions-regulieres.md +++ b/fr/lecons/comprendre-les-expressions-regulieres.md @@ -78,7 +78,7 @@ n'importe quelle année entre 1850 et 1899. Pour cet exercice, nous utilisons LibreOffice Writer et LibreOffice Calc, des logiciels de bureautique libres, utilisés respectivement pour le traitement de texte et les feuilles de calcul. Les paquets d'installation pour Linux, Mac ou -Windows peuvent être téléchargés depuis . +Windows peuvent être téléchargés depuis . D'autre logiciels de traitement de texte et même des langages de programmation ont des fonctionalités similaires de recherche de motifs. Comme sa distribution est libre et comme sa syntaxe pour les expressions régulières est proche de ce @@ -110,7 +110,7 @@ différents types de ressources textuelles utilisées pour toutes sortes de recherche en histoire. Pour notre exercice, nous allons utiliser un rapport de cinq pages contenant des statistiques mensuelles sur la morbidité et la mortalité dans les États et les villes des États-Unis, publié en février 1908. -Il est disponible ici : . +Il est disponible ici : . Prenez un moment pour parcourir brièvement les pages du document grâce au [lien pour lire en ligne][], afin de vous familiariser avec lui. Ce document @@ -725,10 +725,10 @@ Pittsburgh, a quant à lui de bons cas de figures sur la manière de travailler [expressions régulières et des outils pour XML][], dans le but de baliser des fichiers de texte brut pour en faire des fichiers XML. - [lien pour lire en ligne]: http://archive.org/stream/jstor-4560629/4560629#page/n0/mode/2up - [Texte intégral]: http://archive.org/stream/jstor-4560629/4560629_djvu.txt + [lien pour lire en ligne]: https://archive.org/stream/jstor-4560629/4560629#page/n0/mode/2up + [Texte intégral]: https://archive.org/stream/jstor-4560629/4560629_djvu.txt [Liste des expressions régulières]: https://help.libreoffice.org/6.3/fr/text/shared/01/02100001.html?DbPAR=SHARED#bm_id3146765 [expressions régulières]: https://fr.wikipedia.org/wiki/Expression_r%C3%A9guli%C3%A8re - [Rubular]: http://rubular.com/ - [expressions régulières et des outils pour XML]: http://dh.obdurodon.org/regex.html + [Rubular]: https://rubular.com/ + [expressions régulières et des outils pour XML]: https://dh.obdurodon.org/regex.html diff --git a/fr/lecons/comprendre-les-pages-web.md b/fr/lecons/comprendre-les-pages-web.md index d4038e5a62..d8549641c4 100644 --- a/fr/lecons/comprendre-les-pages-web.md +++ b/fr/lecons/comprendre-les-pages-web.md @@ -137,5 +137,5 @@ et l'éditeur de texte qui ne l'interprète pas. 
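The hunk header just above quotes the lesson's running example: a pattern matching any year between 1850 and 1899. That lesson builds the pattern in LibreOffice Writer's Find & Replace dialog; purely as a cross-check, the same idea can be sketched with Python's `re` module (the character class below is a reconstruction for illustration, not text quoted from the lesson):

```python
import re

# "18" followed by a digit from 5 to 9, then any digit:
# matches exactly the years 1850 through 1899.
pattern = re.compile(r"\b18[5-9][0-9]\b")

sample = "Cases were reported in 1848, 1852, 1879 and 1901."
print(pattern.findall(sample))  # -> ['1852', '1879']
```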
- [tutoriels W3 Schools HTML][] - [tutoriels W3 Schools HTML5][] - [tutoriels W3 Schools HTML]: http://www.w3schools.com/html/default.asp - [tutoriels W3 Schools HTML5]: http://www.w3schools.com/html/html5_intro.asp + [tutoriels W3 Schools HTML]: https://www.w3schools.com/html/default.asp + [tutoriels W3 Schools HTML5]: https://www.w3schools.com/html/html5_intro.asp diff --git a/fr/lecons/concevoir-base-donnees-nodegoat.md b/fr/lecons/concevoir-base-donnees-nodegoat.md index 453c6090ba..c88f50a21b 100644 --- a/fr/lecons/concevoir-base-donnees-nodegoat.md +++ b/fr/lecons/concevoir-base-donnees-nodegoat.md @@ -282,6 +282,6 @@ Pour approfondir votre utilisation de nodegoat et explorer tout son potentiel, n [^6]: Voir cette notice de Wikipédia pour plus d’éléments sur la notion de « cardinalité » : « Modèle relationnel », [https://fr.wikipedia.org/wiki/Mod%C3%A8le_relationnel#Relation_1:N](https://perma.cc/KSA4-Y4WL). Voir aussi Gardarin, ouvrage cité, 412-413. -[^7]: Bree, P. van, Kessels, G., (2013). nodegoat: a web-based data management, network analysis & visualisation environment, http://nodegoat.net from [LAB1100](https://perma.cc/LAT9-M6UW) +[^7]: Bree, P. van, Kessels, G., (2013). nodegoat: a web-based data management, network analysis & visualisation environment, https://nodegoat.net from [LAB1100](https://perma.cc/LAT9-M6UW) [^8]: Les créateurs de nodegoat décrivent l’approche relationnelle du logiciel comme « orienté-objet ». Ce concept étant le plus souvent utilisé pour décrire un paradigme de programmation informatique, nous préférons éviter l’emploi de ce terme afin d’éviter des confusions. diff --git a/fr/lecons/debuter-avec-markdown.md b/fr/lecons/debuter-avec-markdown.md index f3a7d18384..50b9a518b5 100644 --- a/fr/lecons/debuter-avec-markdown.md +++ b/fr/lecons/debuter-avec-markdown.md @@ -42,11 +42,11 @@ Cette leçon sert d’initiation à Markdown, qui est une syntaxe en texte brut Puisque les tutoriels de ce site sont soumis sous forme de fichiers Markdown, je mobilise des exemples maison chaque fois que cela est possible. J'espère que ce guide vous sera particulièrement utile si vous envisagez de rédiger un tutoriel en tant qu'auteur(e) pour le *Programming Historian*, même s'il reste d'une portée plus générale. ## Qu'est-ce que le Markdown? -Développé en 2004 par [John Gruber](http://daringfireball.net/projects/markdown/ "Markdown on Daring Fireball"), Markdown est à la fois un langage de balisage de fichiers textes et une fonctionnalité du langage [Perl](https://fr.wikipedia.org/wiki/Perl_(langage)) permettant de convertir des fichiers Markdown en HTML. Notre leçon traite davantage du premier aspect, puisque nous apprendrons à utiliser la syntaxe Markdown pour préparer des fichiers. +Développé en 2004 par [John Gruber](https://daringfireball.net/projects/markdown/ "Markdown on Daring Fireball"), Markdown est à la fois un langage de balisage de fichiers textes et une fonctionnalité du langage [Perl](https://fr.wikipedia.org/wiki/Perl_(langage)) permettant de convertir des fichiers Markdown en HTML. Notre leçon traite davantage du premier aspect, puisque nous apprendrons à utiliser la syntaxe Markdown pour préparer des fichiers. Les fichiers texte brut présentent plusieurs avantages comparés aux autres formats. Non seulement ils sont compatibles avec tout type d'appareil et de système d'exploitation, mais ils s'avèrent aussi plus pérennes. 
Si jamais vous avez tenté d'ouvrir un document sauvegardé dans une version antérieure d'un logiciel de traitement de texte, vous pouvez comprendre facilement les problèmes de compatibilité qui sont en jeu. -L'utilisation de la syntaxe Markdown vous permettra de produire des fichiers à la fois lisibles en texte brut et prêts à recevoir davantage de traitement sur une autre plateforme. Plusieurs systèmes de gestion de blogs, des générateurs de sites web statiques ou encore des plateformes comme [GitHub](http://github.com "GitHub") prennent en charge des fichiers Markdown pour les convertir en [HTML](https://fr.wikipedia.org/wiki/Hypertext_Markup_Language) et les publier sur le web. De plus, des outils comme Pandoc peuvent convertir des fichiers depuis et vers Markdown. Pour apprendre plus sur Pandoc, vous pouvez faire un tour sur [cette leçon](/fr/lecons/redaction-durable-avec-pandoc-et-markdown) de Dennis Tenen et Grant Wythoff. +L'utilisation de la syntaxe Markdown vous permettra de produire des fichiers à la fois lisibles en texte brut et prêts à recevoir davantage de traitement sur une autre plateforme. Plusieurs systèmes de gestion de blogs, des générateurs de sites web statiques ou encore des plateformes comme [GitHub](https://github.com "GitHub") prennent en charge des fichiers Markdown pour les convertir en [HTML](https://fr.wikipedia.org/wiki/Hypertext_Markup_Language) et les publier sur le web. De plus, des outils comme Pandoc peuvent convertir des fichiers depuis et vers Markdown. Pour apprendre plus sur Pandoc, vous pouvez faire un tour sur [cette leçon](/fr/lecons/redaction-durable-avec-pandoc-et-markdown) de Dennis Tenen et Grant Wythoff. ## La syntaxe Markdown Les fichiers Markdown portent l'extension `.md`. Il est possible de les ouvrir avec un éditeur de texte comme TextEdit, Notepad++, Sublime Text ou Vim. Plusieurs sites web et des plateformes de publication proposent des éditeurs de texte en ligne et/ou des extensions pour insérer du texte avec la syntaxe Markdown. diff --git a/fr/lecons/decomptes-de-frequences-de-mots-en-python.md b/fr/lecons/decomptes-de-frequences-de-mots-en-python.md index 4c5294782c..ab1622d847 100644 --- a/fr/lecons/decomptes-de-frequences-de-mots-en-python.md +++ b/fr/lecons/decomptes-de-frequences-de-mots-en-python.md @@ -362,7 +362,7 @@ Lutz, _Learning Python_ Pilgrim, _Diving into Python_ -- Ch. 7: [Regular Expressions](https://web.archive.org/web/20180416143856/http://www.diveintopython.net/regular_expressions/index.html) +- Ch. 
7: [Regular Expressions](https://web.archive.org/web/20180416143856/https://www.diveintopython.net/regular_expressions/index.html) ### Synchronisation du code diff --git a/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.md b/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.md index 3e6d5f0ae3..8448f0c11b 100644 --- a/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.md +++ b/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.md @@ -48,7 +48,7 @@ La liste ci-dessous présente une partie des outils qui permettent de détecter - [Basic Local Alignment Search Tool (BLAST)](https://blast.ncbi.nlm.nih.gov/Blast.cgi) - [Tesserae](https://github.com/tesserae/tesserae) (PHP, Perl) - [TextPAIR (Pairwise Alignment for Intertextual Relations)](https://github.com/ARTFL-Project/text-pair) -- [Passim](https://github.com/dasmiq/passim) (Scala) développé par [David Smith](http://www.ccs.neu.edu/home/dasmith/ +- [Passim](https://github.com/dasmiq/passim) (Scala) développé par [David Smith](https://www.ccs.neu.edu/home/dasmith/ ) (Université Northeastern) Pour ce tutoriel, nous avons choisi de nous concentrer sur la bibliothèque Passim et cela pour trois raisons principales. Premièrement, car celle-ci peut être adaptée à une grande variété d'utilisation, puisqu'elle fonctionne autant sur une petite collection de texte que sur un corpus de grande échelle. Deuxièmement, parce que, bien que la documentation au sujet de Passim soit exhaustive, du fait que ses utilisateurs soient relativement avancés, un guide « pas-à-pas » de la détection de la réutilisation de texte avec Passim plus axé sur l'utilisateur serait bénéfique pour l'ensemble de la communauté. Enfin, les exemples suivants illustrent la variété de scénarios dans lesquels la réutilisation de texte est une méthodologie utile : @@ -212,7 +212,7 @@ export PATH="/home/simon/Passim/bin:$PATH" ### Installation de Spark -1. Accédez à la [section de téléchargement](http://spark.apache.org/downloads) du site Web de Spark et sélectionnez la version publiée de Spark 3.x.x (où *x.x* indique les éditions de la version 3.) ainsi que le type de paquetage « Pre-built for Apache Hadoop 2.7 » dans les menus déroulants. +1. Accédez à la [section de téléchargement](https://spark.apache.org/downloads) du site Web de Spark et sélectionnez la version publiée de Spark 3.x.x (où *x.x* indique les éditions de la version 3.) ainsi que le type de paquetage « Pre-built for Apache Hadoop 2.7 » dans les menus déroulants. 2. Extrayez les données binaires compressées dans le répertoire de votre choix (par exemple `/Applications`) : ```bash @@ -413,7 +413,7 @@ En fin de compte, ce qui compose un document et comment ces documents devraient ## Format JSON de base -Le format d'entrée de Passim consiste en des documents JSON qui sont au format [JSON lines](http://jsonlines.org/) (c'est-à-dire que chaque ligne de texte contient un seul document JSON). +Le format d'entrée de Passim consiste en des documents JSON qui sont au format [JSON lines](https://jsonlines.org/) (c'est-à-dire que chaque ligne de texte contient un seul document JSON). 
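To make the JSON lines convention concrete before the lesson's own `test.json` sample below, here is a minimal Python sketch that writes such a file. The `id` and `text` field names follow Passim's documented minimal input format as I understand it (an assumption to check against the lesson's own sample), and the two records are invented placeholders:

```python
import json

# Two invented placeholder documents; Passim's minimal input is assumed to
# need at least an "id" and a "text" field per record.
documents = [
    {"id": "doc1", "text": "Le chat est sur la table."},
    {"id": "doc2", "text": "Un chat est sur la table, dit-on."},
]

# JSON lines: exactly one complete JSON object per line of the file.
with open("test.json", "w", encoding="utf-8") as f:
    for doc in documents:
        f.write(json.dumps(doc, ensure_ascii=False) + "\n")
```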
Le contenu suivant d'un fichier nommé `test.json` illustre le format minimal d'entrée requis pour Passim :

@@ -715,7 +715,7 @@ Vous êtes maintenant prêt(e)s à vous lancer dans votre premier projet de réu
 
 Pour l'instant, ne vous souciez pas des arguments supplémentaires `SPARK_SUBMIT_ARGS='--master local[12] --driver-memory 8G --executor-memory 4G'`; dans la section [Etude de Cas 2](#etude-de-cas-2--réutilisation-de-textes-dans-un-grand-corpus-de-journaux-historiques) nous les expliquerons en détail.
 
 Ce cas de test prend approximativement huit minutes sur un ordinateur portable récent avec huit threads. Vous pouvez également suivre la progression de la détection sur http://localhost:4040 — un tableau de bord interactif créé par Spark. Notez que le tableau de bord se fermera dès que Passim aura terminé son exécution.
 
 ## Etude de cas 2 : Réutilisation de textes dans un grand corpus de journaux historiques
 
@@ -903,15 +903,15 @@ Matteo Romanello remercie le Fonds national suisse de la recherche scientifique
 
 # Bibliographie
 
-1. Franzini, Greta, Maria Moritz, Marco Büchler et Marco Passarotti. « Using and evaluating TRACER for an Index fontium computatus of the Summa contra Gentiles of Thomas Aquinas ». *Proceedings of the Fifth Italian Conference on Computational Linguistics (CLiC-it 2018)* (2018). [Lien](http://ceur-ws.org/Vol-2253/paper22.pdf)
-2. Smith, David A., Ryan Cordell et Abby Mullen. « Computational Methods for Uncovering Reprinted Texts in Antebellum Newspapers ». *American Literary History* **27** (2015). [Lien](http://dx.doi.org/10.1093/alh/ajv029)
-3. Cordell, Ryan. « Reprinting Circulation, and the Network Author in Antebellum Newspapers ». *American Literary History* **27** (2015): 417–445. [Lien](http://dx.doi.org/10.1093/alh/ajv028)
-4. Vogler, Daniel, Linards Udris et Mark Eisenegger. « Measuring Media Content Concentration at a Large Scale Using Automated Text Comparisons ». *Journalism Studies* 21, no.11 (2020). [Lien](http://dx.doi.org/10.1080/1461670x.2020.1761865)
+1. Franzini, Greta, Maria Moritz, Marco Büchler et Marco Passarotti. « Using and evaluating TRACER for an Index fontium computatus of the Summa contra Gentiles of Thomas Aquinas ». *Proceedings of the Fifth Italian Conference on Computational Linguistics (CLiC-it 2018)* (2018). [Lien](https://ceur-ws.org/Vol-2253/paper22.pdf)
+2. Smith, David A., Ryan Cordell et Abby Mullen. « Computational Methods for Uncovering Reprinted Texts in Antebellum Newspapers ». *American Literary History* **27** (2015). [Lien](https://dx.doi.org/10.1093/alh/ajv029)
+3. Cordell, Ryan. « Reprinting Circulation, and the Network Author in Antebellum Newspapers ». *American Literary History* **27** (2015): 417–445. [Lien](https://dx.doi.org/10.1093/alh/ajv028)
+4. Vogler, Daniel, Linards Udris et Mark Eisenegger. « Measuring Media Content Concentration at a Large Scale Using Automated Text Comparisons ». *Journalism Studies* 21, no.11 (2020). [Lien](https://dx.doi.org/10.1080/1461670x.2020.1761865)
 5. Mullen, Lincoln. *textreuse: Detect Text Reuse and Document Similarity*. Github. (2016). [Lien](https://github.com/ropensci/textreuse)
-6. Büchler, Marco, Philip R. Burns, Martin Müller, Emily Franzini et Greta Franzini.
« Towards a Historical Text Re-use Detection ». In *Text Mining: From Ontology Learning to Automated Text Processing Applications* dir. Chris Biemann et Alexander Mehler, 221–238. Springer International Publishing, 2014. [Lien](http://dx.doi.org/10.1007/978-3-319-12655-5_11) -7. Vierthaler, Paul et Meet Gelein. « A BLAST-based, Language-agnostic Text Reuse Algorithm with a MARKUS Implementation and Sequence Alignment Optimized for Large Chinese Corpora ». *Journal of Cultural Analytics* 4, vol.2 (2019). [Lien](http://dx.doi.org/10.22148/16.034) +6. Büchler, Marco, Philip R. Burns, Martin Müller, Emily Franzini et Greta Franzini. « Towards a Historical Text Re-use Detection ». In *Text Mining: From Ontology Learning to Automated Text Processing Applications* dir. Chris Biemann et Alexander Mehler, 221–238. Springer International Publishing, 2014. [Lien](https://dx.doi.org/10.1007/978-3-319-12655-5_11) +7. Vierthaler, Paul et Meet Gelein. « A BLAST-based, Language-agnostic Text Reuse Algorithm with a MARKUS Implementation and Sequence Alignment Optimized for Large Chinese Corpora ». *Journal of Cultural Analytics* 4, vol.2 (2019). [Lien](https://dx.doi.org/10.22148/16.034) 8. Vesanto, Aleksi, Asko Nivala, Heli Rantala, Tapio Salakoski, Hannu Salmi et Filip Ginter. « Applying BLAST to Text Reuse Detection in Finnish Newspapers and Journals, 1771-1910 ». *Proceedings of the NoDaLiDa 2017 Workshop on Processing Historical Language* (2017): 54–58. [Lien](https://aclanthology.org/W17-0510.pdf) 9. Salmi, Hannu, Heli Rantala, Aleksi Vesanto et Filip Ginter. « The long-term reuse of text in the Finnish press, 1771–1920 ». *CEUR Workshop Proceedings* 2364 (2019): 394–544. -10. Soto, Axel J, Abidalrahman Mohammad, Andrew Albert, Aminul Islam, Evangelos Milios, Michael Doyle, Rosane Minghim et Maria Cristina de Oliveira. « Similarity-Based Support for Text Reuse in Technical Writing ». *Proceedings of the 2015 ACM Symposium on Document Engineering* (2015): 97–106. [Lien](http://dx.doi.org/10.1145/2682571.2797068) +10. Soto, Axel J, Abidalrahman Mohammad, Andrew Albert, Aminul Islam, Evangelos Milios, Michael Doyle, Rosane Minghim et Maria Cristina de Oliveira. « Similarity-Based Support for Text Reuse in Technical Writing ». *Proceedings of the 2015 ACM Symposium on Document Engineering* (2015): 97–106. [Lien](https://dx.doi.org/10.1145/2682571.2797068) 11. Schofield, Alexandra, Laure Thompson et David Mimno. « Quantifying the Effects of Text Duplication on Semantic Models ». *Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing* (2017): 2737–2747. [Lien](https://doi.org/10.18653/v1/D17-1290) 12. Romanello, Matteo, Aurélien Berra et Alexandra Trachsel. « Rethinking Text Reuse as Digital Classicists ». *Digital Humanities conference* (2014). [Lien](https://wiki.digitalclassicist.org/Text_Reuse) diff --git a/fr/lecons/generer-jeu-donnees-texte-ocr.md b/fr/lecons/generer-jeu-donnees-texte-ocr.md index 1b26d3ed43..f988099746 100644 --- a/fr/lecons/generer-jeu-donnees-texte-ocr.md +++ b/fr/lecons/generer-jeu-donnees-texte-ocr.md @@ -47,7 +47,7 @@ Que se passerait-il si, par exemple, votre OCR interprétait les chaînes de car Bien souvent, les documents que les historien(ne)s souhaitent numériser sont déjà des structures ordonnées de données : une collection ordonnée de documents issus d'une source primaire, un code juridique ou encore un cartulaire. 
Mais la structure éditoriale imposée à ces ressources est généralement conçue pour un type particulier de technologie d’extraction de données, c'est-à-dire un codex, un livre. Pour un texte numérisé, la structure utilisée sera différente. Si vous pouvez vous débarrasser de l’infrastructure liée au livre et réorganiser le texte selon les sections et les divisions qui vous intéressent, vous vous retrouverez avec des données sur lesquelles il sera beaucoup plus facile d'effectuer des recherches et des opérations de remplacement, et en bonus, votre texte deviendra immédiatement exploitable dans une multitude d’autres contextes. -C'est là qu'un langage de script comme Python devient très utile. Pour notre projet nous avons voulu préparer certains des documents d’une [collection du XIIe siècle d’*imbreviatura*](http://www.worldcat.org/oclc/17591390) du scribe italien connu sous le nom de Giovanni Scriba (vous pouvez [accéder au PDF ici](https://notariorumitinera.eu/Docs/Biblioteca_Digitale/SB/3a47488c28eef2aedfea52ebbde2c634/dd361cb1479ab2309f5ceef1f875c2a5.pdf)) afin qu’ils puissent être traités par des historien(ne)s à des fins d’analyse TAL ou autres. Les pages de l'édition de 1935 ressemblent à cela : +C'est là qu'un langage de script comme Python devient très utile. Pour notre projet nous avons voulu préparer certains des documents d’une [collection du XIIe siècle d’*imbreviatura*](https://www.worldcat.org/oclc/17591390) du scribe italien connu sous le nom de Giovanni Scriba (vous pouvez [accéder au PDF ici](https://notariorumitinera.eu/Docs/Biblioteca_Digitale/SB/3a47488c28eef2aedfea52ebbde2c634/dd361cb1479ab2309f5ceef1f875c2a5.pdf)) afin qu’ils puissent être traités par des historien(ne)s à des fins d’analyse TAL ou autres. Les pages de l'édition de 1935 ressemblent à cela : {% include figure.html filename="gs_pg110.png" caption="GS page 110" %} @@ -1064,7 +1064,7 @@ Ouvrez le fichier résultant avec un navigateur web et vous obtenez une édition Ainsi, notre problème de départ, le nettoyage OCR, est maintenant beaucoup plus gérable parce que nous pouvons cibler des expressions régulières pour les types spécifiques de métadonnées que nous avons : erreurs dans le résumé en italien ou dans le texte latin ? Ou nous pourrions concevoir des routines de recherche et de remplacement uniquement pour des chartes spécifiques ou des groupes de chartes. -Au-delà de cela, il y a beaucoup de choses que vous pouvez faire avec un ensemble de données ordonnnées, y compris l'alimenter grâce à un outil de balisage comme [le « brat »](http://brat.nlplab.org/) dont nous nous sommes servis pour le projet ChartEx. Des spécialistes peuvent alors commencer à ajouter des couches de balisage sémantique, même si vous ne faites plus de correction d’erreur OCR. En outre, avec un ensemble de données ordonnnées, nous pouvons obtenir toutes sortes de sorties : TEI (Text Encoding Initiative) ou EAD (Encoded Archival Description). Ou encore vous pouvez lire votre ensemble de données directement dans une base de données relationnelle ou un répertoire de stockage qui associe une clé et une valeur. Toutes ces choses sont tout bonnement impossibles, si vous travaillez seulement avec un simple fichier texte. +Au-delà de cela, il y a beaucoup de choses que vous pouvez faire avec un ensemble de données ordonnnées, y compris l'alimenter grâce à un outil de balisage comme [le « brat »](https://brat.nlplab.org/) dont nous nous sommes servis pour le projet ChartEx. 
Des spécialistes peuvent alors commencer à ajouter des couches de balisage sémantique, même si vous ne faites plus de correction d’erreur OCR. En outre, avec un ensemble de données ordonnnées, nous pouvons obtenir toutes sortes de sorties : TEI (Text Encoding Initiative) ou EAD (Encoded Archival Description). Ou encore vous pouvez lire votre ensemble de données directement dans une base de données relationnelle ou un répertoire de stockage qui associe une clé et une valeur. Toutes ces choses sont tout bonnement impossibles, si vous travaillez seulement avec un simple fichier texte. Les morceaux de code ci-dessus ne sont en aucun cas une solution clé en main pour nettoyer une sortie OCR lambda. Il n'existe pas de telle baguette magique. L’approche de Google pour scanner le contenu des bibliothèques de recherche menace de nous noyer dans un océan de mauvaises données. Pire encore, elle élude un fait fondamental du savoir numérique : les sources numériques sont difficiles à obtenir. Des textes numériques fiables, flexibles et utiles nécessitent une rédaction soignée et une conservation pérenne. Google, Amazon, Facebook et d'autres géants du Web n’ont pas à se soucier de la qualité de leurs données, juste de leur quantité. Les historien(ne)s, par contre, doivent d’abord se soucier de l’intégrité de leurs sources. diff --git a/fr/lecons/gestion-manipulation-donnees-r.md b/fr/lecons/gestion-manipulation-donnees-r.md index 77f91fd1ab..21e038a2b0 100644 --- a/fr/lecons/gestion-manipulation-donnees-r.md +++ b/fr/lecons/gestion-manipulation-donnees-r.md @@ -62,7 +62,7 @@ Remplir ces critères nous permet de juger si la donnée est organisée ou pas. 4. Plusieurs unités d'observation sont présentes dans une même table. 5. Une même unité d'observation est présente dans plusieurs tables. -Un avantage peut-être encore plus important est de garder nos données dans ce format propre, qui nous permet d'utiliser une galerie de paquets dans le [« tidyverse »](http://tidyverse.org/), spécifiquement conçus pour fonctionner avec des données bien structurées. En nous assurant que nos données en entrée et en sortie sont bien structurées, nous n'aurons qu'un nombre limité d'outils à utiliser pour répondre à un grand nombre de questions. De plus, nous pourrons combiner, manipuler et séparer des jeux de données comme bon nous semble. +Un avantage peut-être encore plus important est de garder nos données dans ce format propre, qui nous permet d'utiliser une galerie de paquets dans le [« tidyverse »](https://tidyverse.org/), spécifiquement conçus pour fonctionner avec des données bien structurées. En nous assurant que nos données en entrée et en sortie sont bien structurées, nous n'aurons qu'un nombre limité d'outils à utiliser pour répondre à un grand nombre de questions. De plus, nous pourrons combiner, manipuler et séparer des jeux de données comme bon nous semble. Dans cette leçon, nous nous intéresserons particulièrement au paquet [dplyr](https://cran.r-project.org/web/packages/dplyr/index.html) du tidyverse. Mais cela vaut la peine de mentionner brièvement quelques autres paquets que nous utiliserons : diff --git a/fr/lecons/installation-windows-py.md b/fr/lecons/installation-windows-py.md index 1c6cf450fe..ba62e569b2 100644 --- a/fr/lecons/installation-windows-py.md +++ b/fr/lecons/installation-windows-py.md @@ -1,151 +1,151 @@ ---- -title: Installer un environnement de développement intégré pour Python (Windows) -layout: lesson -slug: installation-windows-py -date: 2012-07-17 -authors: -- William J. 
Turkel -- Adam Crymble -reviewers: -- Jim Clifford -- Amanda Morton -editors: -- Miriam Posner -translation_date: 2020-07-13 -translator: -- Sofia Papastamkou -translation-editor: -- François Dominic Laramée -translation-reviewer: -- Marie-Christine Boucher -difficulty: 1 -exclude_from_check: - - review-ticket -activity: transforming -topics: [get-ready, python] -abstract: "Cette leçon vous montrera comment installer un environnement de développement pour Python sur un ordinateur exécutant le système d'exploitation Windows." -original: windows-installation -doi: 10.46430/phfr0011 -avatar_alt: Un groupe de trois musiciens ---- - -{% include toc.html %} - - - - - -## Sauvegarder son disque dur - -Faites en sorte de toujours disposer de sauvegardes régulières et récentes du contenu de votre disque dur. L'importance de cette pratique dépasse largement le cadre de vos activités de programmation, et il serait avisé d’en faire une habitude. - -## Installer Python (v.3) - -Rendez-vous sur le [site web de Python][], téléchargez la dernière version stable du langage de programmation Python (version 3.8 au mois d'avril 2020) et procédez à l'installation selon les instructions fournies sur le site. *N.D.L.R. Notez que les versions les plus récentes de Python, à partir de la v. 3.5, ne sont pas compatibles avec Windows XP ni avec les versions antérieures de Windows.* - -## Créer un répertoire dédié - -Pour mieux organiser votre travail, il est recommandé de créer un répertoire (dossier) dédié sur votre disque dur, à l'emplacement de votre choix, pour y ranger vos fichiers de programmation (par exemple, `programming-historian`). - -## Installer Komodo Edit - -Komodo Edit est un éditeur de texte au code source ouvert et gratuit, dévelopé par [ActiveState](https://www.activestate.com/). Pour installer Komodo Edit, vous pouvez télécharger le fichier [Komodo-Edit-12.0.1-18441.msi](https://downloads.activestate.com/Komodo/releases/12.0.1/Komodo-Edit-12.0.1-18441.msi) depuis leur liste de [Komodo Edit releases](https://downloads.activestate.com/Komodo/releases/12.0.1/). Il existe néanmoins [un vaste choix d'éditeurs de texte][], si vous souhaitez utiliser un autre programme. - -
    -ActiveState a retiré Komodo Edit en décembre 2022. Comme il est écrit dans cet article de blog (en anglais), "Le retrait de Komodo signifie qu'ActiveState cessera de déveloper du code ou de créer des versions pour [...] Komodo Edit. ActiveState ne fournira plus de nouvelles caractéristiques/fonctionnalités, et ne réparera plus les bugs ou les problèmes de sécurité" [notre traduction]. Vous pouvez toujours télécharger et utiliser Komodo Edit, mais il est probablement préférable de choisir un autre programme. -
    - -## Démarrer Komodo Edit - -Ouvrez Komodo Edit; vous devriez obtenir quelque chose qui ressemble à ceci: - -{% include figure.html filename="komodo-edit11-windows-main.png" caption="Komodo Edit sur Windows" %} - -Si vous ne voyez pas le volet `Toolbox` (*Boîte à outils*) en haut à droite, vous pouvez y accéder via le menu `View -> Tabs & Sidebars -> Toolbox` (*Vue -> Onglets & Volets latéraux -> Boîte à outils*). Peu importe pour le moment si le volet du projet est ouvert ou non. Prenez le temps d'explorer l'interface et de vous familiariser avec son agencement. Si besoin, le menu d'aide `Help` offre une documentation détaillée. - -### Configurer Komodo Edit - -Vous devez maintenant configurer l'éditeur pour pouvoir exécuter vos programmes en Python. - -Sélectionnez d'abord `Edit -> Preferences -> Languages -> Python 3` (*Modifier -> Préférences -> Langages -> Python 3*) puis `Browse` (*Parcourir*). Puis, sélectionnez le chemin du répertoire d'installation de Python, qui ressemble à ceci: `C:\Utilisateurs\VotreNomUtilisateur\AppData\Local\Programs\Python\Python38-32`). Lorsque vous avez trouvé le bon chemin, cliquez sur `OK`: - -{% include figure.html caption="Définissez l'interpréteur Python par défaut" filename="komodo-edit11-windows-interpreter.png" %} - -*(N.D.L.R. En effectuant la manipulation décrite plus haut, après avoir cliqué sur `Browse` pour parcourir votre disque dur et afficher la boîte de dialogue `Open Executable File`, il se peut que vous n'arriviez pas à localiser le dossier AppData pour récupérer le chemin et définir l'interpréteur par défaut. Dans ce cas, entrez `%AppData%` dans la barre de recherche du menu `Démarrer` de Windows, puis cliquez sur l'emplacement pour l'ouvrir. Localisez le chemin spécifié ci-haut (`\AppData\Local\Programs\Python\Python38-32`) puis retournez à la boîte de dialogue `Open Executable File` (à l'intérieur de Komodo Edit) et copiez-le dans la barre `Nom du fichier`. Une fois le répertoire ouvert, sélectionnez `python.exe` (type de fichier: application) et cliquez sur `Ouvrir`.* - -Ensuite, depuis le menu `Preferences` (*Préférences*) à gauche sélectionnez `Internationalization`. Maintenant, allez à la section `Language-specific Default Encoding` (*Encodage par défaut selon le langage de programmation*) et, dans le menu déroulant de `Language-specific`, sélectionnez `Python`. Vérifiez que l'encodage [UTF-8][] est sélectionné en tant qu'encodage par défaut. - -{% include figure.html caption="Paramètre d'encodage du texte en format UTF-8" filename="komodo-edit11-windows-utf-set.png" %} - -Ensuite sélectionnez `Toolbox->Add->New Command` (*Boite à outils->Ajouter->Nouvelle commande*). Vous ouvrirez ainsi une nouvelle fenêtre de dialogue. Renommez votre commande `‘Run Python’` (*Exécuter Python*). Dans la barre `‘Command’` (*Commande*), tapez: - -``` python -%(python3) %f -``` - -Si vous oubliez d'exécuter cette commande, Python ne saura pas coment interpréter les instructions envoyées. - -Dans la barre `‘Start in’`, tapez: - -`%D` - -Si vous obtenez ceci, cliquez sur `OK`: - -{% include figure.html filename="komodo-edit11-windows-python-command.png" caption="Commande « Exécuter Python3 »" %} -{% include figure.html filename="komodo-edit11-windows-python-start.png" caption="Configuration de la commande « Run Python3 Start » ." %} - -Votre nouvelle commande devrait apparaître dans le panneau de la boite à outils `Toolbox`. 
Après avoir complété cette étape, vous devrez peut-être redémarrer votre ordinateur avant d’être en mesure de travailler avec Python dans Komodo Edit. - -Étape 2 – 'Hello World' en Python --------------------------------- - -Il est de coutume d'inaugurer l'utilisation d'un nouveau langage de programmation avec un script qui dit tout simplement *"hello world"* soit "bonjour le monde". Nous allons voir ensemble comment faire cela en Python et en HTML. - -Python est un langage de très haut niveau, ce qui en fait un choix fréquemment recommandé pour les personnes qui débutent en programmation. Autrement dit: il est possible d'écrire en Python de courts programmes qui sont très performants. Plus un programme est court, plus il est susceptible de tenir sur la taille d'un écran et donc plus il a des chances de rester gravé dans votre mémoire. - -Python est un langage de programmation interprété. Cela signifie qu'il existe un programme informatique spécifique, appelé interpréteur, qui sait reconnaître les instructions écrites dans ce langage. Une manière d'utiliser un interpréteur consiste à stocker toutes vos instructions Python dans un fichier puis à soumettre ce fichier à l'interpréteur. Un fichier contenant des instructions écrites avec un langage de programmation s'appelle un programme (informatique). L'interpréteur exécute chacune des instructions contenues dans le programme, puis il s'arrête. Voyons les étapes à suivre pour y parvenir. - -Dans votre éditeur de texte, créez un nouveau fichier, entrez ce petit programme de deux lignes, puis sauvegardez-le dans votre répertoire `programming-historian` sous le nom -`hello-world.py` - -``` python -# hello-world.py -print('hello world') -``` - -L'éditeur de texte de votre choix doit avoir un bouton de menu “`Run`” qui vous permet d'exécuter votre programme. Si tout s'est bien passé, vous devriez obtenir un résultat semblable à celui présenté dans la figure ci-dessous, que nous avons obtenue avec Komodo Edit: - -{% include figure.html filename="komodo-edit11-windows-hello.png" caption="'Hello World'" %} - -## Interagir via une console Python (shell) - -Une autre manière d'interagir avec un interpréteur est d'utiliser ce que nous appelons une console. Dans ce cas, il suffit de taper une instruction au clavier et d'appuyer sur la touche Entrée pour que la console exécute votre commande. La console est un moyen parfait pour tester votre code et avoir la certitude que vous allez obtenir le résultat que vous recherchez. - -Vous pouvez exécuter une console Python en double-cliquant sur le fichier `python.exe`. Si vous avez installé la version 3.8 (la plus récente au moment de cette traduction en avril 2020), ce fichier se trouve fort probablement dans votre répertoire `C:\Utilisateurs\VotreNomUtilisateur\AppData\Local\Programs\Python\Python38-32`. Lorsque la fenêtre de la console s'affiche sur votre écran, tapez: - -``` python -print('hello world') -``` - -puis appuyez sur la touche Entrée. Votre ordinateur va vous répondre: - -``` python -hello world -``` - -Pour représenter une interaction via la console, nous utilisons -\> pour indiquer la réponse reçue dans celle-ci, comme suit: - -``` python -print('hello world') --> hello world -``` -Sur votre écran, l'affichage ressemble plutôt à cela: - -{% include figure.html caption="La console Python sous Windows" filename="windows-python3-cmd.png" %} - -Maintenant, votre ordinateur est prêt et vous êtes en mesure d'exécuter des tâches plus intéressantes. 
Si vous travaillez avec nos tutoriels sur Python dans l'ordre, nous vous recommandons de consulter par la suite la leçon « [Comprendre les pages web et le HTML][] » . - - [site web de Python]: http://www.python.org/ - [un vaste choix d'éditeurs de texte]: http://wiki.python.org/moin/PythonEditors/ - [UTF-8]: http://en.wikipedia.org/wiki/UTF-8 - [Comprendre les pages web et le HTML]: /fr/lecons/comprendre-les-pages-web +--- +title: Installer un environnement de développement intégré pour Python (Windows) +layout: lesson +slug: installation-windows-py +date: 2012-07-17 +authors: +- William J. Turkel +- Adam Crymble +reviewers: +- Jim Clifford +- Amanda Morton +editors: +- Miriam Posner +translation_date: 2020-07-13 +translator: +- Sofia Papastamkou +translation-editor: +- François Dominic Laramée +translation-reviewer: +- Marie-Christine Boucher +difficulty: 1 +exclude_from_check: + - review-ticket +activity: transforming +topics: [get-ready, python] +abstract: "Cette leçon vous montrera comment installer un environnement de développement pour Python sur un ordinateur exécutant le système d'exploitation Windows." +original: windows-installation +doi: 10.46430/phfr0011 +avatar_alt: Un groupe de trois musiciens +--- + +{% include toc.html %} + + + + + +## Sauvegarder son disque dur + +Faites en sorte de toujours disposer de sauvegardes régulières et récentes du contenu de votre disque dur. L'importance de cette pratique dépasse largement le cadre de vos activités de programmation, et il serait avisé d’en faire une habitude. + +## Installer Python (v.3) + +Rendez-vous sur le [site web de Python][], téléchargez la dernière version stable du langage de programmation Python (version 3.8 au mois d'avril 2020) et procédez à l'installation selon les instructions fournies sur le site. *N.D.L.R. Notez que les versions les plus récentes de Python, à partir de la v. 3.5, ne sont pas compatibles avec Windows XP ni avec les versions antérieures de Windows.* + +## Créer un répertoire dédié + +Pour mieux organiser votre travail, il est recommandé de créer un répertoire (dossier) dédié sur votre disque dur, à l'emplacement de votre choix, pour y ranger vos fichiers de programmation (par exemple, `programming-historian`). + +## Installer Komodo Edit + +Komodo Edit est un éditeur de texte au code source ouvert et gratuit, dévelopé par [ActiveState](https://www.activestate.com/). Pour installer Komodo Edit, vous pouvez télécharger le fichier [Komodo-Edit-12.0.1-18441.msi](https://downloads.activestate.com/Komodo/releases/12.0.1/Komodo-Edit-12.0.1-18441.msi) depuis leur liste de [Komodo Edit releases](https://downloads.activestate.com/Komodo/releases/12.0.1/). Il existe néanmoins [un vaste choix d'éditeurs de texte][], si vous souhaitez utiliser un autre programme. + +
    +ActiveState a retiré Komodo Edit en décembre 2022. Comme il est écrit dans cet article de blog (en anglais), "Le retrait de Komodo signifie qu'ActiveState cessera de développer du code ou de créer des versions pour [...] Komodo Edit. ActiveState ne fournira plus de nouvelles caractéristiques/fonctionnalités, et ne réparera plus les bugs ou les problèmes de sécurité" [notre traduction]. Vous pouvez toujours télécharger et utiliser Komodo Edit, mais il est probablement préférable de choisir un autre programme.
+
    + +## Démarrer Komodo Edit + +Ouvrez Komodo Edit; vous devriez obtenir quelque chose qui ressemble à ceci: + +{% include figure.html filename="komodo-edit11-windows-main.png" caption="Komodo Edit sur Windows" %} + +Si vous ne voyez pas le volet `Toolbox` (*Boîte à outils*) en haut à droite, vous pouvez y accéder via le menu `View -> Tabs & Sidebars -> Toolbox` (*Vue -> Onglets & Volets latéraux -> Boîte à outils*). Peu importe pour le moment si le volet du projet est ouvert ou non. Prenez le temps d'explorer l'interface et de vous familiariser avec son agencement. Si besoin, le menu d'aide `Help` offre une documentation détaillée. + +### Configurer Komodo Edit + +Vous devez maintenant configurer l'éditeur pour pouvoir exécuter vos programmes en Python. + +Sélectionnez d'abord `Edit -> Preferences -> Languages -> Python 3` (*Modifier -> Préférences -> Langages -> Python 3*) puis `Browse` (*Parcourir*). Puis, sélectionnez le chemin du répertoire d'installation de Python, qui ressemble à ceci: `C:\Utilisateurs\VotreNomUtilisateur\AppData\Local\Programs\Python\Python38-32`). Lorsque vous avez trouvé le bon chemin, cliquez sur `OK`: + +{% include figure.html caption="Définissez l'interpréteur Python par défaut" filename="komodo-edit11-windows-interpreter.png" %} + +*(N.D.L.R. En effectuant la manipulation décrite plus haut, après avoir cliqué sur `Browse` pour parcourir votre disque dur et afficher la boîte de dialogue `Open Executable File`, il se peut que vous n'arriviez pas à localiser le dossier AppData pour récupérer le chemin et définir l'interpréteur par défaut. Dans ce cas, entrez `%AppData%` dans la barre de recherche du menu `Démarrer` de Windows, puis cliquez sur l'emplacement pour l'ouvrir. Localisez le chemin spécifié ci-haut (`\AppData\Local\Programs\Python\Python38-32`) puis retournez à la boîte de dialogue `Open Executable File` (à l'intérieur de Komodo Edit) et copiez-le dans la barre `Nom du fichier`. Une fois le répertoire ouvert, sélectionnez `python.exe` (type de fichier: application) et cliquez sur `Ouvrir`.* + +Ensuite, depuis le menu `Preferences` (*Préférences*) à gauche sélectionnez `Internationalization`. Maintenant, allez à la section `Language-specific Default Encoding` (*Encodage par défaut selon le langage de programmation*) et, dans le menu déroulant de `Language-specific`, sélectionnez `Python`. Vérifiez que l'encodage [UTF-8][] est sélectionné en tant qu'encodage par défaut. + +{% include figure.html caption="Paramètre d'encodage du texte en format UTF-8" filename="komodo-edit11-windows-utf-set.png" %} + +Ensuite sélectionnez `Toolbox->Add->New Command` (*Boite à outils->Ajouter->Nouvelle commande*). Vous ouvrirez ainsi une nouvelle fenêtre de dialogue. Renommez votre commande `‘Run Python’` (*Exécuter Python*). Dans la barre `‘Command’` (*Commande*), tapez: + +``` python +%(python3) %f +``` + +Si vous oubliez d'exécuter cette commande, Python ne saura pas coment interpréter les instructions envoyées. + +Dans la barre `‘Start in’`, tapez: + +`%D` + +Si vous obtenez ceci, cliquez sur `OK`: + +{% include figure.html filename="komodo-edit11-windows-python-command.png" caption="Commande « Exécuter Python3 »" %} +{% include figure.html filename="komodo-edit11-windows-python-start.png" caption="Configuration de la commande « Run Python3 Start » ." %} + +Votre nouvelle commande devrait apparaître dans le panneau de la boite à outils `Toolbox`. 
Après avoir complété cette étape, vous devrez peut-être redémarrer votre ordinateur avant d’être en mesure de travailler avec Python dans Komodo Edit. + +Étape 2 – 'Hello World' en Python +-------------------------------- + +Il est de coutume d'inaugurer l'utilisation d'un nouveau langage de programmation avec un script qui dit tout simplement *"hello world"* soit "bonjour le monde". Nous allons voir ensemble comment faire cela en Python et en HTML. + +Python est un langage de très haut niveau, ce qui en fait un choix fréquemment recommandé pour les personnes qui débutent en programmation. Autrement dit: il est possible d'écrire en Python de courts programmes qui sont très performants. Plus un programme est court, plus il est susceptible de tenir sur la taille d'un écran et donc plus il a des chances de rester gravé dans votre mémoire. + +Python est un langage de programmation interprété. Cela signifie qu'il existe un programme informatique spécifique, appelé interpréteur, qui sait reconnaître les instructions écrites dans ce langage. Une manière d'utiliser un interpréteur consiste à stocker toutes vos instructions Python dans un fichier puis à soumettre ce fichier à l'interpréteur. Un fichier contenant des instructions écrites avec un langage de programmation s'appelle un programme (informatique). L'interpréteur exécute chacune des instructions contenues dans le programme, puis il s'arrête. Voyons les étapes à suivre pour y parvenir. + +Dans votre éditeur de texte, créez un nouveau fichier, entrez ce petit programme de deux lignes, puis sauvegardez-le dans votre répertoire `programming-historian` sous le nom +`hello-world.py` + +``` python +# hello-world.py +print('hello world') +``` + +L'éditeur de texte de votre choix doit avoir un bouton de menu “`Run`” qui vous permet d'exécuter votre programme. Si tout s'est bien passé, vous devriez obtenir un résultat semblable à celui présenté dans la figure ci-dessous, que nous avons obtenue avec Komodo Edit: + +{% include figure.html filename="komodo-edit11-windows-hello.png" caption="'Hello World'" %} + +## Interagir via une console Python (shell) + +Une autre manière d'interagir avec un interpréteur est d'utiliser ce que nous appelons une console. Dans ce cas, il suffit de taper une instruction au clavier et d'appuyer sur la touche Entrée pour que la console exécute votre commande. La console est un moyen parfait pour tester votre code et avoir la certitude que vous allez obtenir le résultat que vous recherchez. + +Vous pouvez exécuter une console Python en double-cliquant sur le fichier `python.exe`. Si vous avez installé la version 3.8 (la plus récente au moment de cette traduction en avril 2020), ce fichier se trouve fort probablement dans votre répertoire `C:\Utilisateurs\VotreNomUtilisateur\AppData\Local\Programs\Python\Python38-32`. Lorsque la fenêtre de la console s'affiche sur votre écran, tapez: + +``` python +print('hello world') +``` + +puis appuyez sur la touche Entrée. Votre ordinateur va vous répondre: + +``` python +hello world +``` + +Pour représenter une interaction via la console, nous utilisons -\> pour indiquer la réponse reçue dans celle-ci, comme suit: + +``` python +print('hello world') +-> hello world +``` +Sur votre écran, l'affichage ressemble plutôt à cela: + +{% include figure.html caption="La console Python sous Windows" filename="windows-python3-cmd.png" %} + +Maintenant, votre ordinateur est prêt et vous êtes en mesure d'exécuter des tâches plus intéressantes. 
Si vous travaillez avec nos tutoriels sur Python dans l'ordre, nous vous recommandons de consulter par la suite la leçon « [Comprendre les pages web et le HTML][] » . + + [site web de Python]: https://www.python.org/ + [un vaste choix d'éditeurs de texte]: https://wiki.python.org/moin/PythonEditors/ + [UTF-8]: https://en.wikipedia.org/wiki/UTF-8 + [Comprendre les pages web et le HTML]: /fr/lecons/comprendre-les-pages-web diff --git a/fr/lecons/intro-a-bash-et-zsh.md b/fr/lecons/intro-a-bash-et-zsh.md index ba0532138f..aea2f13d29 100644 --- a/fr/lecons/intro-a-bash-et-zsh.md +++ b/fr/lecons/intro-a-bash-et-zsh.md @@ -241,7 +241,7 @@ Enfoncer la touche tabulation (TAB) à n'importe quel moment dans le shel Sous Windows, les extensions de fichier sont invisibles par défaut. Si vous souhaitez manipuler des fichiers sous Windows, nous vous recommandons d'activer l'affichage des extensions de fichier. Pour faire cela, ouvrez votre explorateur de fichiers et sous **Affichage**, dans le groupe **Afficher/masquer**, cochez la case **Extensions de nom de fichier**. Pour plus d'informations, vous pouvez vous référer à [cet article](https://perma.cc/5ZWL-XRFF) du support Windows. -Nous avons désormais besoin d'un fichier texte pour nos futures commandes. Nous pouvons utiliser un livre réputé pour être long, l'épique *Guerre et Paix* de Léon Tolstoï. Le fichier est disponible, en anglais, grâce au [Projet Gutenberg](http://www.gutenberg.org/ebooks/2600). Si vous avez déjà installé [wget](/en/lessons/applied-archival-downloading-with-wget), vous pouvez simplement taper : +Nous avons désormais besoin d'un fichier texte pour nos futures commandes. Nous pouvons utiliser un livre réputé pour être long, l'épique *Guerre et Paix* de Léon Tolstoï. Le fichier est disponible, en anglais, grâce au [Projet Gutenberg](https://www.gutenberg.org/ebooks/2600). Si vous avez déjà installé [wget](/en/lessons/applied-archival-downloading-with-wget), vous pouvez simplement taper : ```bash wget http://www.gutenberg.org/files/2600/2600-0.txt diff --git a/fr/lecons/intro-aux-bots-twitter.md b/fr/lecons/intro-aux-bots-twitter.md index 2b05f36382..9a1aba61e7 100644 --- a/fr/lecons/intro-aux-bots-twitter.md +++ b/fr/lecons/intro-aux-bots-twitter.md @@ -39,10 +39,10 @@ L'accès à l'API de Twitter a récemment changé. Le niveau gratuit ne permet p # Une introduction aux bots Twitter avec Tracery -Cette leçon explique comment créer des bots basiques sur Twitter à l’aide de la [grammaire générative Tracery](http://tracery.io) et du service [Cheap Bots Done Quick](http://cheapbotsdonequick.com/). Tracery est interopérable avec plusieurs langages de programmation et peut être intégrée dans des sites web, des jeux ou des bots. Vous pouvez en faire une copie (fork) sur github [ici](https://github.com/galaxykate/tracery/tree/tracery2). +Cette leçon explique comment créer des bots basiques sur Twitter à l’aide de la [grammaire générative Tracery](https://tracery.io) et du service [Cheap Bots Done Quick](https://cheapbotsdonequick.com/). Tracery est interopérable avec plusieurs langages de programmation et peut être intégrée dans des sites web, des jeux ou des bots. Vous pouvez en faire une copie (fork) sur github [ici](https://github.com/galaxykate/tracery/tree/tracery2). ## Pourquoi des bots? -Pour être exact, un bot Twitter est un logiciel permettant de contrôler automatiquement un compte Twitter. 
Lorsque des centaines de bots sont créés et tweetent plus ou moins le même message, ils peuvent façonner le discours sur Twitter, ce qui influence ensuite le discours d’autres médias. Des bots de ce type [peuvent même être perçus comme des sources crédibles d’information](http://www.sciencedirect.com/science/article/pii/S0747563213003129). Des projets tels que [Documenting the Now](http://www.docnow.io/) mettent au point des outils qui permettent aux chercheur(e)s de créer et d’interroger des archives de réseaux sociaux en ligne à propos d’événements récents qui comprennent très probablement un bon nombre de messages générés par des bots. Dans ce tutoriel, je veux montrer comment construire un bot Twitter basique afin que des historiens et des historiennes, ayant connaissance de leur fonctionnement, puissent plus facilement les repérer dans des archives et, peut-être, même les neutraliser grâce à leurs propres bots. +Pour être exact, un bot Twitter est un logiciel permettant de contrôler automatiquement un compte Twitter. Lorsque des centaines de bots sont créés et tweetent plus ou moins le même message, ils peuvent façonner le discours sur Twitter, ce qui influence ensuite le discours d’autres médias. Des bots de ce type [peuvent même être perçus comme des sources crédibles d’information](https://www.sciencedirect.com/science/article/pii/S0747563213003129). Des projets tels que [Documenting the Now](https://www.docnow.io/) mettent au point des outils qui permettent aux chercheur(e)s de créer et d’interroger des archives de réseaux sociaux en ligne à propos d’événements récents qui comprennent très probablement un bon nombre de messages générés par des bots. Dans ce tutoriel, je veux montrer comment construire un bot Twitter basique afin que des historiens et des historiennes, ayant connaissance de leur fonctionnement, puissent plus facilement les repérer dans des archives et, peut-être, même les neutraliser grâce à leurs propres bots. Mais je crois aussi qu’il y a de la place en histoire et dans les humanités numériques de façon plus large pour un travail créatif, expressif, voire artistique. Les historiens et les historiennes qui connaissent la programmation peuvent profiter des possibilités offertes par les médias numériques pour monter des créations, autrement impossibles à réaliser pour nous émouvoir, nous inspirer, nous interpeller. Il y a de la place pour de la satire, il y a de la place pour commenter. Comme Mark Sample, je crois qu’il y a besoin de « [bots de conviction](https://medium.com/@samplereality/a-protest-bot-is-a-bot-so-specific-you-cant-mistake-it-for-bullshit-90fe10b7fbaa)». Ce sont des bots de contestation, des bots si pointus et pertinents, qu’il devient impossible de les prendre pour autre chose par erreur. Selon Sample, il faudrait que de tels bots soient: @@ -83,11 +83,11 @@ Pour entamer la réflexion, voici quelques suggestions de personnes qui m’ont > - un bot qui imaginerait la réaction d’Afghans, d’Irakiens, de Syriens, de Yéménites lorsque des membres de leur famille sont tués dans des attaques de drones. — Cory Taylor (@CoryTaylor_) 22 avril 2017 -Dans la mesure où beaucoup de données historiques en ligne sont disponibles en format [JSON](http://json.org/), en cherchant un peu, vous devriez en trouver à utiliser avec votre bot. +Dans la mesure où beaucoup de données historiques en ligne sont disponibles en format [JSON](https://json.org/), en cherchant un peu, vous devriez en trouver à utiliser avec votre bot. 
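The sentence above is a good place to make "historical data in JSON" tangible. The sketch below queries the Library of Congress's Chronicling America newspaper API from Python and drafts one tweet-sized line from the result; the endpoint URL and the `items`, `title` and `start_year` field names are assumptions to verify against the current API documentation, not something this lesson prescribes:

```python
import json
import random
import urllib.request

# Assumed endpoint: Chronicling America newspaper-title search, JSON output.
URL = "https://chroniclingamerica.loc.gov/search/titles/results/?terms=paris&format=json"

with urllib.request.urlopen(URL) as response:
    data = json.load(response)

# Pick one record at random and draft a tweet-sized message from it.
item = random.choice(data["items"])
message = f"Newspaper of the day: {item['title']} (est. {item.get('start_year', '?')})"
print(message[:280])  # stay under Twitter's character limit
```

A grammar-driven bot would slot values like these into its blanks instead of printing them, but the data-gathering step looks the same.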
-Ma méthode est celle du bricoleur qui adapte et assemble des morceaux de code trouvés ici et là. En vérité, la programmation fonctionne en grande partie comme ça. Il existe beaucoup de logiciels pour interagir avec l’API (*Application Programming Interface* soit l'interface de programmation d'application) de Twitter. Dans cette leçon, il y aura peu de « programmation »: les bots ne seront pas écrits en Python, par exemple. Dans cette leçon d’introduction, je vais vous montrer comment construire un bot qui raconte des histoires, qui compose de la poésie, qui fait des choses merveilleuses à l’aide de [Tracery.io](http://tracery.io/) comme _grammaire générative_ et du service Cheap Bots Done Quick comme hébergeur du bot. Pour davantage de tutoriels pour apprendre à construire et héberger des bots Twitter sur d’autres services, voir [la liste de tutoriels de Botwiki](https://botwiki.org/tutorials/twitterbots/) (en anglais). +Ma méthode est celle du bricoleur qui adapte et assemble des morceaux de code trouvés ici et là. En vérité, la programmation fonctionne en grande partie comme ça. Il existe beaucoup de logiciels pour interagir avec l’API (*Application Programming Interface* soit l'interface de programmation d'application) de Twitter. Dans cette leçon, il y aura peu de « programmation »: les bots ne seront pas écrits en Python, par exemple. Dans cette leçon d’introduction, je vais vous montrer comment construire un bot qui raconte des histoires, qui compose de la poésie, qui fait des choses merveilleuses à l’aide de [Tracery.io](https://tracery.io/) comme _grammaire générative_ et du service Cheap Bots Done Quick comme hébergeur du bot. Pour davantage de tutoriels pour apprendre à construire et héberger des bots Twitter sur d’autres services, voir [la liste de tutoriels de Botwiki](https://botwiki.org/tutorials/twitterbots/) (en anglais). -Celui de mes bots qui a connu le plus de succès est [@tinyarchae](http://twitter.com/tinyarchae), un bot qui tweete des scènes de dysfonctionnements au sein d’un horrible projet d’excavation archéologique. Tout projet archéologique est confronté à des problèmes de sexisme, d’insultes, de mauvaise foi. Ainsi, @tinyarchae prend tout ce qui se murmure dans les colloques et le pousse à l’extrême. C’est, en réalité, une caricature qui comporte une part de vérité embarrassante. D’autres bots que j’ai construits détournent de la [photographie archéologique](https://twitter.com/archaeoglitch); l’un est même utile puisqu’il [annonce la sortie de nouveaux articles de revues en archéologie](https://twitter.com/botarchaeo) et fait donc office d’assistant de recherche. Pour plus de réflexions sur le rôle joué par les bots en archéologie publique, voir ce [discours inaugural](https://electricarchaeology.ca/2017/04/27/bots-of-archaeology-machines-writing-public-archaeology/) tiré du [colloque Twitter sur l’archéologie publique](http://web.archive.org/web/20180131161516/https://publicarchaeologyconference.wordpress.com/)). +Celui de mes bots qui a connu le plus de succès est [@tinyarchae](https://twitter.com/tinyarchae), un bot qui tweete des scènes de dysfonctionnements au sein d’un horrible projet d’excavation archéologique. Tout projet archéologique est confronté à des problèmes de sexisme, d’insultes, de mauvaise foi. Ainsi, @tinyarchae prend tout ce qui se murmure dans les colloques et le pousse à l’extrême. C’est, en réalité, une caricature qui comporte une part de vérité embarrassante. 
D’autres bots que j’ai construits détournent de la [photographie archéologique](https://twitter.com/archaeoglitch); l’un est même utile puisqu’il [annonce la sortie de nouveaux articles de revues en archéologie](https://twitter.com/botarchaeo) et fait donc office d’assistant de recherche. Pour plus de réflexions sur le rôle joué par les bots en archéologie publique, voir ce [discours inaugural](https://electricarchaeology.ca/2017/04/27/bots-of-archaeology-machines-writing-public-archaeology/) tiré du [colloque Twitter sur l’archéologie publique](https://web.archive.org/web/20180131161516/https://publicarchaeologyconference.wordpress.com/)). # Préparation : que fera votre bot ? @@ -97,7 +97,7 @@ Commençons avec un bloc-notes et du papier. À l'école primaire, une activité et les élèves remplissaient les blancs comme demandé. C'était un peu bête et, surtout, c'était amusant. Les Twitterbots sont à ce type d'improvisation ce que les voitures de sport sont aux attelages de chevaux. Les blancs à remplir pourraient, par exemple, être des valeurs dans des graphiques vectoriels svg. Il pourrait s'agir de nombres dans des noms de fichiers numériques (et donc de liens aléatoires vers une base de données ouverte, par exemple). Cela pourrait même être des noms et des adverbes. Comme les bots Twitter vivent sur le web, les blocs de construction à assembler peuvent être autre chose que du texte, même si, pour l'instant, le texte est le plus facile à utiliser. -Nous allons commencer par esquisser une *grammaire de remplacement*. Cette grammaire s’appelle [Tracery.io](http://tracery.io) et ses conventions ont été développées par Kate Compton ([@galaxykate](https://twitter.com/galaxykate) sur Twitter). Elle s’utilise comme une bibliothèque [javascript](https://fr.wikipedia.org/wiki/JavaScript) dans des pages web, des jeux, et des bots. Une grammaire de remplacement fonctionne en grande partie comme les improvisations ci-dessus. +Nous allons commencer par esquisser une *grammaire de remplacement*. Cette grammaire s’appelle [Tracery.io](https://tracery.io) et ses conventions ont été développées par Kate Compton ([@galaxykate](https://twitter.com/galaxykate) sur Twitter). Elle s’utilise comme une bibliothèque [javascript](https://fr.wikipedia.org/wiki/JavaScript) dans des pages web, des jeux, et des bots. Une grammaire de remplacement fonctionne en grande partie comme les improvisations ci-dessus. Afin de clarifier d'abord ce que fait la _grammaire_, nous n'allons _pas_ créer un bot en histoire pour l'instant. Νous allons plutôt construire quelque chose de surréaliste pour montrer comment cette grammaire fonctionne. Imaginons que vous souhaitiez créer un bot qui parle avec la voix d'une plante en pot. Que pourrait-il bien dire ce bot que nous appelerons tout simplement _PlanteEnPot_? Notez quelques idées. @@ -219,7 +219,7 @@ Vous pouvez certes associer un bot à votre propre compte Twitter. Toutefois, si Normalement, quand on construit un bot Twitter, il faut créer [une application sur Twitter en tant que développeur ou développeuse](https://developer.twitter.com/)), obtenir les clés d'accès d'utilisateur/utilisatrice de l'API (Application Programming Interface, il s'agit de l'interface de programmation applicative), ainsi que le *token* (jeton). Ensuite, il faudrait programmer l'authentification pour que Twitter sache que le programme essayant d'accéder à la plate-forme est autorisé. 
-Heureusement, nous n'avons pas à faire tout cela, puisque George Buckenham a créé le site d'hébergement de bot [Cheap Bots Done Quick](http://cheapbotsdonequick.com/) (ce site web montre également la grammaire source en JSON pour un certain nombre de bots différents, ce qui peut vous donner des idées). Une fois que vous avez créé le compte Twitter de votre bot et que vous y êtes connecté, allez sur Cheap Bots Done Quick et cliquez sur le bouton `Sign in with Twitter`(*Connexion avec Twitter*). Le site vous redirigera vers Twitter pour approuver l'autorisation, puis vous ramènera à Cheap Bots Done Quick. +Heureusement, nous n'avons pas à faire tout cela, puisque George Buckenham a créé le site d'hébergement de bot [Cheap Bots Done Quick](https://cheapbotsdonequick.com/) (ce site web montre également la grammaire source en JSON pour un certain nombre de bots différents, ce qui peut vous donner des idées). Une fois que vous avez créé le compte Twitter de votre bot et que vous y êtes connecté, allez sur Cheap Bots Done Quick et cliquez sur le bouton `Sign in with Twitter`(*Connexion avec Twitter*). Le site vous redirigera vers Twitter pour approuver l'autorisation, puis vous ramènera à Cheap Bots Done Quick. Le JSON qui décrit votre bot peut être rédigé ou collé dans la case blanche principale qui se trouve en bas. Copiez le script que vous avez préparé dans Tracery depuis votre éditeur de texte et collez-le dans la case blanche principale. S'il y a des erreurs dans votre JSON, la fenêtre de résultat en bas deviendra rouge et le site essaiera de vous indiquer ce qui pose problème. Dans la plupart des cas, ce sera à cause d'une virgule ou d'un guillemet erronés ou égarés. Si vous cliquez sur le bouton d'actualisation à droite de la fenêtre de résultat (attention, il n'est PAS question ici du bouton d'actualisation de votre navigateur!), le site va générer un nouveau texte à partir de votre grammaire. @@ -239,7 +239,7 @@ Cheap Bots Done Quick est un service fourni par George Buckenham dans un esprit > Si vous créez un bot que je juge spammeux, injurieux ou désagréable d'une manière ou d'une autre (par exemple, en @mentionnant des personnes qui n'ont pas donné leur consentement, en publiant des insultes ou en proférant des calomnies), je le retirerai. -Darius Kazemi, l'un des grands artistes du bot, fournit davantage de conseils en matière de bonnes manières concernant les bots [ici](http://tinysubversions.com/2013/03/basic-twitter-bot-etiquette/). +Darius Kazemi, l'un des grands artistes du bot, fournit davantage de conseils en matière de bonnes manières concernant les bots [ici](https://tinysubversions.com/2013/03/basic-twitter-bot-etiquette/). # Aller plus loin avec Tracery Ce que nous avons décrit ici est suffisant pour vous permettre de vous lancer. Toutefois, beaucoup de bots sont plus compliqués que cela et il est possible d'en créer qui sont étonnamment efficaces en utilisant Tracery. @@ -259,7 +259,7 @@ Les modificateurs `.capitalize` et `.s` sont ajoutés à l'intérieur du `#` du ## Utiliser des emoji -Les emojis peuvent être utilisés avec beaucoup d'efficacité dans des bots Twitter. Vous pouvez copier et coller des emojis directement dans l'éditeur Cheap Bots Done Quick, en les plaçant chacun entre guillemets comme toute autre valeur qui vous sert de règle. 
Utilisez [cette liste](http://unicode.org/emoji/charts/full-emoji-list.html) pour repérer les emojis que vous souhaitez utiliser, en veillant à les copier/coller depuis la colonne Twitter pour vous assurer qu'ils vont bien s'afficher. +Les emojis peuvent être utilisés avec beaucoup d'efficacité dans des bots Twitter. Vous pouvez copier et coller des emojis directement dans l'éditeur Cheap Bots Done Quick, en les plaçant chacun entre guillemets comme toute autre valeur qui vous sert de règle. Utilisez [cette liste](https://unicode.org/emoji/charts/full-emoji-list.html) pour repérer les emojis que vous souhaitez utiliser, en veillant à les copier/coller depuis la colonne Twitter pour vous assurer qu'ils vont bien s'afficher. ## Réutilisation de symboles générés avec la fonctionnalité action @@ -284,7 +284,7 @@ Cette fonctionnalité ne serait probablement pas très utile dans le cas d'un bo ``` -Un autre exemple un peu plus complexe est le numéro 5 sur le site du tutoriel de Kate Compton à l'adresse [http://www.crystalcodepalace.com/traceryTut.html](http://www.crystalcodepalace.com/traceryTut.html) : +Un autre exemple un peu plus complexe est le numéro 5 sur le site du tutoriel de Kate Compton à l'adresse [https://www.crystalcodepalace.com/traceryTut.html](https://www.crystalcodepalace.com/traceryTut.html) : ```JSON { @@ -303,7 +303,7 @@ Tracery lit le symbole `origin` (*N.D.L.R.: si vous travaillez sur un exemple en ## Répondre à des mentions dans Cheap Bots Done Quick -[Cheap Bots Done Quick](http://cheapbotsdonequick.com/) possède une fonctionnalité bêta qui permet à votre robot de répondre aux mentions. Attention, si vous créez deux bots configurés pour que l'un mentionne l'autre, la « conversation » qui s'ensuit peut durer très longtemps. A noter qu'il y a 5% de chances dans tout échange que le bot ne réponde pas, interrompant ainsi la conversation. +[Cheap Bots Done Quick](https://cheapbotsdonequick.com/) possède une fonctionnalité bêta qui permet à votre robot de répondre aux mentions. Attention, si vous créez deux bots configurés pour que l'un mentionne l'autre, la « conversation » qui s'ensuit peut durer très longtemps. A noter qu'il y a 5% de chances dans tout échange que le bot ne réponde pas, interrompant ainsi la conversation. Pour configurer un modèle de réponse, cliquez au bas de la page pour paramétrer le bouton pour répondre aux tweets (`Reply`). Dans la fenêtre de modification JSON qui apparaît, configurez le modèle pour les phrases auxquelles votre bot va répondre. Par exemple, voici ci-dessous une partie de ce que mon bot @tinyarchae détecte : @@ -331,7 +331,7 @@ Tout en bas de la page, vous pouvez tester vos mentions en écrivant un exemple {% include figure.html filename="bot-lesson-response.png" caption="Tester la réponse du bot" %} ## Graphiques SVG -Puisque le [SVG](https://fr.wikipedia.org/wiki/Scalable_Vector_Graphics) est un format de données qui décrit la géométrie d'un graphique vectoriel, Tracery peut être utilisé pour réaliser un travail plutôt artistique. Par exemple, il existe le bot [Tiny Space Adventure](https://twitter.com/TinyAdv) qui dessine un champ d'étoiles, un vaisseau spatial et un descriptif. Sa grammaire [peut être consultée ici](https://pastebin.com/YYtZnzZ0). Pour que le format SVG soit généré correctement, il est d'une importance capitale d'avoir paramétré correctement Tracery. 
N'hésitez donc pas de prendre comme modèle le code source du [bot softlandscapes](http://cheapbotsdonequick.com/source/softlandscapes) qui commence par définir le texte critique qui délimite le SVG :
+Puisque le [SVG](https://fr.wikipedia.org/wiki/Scalable_Vector_Graphics) est un format de données qui décrit la géométrie d'un graphique vectoriel, Tracery peut être utilisé pour réaliser un travail plutôt artistique. Par exemple, il existe le bot [Tiny Space Adventure](https://twitter.com/TinyAdv) qui dessine un champ d'étoiles, un vaisseau spatial et un descriptif. Sa grammaire [peut être consultée ici](https://pastebin.com/YYtZnzZ0). Pour que le format SVG soit généré correctement, il est d'une importance capitale d'avoir paramétré correctement Tracery. N'hésitez donc pas à prendre comme modèle le code source du [bot softlandscapes](https://cheapbotsdonequick.com/source/softlandscapes) qui commence par définir le texte critique qui délimite le SVG :

```
"origin2": ["#preface##defs##bg##mountains##clouds##ending#"],
@@ -351,12 +351,12 @@ Note : cette fonctionnalité est encore en développement, le bouton tweet sur c
 Les bots qui génèrent du SVG dépassent le cadre de cette leçon, mais une étude minutieuse des bots existants devrait pouvoir vous aider, si vous souhaitez approfondir cette question.

 ## Musique
-À proprement parler, il ne s'agit plus de bots, mais comme la musique peut être écrite en texte, on peut utiliser Tracery pour composer de la musique et utiliser d'autres bibliothèques pour convertir cette notation en fichiers Midi. Pour aller plus loin, vous pouvez consulter [cet article-ci](http://www.codingblocks.net/videos/generating-music-in-javascript/) et mon [propre retour d'expérience](https://electricarchaeology.ca/2017/04/07/tracery-continues-to-be-awesome/).
+À proprement parler, il ne s'agit plus de bots, mais comme la musique peut être écrite en texte, on peut utiliser Tracery pour composer de la musique et utiliser d'autres bibliothèques pour convertir cette notation en fichiers Midi. Pour aller plus loin, vous pouvez consulter [cet article-ci](https://www.codingblocks.net/videos/generating-music-in-javascript/) et mon [propre retour d'expérience](https://electricarchaeology.ca/2017/04/07/tracery-continues-to-be-awesome/).

 # Autres tutoriels et ressources sur les bots

 **En anglais:**
-- Zach Whalen, [How to make a Twitter Bot with Google Spreadsheets](http://www.zachwhalen.net/posts/how-to-make-a-twitter-bot-with-google-spreadsheets-version-04/), site web de Zach Whalen, http://www.zachwhalen.net/, 7 mai 2015
+- Zach Whalen, [How to make a Twitter Bot with Google Spreadsheets](https://www.zachwhalen.net/posts/how-to-make-a-twitter-bot-with-google-spreadsheets-version-04/), site web de Zach Whalen, https://www.zachwhalen.net/, 7 mai 2015
 - Casey Bergman, [Keeping Up With the Scientific Literature using Twitterbots: The FlyPapers Experiment](https://caseybergman.wordpress.com/2014/02/24/keeping-up-with-the-scientific-literature-using-twitterbots-the-flypapers-experiment/) (et aussi [ce repository de Robert Lanfear sur Github](https://github.com/roblanf/phypapers)). Cette méthode consiste à collecter les flux RSS des articles de revues, puis à utiliser un service tel que [Dlvr.it](https://dlvrit.com/) pour rediriger les liens vers un compte Twitter.
 - Abandonnée: Stefan Bohacek propose des modèles de code pour différents types de bots sur le site de remixage de code Glitch.com. Si vous vous rendez sur sa page, vous verrez une liste de différents types de bots.
Sélectionnez-en un, cliquez sur le bouton `remix` puis étudiez attentivement la documentation `README.md` qui s'affiche sur la page. Glitch nécessite une identification (login) via un compte Github ou Facebook.
 - Enfin, je suggère de rejoindre le groupe BotMakers Slack pour découvrir d'autres tutoriels, des personnes partageant les mêmes intérêts, et d'autres ressources : [Inscrivez-vous ici](https://botmakers.org).

diff --git a/fr/lecons/introduction-a-la-stylometrie-avec-python.md b/fr/lecons/introduction-a-la-stylometrie-avec-python.md
index 7f91476781..ed3d09aa4f 100644
--- a/fr/lecons/introduction-a-la-stylometrie-avec-python.md
+++ b/fr/lecons/introduction-a-la-stylometrie-avec-python.md
@@ -66,7 +66,7 @@ Ce tutoriel utilise un jeu de données et des logiciels que vous devrez téléch

 ### Le jeu de données

-Pour compléter les exercices de ce tutoriel, vous devrez télécharger et ouvrir l'archive des _Federalist Papers_ [.zip](/assets/introduction-to-stylometry-with-python/stylometry-federalist.zip) qui contient les 85 articles dont nous aurons besoin pour effectuer notre analyse. L'archive contient également le [livre électronique du Projet Gutenberg](http://www.gutenberg.org/cache/epub/1404/pg1404.txt) dont ces 85 documents ont été extraits. L'ouverture du fichier .zip créera un [répertoire](https://fr.wikipedia.org/wiki/R%C3%A9pertoire_(informatique)) nommé `data` dans votre répertoire de travail courant. Assurez-vous de rester dans ce répertoire de travail courant et d'y sauvegarder tout le travail que vous réaliserez en suivant le tutoriel.
+Pour compléter les exercices de ce tutoriel, vous devrez télécharger et ouvrir l'archive des _Federalist Papers_ [.zip](/assets/introduction-to-stylometry-with-python/stylometry-federalist.zip) qui contient les 85 articles dont nous aurons besoin pour effectuer notre analyse. L'archive contient également le [livre électronique du Projet Gutenberg](https://www.gutenberg.org/cache/epub/1404/pg1404.txt) dont ces 85 documents ont été extraits. L'ouverture du fichier .zip créera un [répertoire](https://fr.wikipedia.org/wiki/R%C3%A9pertoire_(informatique)) nommé `data` dans votre répertoire de travail courant. Assurez-vous de rester dans ce répertoire de travail courant et d'y sauvegarder tout le travail que vous réaliserez en suivant le tutoriel.

 ### Le logiciel

@@ -80,11 +80,11 @@ Certaines de ces ressources peuvent être absentes de votre ordinateur. Si vous

 ## Quelques notes au sujet des langues

-Ce tutoriel applique des méthodes d'analyse stylométrique à un ensemble de textes rédigés en anglais à l'aide d'un module Python nommé `nltk`. Plusieurs des fonctions offertes par `nltk` sont cependant disponibles dans d'autres langues. Pour peu qu'une langue écrite divise ses mots de façon claire et précise, `nltk` devrait fonctionner correctement. Les langues pour lesquelles il n'y a pas de séparation nette entre les mots à l'écrit, comme par exemple le chinois, pourraient poser problème. J'ai utilisé `nltk` avec des textes français sans difficulté; les autres langues qui utilisent des [signes diacritiques](https://fr.wikipedia.org/wiki/Diacritique), comme l'espagnol et l'allemand, devraient être compatibles avec `nltk` elles aussi. Veuillez consulter la [documentation de nltk (en anglais seulement)](http://www.nltk.org/book/) pour plus de détails.
+Ce tutoriel applique des méthodes d'analyse stylométrique à un ensemble de textes rédigés en anglais à l'aide d'un module Python nommé `nltk`.
Plusieurs des fonctions offertes par `nltk` sont cependant disponibles dans d'autres langues. Pour peu qu'une langue écrite divise ses mots de façon claire et précise, `nltk` devrait fonctionner correctement. Les langues pour lesquelles il n'y a pas de séparation nette entre les mots à l'écrit, comme par exemple le chinois, pourraient poser problème. J'ai utilisé `nltk` avec des textes français sans difficulté; les autres langues qui utilisent des [signes diacritiques](https://fr.wikipedia.org/wiki/Diacritique), comme l'espagnol et l'allemand, devraient être compatibles avec `nltk` elles aussi. Veuillez consulter la [documentation de nltk (en anglais seulement)](https://www.nltk.org/book/) pour plus de détails. Une seule des tâches de ce tutoriel exige du code qui varie en fonction de la langue. Pour diviser un texte en un ensemble de mots en français ou en espagnol, vous devrez spécifier la langue appropriée à [l'analyseur lexical](https://fr.wikipedia.org/wiki/Analyse_lexicale#Analyseur_lexical) de `nltk`. La procédure à suivre sera expliquée au moment venu. -Enfin, veuillez noter que certaines tâches linguistiques, comme [l'étiquetage grammatical](https://fr.wikipedia.org/wiki/%C3%89tiquetage_morpho-syntaxique) des mots, peuvent ne pas être supportées par `nltk` dans les langues autres que l'anglais. Ce tutoriel ne couvre pas l'étiquetage grammatical. Si vos propres projets en ont besoin, veuillez consulter la [documentation de nltk](http://www.nltk.org/book/) pour obtenir des conseils. +Enfin, veuillez noter que certaines tâches linguistiques, comme [l'étiquetage grammatical](https://fr.wikipedia.org/wiki/%C3%89tiquetage_morpho-syntaxique) des mots, peuvent ne pas être supportées par `nltk` dans les langues autres que l'anglais. Ce tutoriel ne couvre pas l'étiquetage grammatical. Si vos propres projets en ont besoin, veuillez consulter la [documentation de nltk](https://www.nltk.org/book/) pour obtenir des conseils. # Les _Federalist Papers_ - Contexte historique @@ -344,7 +344,7 @@ Cependant, le khi carré constitue toujours une méthode approximative. Par exem Dans certaines langues, il peut être utile d'étiqueter grammaticalement les occurrences de mots avant de les compter, pour que les occurrences de certains mots polysémiques puissent être divisées entre deux traits distincts. Par exemple, en français, les mots "le" et "la" servent à la fois d'articles et de pronoms. Ce tutoriel n'applique pas l'étiquetage grammatical puisqu'il est rarement utile pour l'analyse stylométrique de textes en anglais contemporain et parce que l'analyseur syntaxique de `nltk` ne gère pas très bien les autres langues. -Si vous avez besoin d'étiqueter les occurrences pour vos propres projets, il est possible de télécharger des analyseurs extérieurs, d'obtenir des outils séparés comme [Tree Tagger](http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/), ou même d'entraîner vos propres modèles d'étiquetage, mais ces techniques sont hors du cadre de ce tutoriel. +Si vous avez besoin d'étiqueter les occurrences pour vos propres projets, il est possible de télécharger des analyseurs extérieurs, d'obtenir des outils séparés comme [Tree Tagger](https://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/), ou même d'entraîner vos propres modèles d'étiquetage, mais ces techniques sont hors du cadre de ce tutoriel. 
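À titre d'illustration de la méthode du khi carré discutée plus haut, en voici une esquisse minimale en Python. Il ne s'agit pas du code de la leçon : le nom de la fonction, les noms de variables et le choix de 500 mots-traits sont hypothétiques ; seule la logique (comptes observés contre comptes attendus, proportionnels à la taille de chaque corpus) suit la description qui précède.

``` python
import nltk

def distance_khi_carre(jetons_a, jetons_b, n_traits=500):
    # Les mots les plus fréquents du corpus joint servent de traits
    corpus_joint = jetons_a + jetons_b
    frequences = nltk.FreqDist(corpus_joint)
    traits = [mot for mot, _ in frequences.most_common(n_traits)]

    khi2 = 0.0
    for mot in traits:
        observe_a = jetons_a.count(mot)
        observe_b = jetons_b.count(mot)
        total = observe_a + observe_b
        # Comptes attendus, proportionnels à la taille de chaque corpus
        attendu_a = total * len(jetons_a) / len(corpus_joint)
        attendu_b = total * len(jetons_b) / len(corpus_joint)
        khi2 += (observe_a - attendu_a) ** 2 / attendu_a
        khi2 += (observe_b - attendu_b) ** 2 / attendu_b
    return khi2
```

Plus la valeur retournée est petite, plus les deux listes de jetons sont stylistiquement proches ; appliquée aux _Federalist Papers_, la comparaison se ferait entre le corpus d'un auteur candidat et les articles disputés.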
# Troisième test stylométrique : la méthode du Delta de John Burrows (Concepts avancés) @@ -631,7 +631,7 @@ La première itération de ce projet a été développée dans le cadre des sém [^16]: John Burrows, "'Delta': a Measure of Stylistic Difference and a Guide to Likely Authorship", _Literary and Linguistic Computing_, vol. 17, no. 3 (2002), p. 267-287. -[^17]: José Calvo Tello, “Entendiendo Delta desde las Humanidades,” _Caracteres_, 27 mai 2016, http://revistacaracteres.net/revista/vol5n1mayo2016/entendiendo-delta/. +[^17]: José Calvo Tello, “Entendiendo Delta desde las Humanidades,” _Caracteres_, 27 mai 2016, https://revistacaracteres.net/revista/vol5n1mayo2016/entendiendo-delta/. [^18]: Stefan Evert et al., "Understanding and explaining Delta measures for authorship attribution", _Digital Scholarship in the Humanities_, vol. 32, no. suppl_2 (2017), p. ii4-ii16. diff --git a/fr/lecons/introduction-aux-carnets-jupyter-notebooks.md b/fr/lecons/introduction-aux-carnets-jupyter-notebooks.md index d4dc90fce0..dba86e209d 100644 --- a/fr/lecons/introduction-aux-carnets-jupyter-notebooks.md +++ b/fr/lecons/introduction-aux-carnets-jupyter-notebooks.md @@ -417,7 +417,7 @@ Qu'il s'agisse d'expérimenter avec la programmation, de documenter les processu [^2]: Millman, KJ et Fernando Perez. 2014. « Developing open source scientific practice », dans *Implementing Reproducible Research*, édité par Victoria Stodden, Friedrich Leisch et Roger D. Peng. https://osf.io/h9gsd/ -[^3]: Sinclair, Stéfan et Geoffrey Rockwell. 2013. « Voyant Notebooks: Literate Programming and Programming Literacy ». Journal of Digital Humanities, Vol. 2, No. 3 Été 2013. http://journalofdigitalhumanities.org/2-3/voyant-notebooks-literate-programming-and-programming-literacy/ +[^3]: Sinclair, Stéfan et Geoffrey Rockwell. 2013. « Voyant Notebooks: Literate Programming and Programming Literacy ». Journal of Digital Humanities, Vol. 2, No. 3 Été 2013. https://journalofdigitalhumanities.org/2-3/voyant-notebooks-literate-programming-and-programming-literacy/ [^4]: Haley Di Pressi, Stephanie Gorman, Miriam Posner, Raphael Sasayama et Tori Schmitt, avec la collaboration de Roderic Crooks, Megan Driscoll, Amy Earhart, Spencer Keralis, Tiffany Naiman et Todd Presner. « A Student Collaborator's Bill of Rights ». https://humtech.ucla.edu/news/a-student-collaborators-bill-of-rights/ diff --git a/fr/lecons/introduction-et-installation.md b/fr/lecons/introduction-et-installation.md index d3012329a6..b138ef64af 100644 --- a/fr/lecons/introduction-et-installation.md +++ b/fr/lecons/introduction-et-installation.md @@ -42,7 +42,7 @@ Dans cette leçon d'introduction, vous allez installer le [langage de programmat Le langage de programmation Python ------------------------------- -Dans le cadre de cette série de leçons, nous utiliserons Python, un langage de programmation gratuit et à code ouvert. À moins d'indications contraires, nous utiliserons la **version 3** de Python, la version 2 n'étant plus soutenue. Il se pourrait cependant que vous rencontriez du code Python 2 dans des projets ou des tutoriels plus anciens. [Python 3 présente des différences avec ses prédécesseurs](http://sebastianraschka.com/Articles/2014_python_2_3_key_diff.html) - à titre de comparaison, imaginez une langue dont les règles de grammaire évoluent avec le temps. Méfiez-vous donc d'exemples que vous pourriez trouver en ligne utilisant Python 2, car il est possible qu'ils ne fonctionnent pas en Python 3. 
+Dans le cadre de cette série de leçons, nous utiliserons Python, un langage de programmation gratuit et à code ouvert. À moins d'indications contraires, nous utiliserons la **version 3** de Python, la version 2 n'étant plus soutenue. Il se pourrait cependant que vous rencontriez du code Python 2 dans des projets ou des tutoriels plus anciens. [Python 3 présente des différences avec ses prédécesseurs](https://sebastianraschka.com/Articles/2014_python_2_3_key_diff.html) - à titre de comparaison, imaginez une langue dont les règles de grammaire évoluent avec le temps. Méfiez-vous donc d'exemples que vous pourriez trouver en ligne utilisant Python 2, car il est possible qu'ils ne fonctionnent pas en Python 3. Sauvegardez votre travail! ----------------- @@ -60,11 +60,11 @@ Pour utiliser les techniques présentées ici, vous devrez d'abord télécharger - [Installation de Python pour Windows](/fr/lecons/installation-windows-py) - [Installation de Python pour Linux](/en/lessons/linux-installation) - [langage de programmation Python]: http://www.python.org/ - [l'analyseur HTML / XML Beautiful Soup]: http://www.crummy.com/software/BeautifulSoup/ - [Komodo Edit]: http://www.activestate.com/komodo-edit - [éditeurs compatibles avec Python]: http://wiki.python.org/moin/PythonEditors/ - [Zotero]: http://www.zotero.org/ + [langage de programmation Python]: https://www.python.org/ + [l'analyseur HTML / XML Beautiful Soup]: https://www.crummy.com/software/BeautifulSoup/ + [Komodo Edit]: https://www.activestate.com/komodo-edit + [éditeurs compatibles avec Python]: https://wiki.python.org/moin/PythonEditors/ + [Zotero]: https://www.zotero.org/ [Jungle Disk]: https://www.jungledisk.com/ [Dropbox]: https://www.dropbox.com/ [Affichage des fichiers HTML]: /lessons/viewing-html-files diff --git a/fr/lecons/manipuler-chaines-caracteres-python.md b/fr/lecons/manipuler-chaines-caracteres-python.md index cb61798696..6ab3637b02 100644 --- a/fr/lecons/manipuler-chaines-caracteres-python.md +++ b/fr/lecons/manipuler-chaines-caracteres-python.md @@ -213,7 +213,7 @@ Deux autres séquences d'échappement vous permettent d'imprimer des tabulateurs ``` ## Bibliographie -- Mark Lutz, *[Learning Python](http://www.worldcat.org/oclc/1061273329)* +- Mark Lutz, *[Learning Python](https://www.worldcat.org/oclc/1061273329)* - Ch. 7: Strings *(Chaînes de caractères)* - Ch. 8: Lists and Dictionaries *(Listes et dictionnaires)* - Ch. 10: Introducing Python Statements *(Introduction aux déclarations en Python)* diff --git a/fr/lecons/nettoyer-ses-donnees-avec-openrefine.md b/fr/lecons/nettoyer-ses-donnees-avec-openrefine.md index 518f82f751..930f66c801 100644 --- a/fr/lecons/nettoyer-ses-donnees-avec-openrefine.md +++ b/fr/lecons/nettoyer-ses-donnees-avec-openrefine.md @@ -132,13 +132,13 @@ Une fois vos données nettoyées, vous pouvez passer à l'étape suivante et exp Si vous devez vous souvenir d'une seule chose de ce cours, ce doit être celle-ci : *toutes les données sont sales, mais vous pouvez y faire quelque chose*. Comme nous l'avons montré ici, il y a déjà beaucoup de choses que vous pouvez faire par vous-mêmes pour améliorer la qualité de vos données. Vous avez ainsi appris comment avoir un rapide aperçu du nombre de valeurs vides que contient votre jeu de données et à quelle fréquence une valeur particulière (par exemple un mot-clé) est utilisée dans une collection. 
Ces cours vous ont également montré comment résoudre des problèmes récurrents tels que les doublons et les incohérences orthographiques de manière automatisée à l'aide d'*OpenRefine*. -[*OpenRefine*]: http://openrefine.org "OpenRefine" +[*OpenRefine*]: https://openrefine.org "OpenRefine" [Powerhouse museum]: https://powerhouse.com.au/ "Powerhouse museum" [*Potter’s Wheel ABC*]: https://perma.cc/Q6QD-E64N "Potter's Wheel ABC " - [*Wrangler*]: http://vis.stanford.edu/papers/wrangler/ "Wrangler" + [*Wrangler*]: https://vis.stanford.edu/papers/wrangler/ "Wrangler" [profilage]: https://fr.wikipedia.org/wiki/Data_profiling [reconnaissance d'entités nommées]: https://fr.wikipedia.org/wiki/Reconnaissance_d%27entit%C3%A9s_nomm%C3%A9es - [Bibliothèque du Congrès]: http://www.loc.gov/index.html "Bibliothèque du Congrès" + [Bibliothèque du Congrès]: https://www.loc.gov/index.html "Bibliothèque du Congrès" [OCLC]: https://www.oclc.org/fr/home.html "OCLC" [site web]: https://api.maas.museum/docs "site web" [licence Creative Commons Attribution - Partage dans les Mêmes Conditions]: https://creativecommons.org/licenses/by-sa/4.0/deed.fr diff --git a/fr/lecons/normaliser-donnees-textuelles-python.md b/fr/lecons/normaliser-donnees-textuelles-python.md index 27e6126bcb..d88365ceb7 100644 --- a/fr/lecons/normaliser-donnees-textuelles-python.md +++ b/fr/lecons/normaliser-donnees-textuelles-python.md @@ -96,7 +96,7 @@ En effet, la fonction `stripTags()` du module `obo.py` retourne une chaine de ca Modifier `html-to-list1.py` pour y appliquer la méthode `lower()` à `obo.stripTags(html)` :
-Attention : à cause des modifications faites au site du Old Bailey Online depuis la publication de cette leçon, le lien http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 ne fonctionnera plus dans le code ci-dessous. Vous avez deux options pour contourner le problème . Si vous suivez actuellement cette leçon en utilisant un autre site qui fonctionne, vous pouvez simplement remplacer le lien du Old Bailey Online avec votre propre lien correspondant (en d'autres termes, il suffit de modifier la variable url) :
+Attention : à cause des modifications faites au site du Old Bailey Online depuis la publication de cette leçon, le lien https://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 ne fonctionnera plus dans le code ci-dessous. Vous avez deux options pour contourner le problème. Si vous suivez actuellement cette leçon en utilisant un autre site qui fonctionne, vous pouvez simplement remplacer le lien du Old Bailey Online par votre propre lien correspondant (en d'autres termes, il suffit de modifier la variable url) :
    ``` python @@ -193,11 +193,11 @@ wordlist = obo.stripNonAlphaNum(text) print(wordlist) ``` -En exécutant le programme et en regardant ce qu'il en ressort dans le panneau `Command Output`, vous verrez qu'il fait plutôt du bon travail. Ce code sépare les mots composés avec un trait d'union comme _coach-wheels_ en deux mots, et compte le possessif anglais _'s_ ou la forme _o'clock_ comme des mots distincts, en retirant l'apostrophe. Il s'agit cependant d'une approximation satisfaisante de ce que nous voulions obtenir, et nous pouvons continuer d'avancer vers nos mesures de fréquences avant d'essayer de l'améliorer. (Si les sources sur lesquelles vous travaillez sont dans plus d'une langue, vous aurez besoin d'en apprendre plus sur le standard [Unicode](https://home.unicode.org/) et sur sa [prise en charge Python](https://web.archive.org/web/20180502053841/http://www.diveintopython.net/xml_processing/unicode.html).) +En exécutant le programme et en regardant ce qu'il en ressort dans le panneau `Command Output`, vous verrez qu'il fait plutôt du bon travail. Ce code sépare les mots composés avec un trait d'union comme _coach-wheels_ en deux mots, et compte le possessif anglais _'s_ ou la forme _o'clock_ comme des mots distincts, en retirant l'apostrophe. Il s'agit cependant d'une approximation satisfaisante de ce que nous voulions obtenir, et nous pouvons continuer d'avancer vers nos mesures de fréquences avant d'essayer de l'améliorer. (Si les sources sur lesquelles vous travaillez sont dans plus d'une langue, vous aurez besoin d'en apprendre plus sur le standard [Unicode](https://home.unicode.org/) et sur sa [prise en charge Python](https://web.archive.org/web/20180502053841/https://www.diveintopython.net/xml_processing/unicode.html).) ## Pour aller plus loin -Si vous souhaitez pratiquer davantage les expressions régulières, le chapitre 7 de [Dive into Python](https://web.archive.org/web/20180416143856/http://www.diveintopython.net/regular_expressions/index.html) de Mark Pilgrim peut être un bon entrainement. +Si vous souhaitez pratiquer davantage les expressions régulières, le chapitre 7 de [Dive into Python](https://web.archive.org/web/20180416143856/https://www.diveintopython.net/regular_expressions/index.html) de Mark Pilgrim peut être un bon entrainement. ### Synchronisation du code diff --git a/fr/lecons/preserver-ses-donnees-de-recherche.md b/fr/lecons/preserver-ses-donnees-de-recherche.md index 00040bc759..e5024b35e7 100644 --- a/fr/lecons/preserver-ses-donnees-de-recherche.md +++ b/fr/lecons/preserver-ses-donnees-de-recherche.md @@ -89,7 +89,7 @@ Le moment de la documentation dépend beaucoup de l'individu et du rythme de ses Les données de la recherche et la documentation devraient dans l'idéal être sauvegardées dans des formats ouverts, qui sont [compatibles avec tous les systèmes d'exploitation][], comme .txt pour les notes et .csv (comma-separated values) ou .tsv (tab-seperated values) pour les données tabulées. Ces formats texte brut sont préférables aux formats propriétaires utilisés par défaut avec Microsoft Office ou iWork parce qu'ils peuvent être ouverts avec de nombreux logiciels et ont une forte chance de rester lisibles et modifiables dans le futur. La plupart des suites bureautiques standards incluent une option permettant de sauvegarder les fichiers dans les formats .txt, .csv et .tsv, ce qui signifie que vous pouvez continuer à travailler avec vos logiciels habituels tout en faisant ce qu'il faut pour préserver votre travail. 
Comparés à du .doc ou du .xls, ces formats ont en plus l'atout, dans une perspective de préservation, de ne contenir que des éléments lisibles par la machine. Bien que l'utilisation des caractères gras, italiques ou colorés pour signifier des titres ou établir des connections visuelles entre des données soit une pratique courante, ces annotations servent à l'affichage sans être lisibles par les machines et ne peuvent pas être interrogées ni fouillées. Elles ne sont pas non plus appropriées pour les grandes quantités d'informations. Il est préférable d'utiliser des schémas de notations simples comme des doubles astérisques ou des triples croisillons pour représenter des caractéristiques: dans mes notes, par exemple, trois points d'interrogations indiquent un point que je dois suivre, j'ai choisi "???' car cette suite peut être facilement trouvée avec une recherche CTRL+F. -Dans de nombreuses occasions, il est probable que ces schémas de notation émergent de la pratique individuelle (et doivent par conséquent être documentés), alors qu'il existe des schémas comme le [Markdown][] de [GitHub](https://github.com) (les fichiers Markdown sont enregistrés au format .md). Un excellent aide-mémoire au Markdown est disponible sur [GitHub](https://github.com/adam-p/markdown-here) pour ceux qui veulent suivre - ou adapter - le schéma existant. [Notepad++](http://notepad-plus-plus.org) est recommandé pour les utilisateurs de Windows, bien que nullement essentiel pour travailler avec des fichiers .md. Les utilisateurs de Mac ou d'Unix peuvent utiliser [Komodo Edit](https://www.activestate.com/products/komodo-edit/) ou [Text Wrangler](https://www.barebones.com/support/textwrangler/updates.html). +Dans de nombreuses occasions, il est probable que ces schémas de notation émergent de la pratique individuelle (et doivent par conséquent être documentés), alors qu'il existe des schémas comme le [Markdown][] de [GitHub](https://github.com) (les fichiers Markdown sont enregistrés au format .md). Un excellent aide-mémoire au Markdown est disponible sur [GitHub](https://github.com/adam-p/markdown-here) pour ceux qui veulent suivre - ou adapter - le schéma existant. [Notepad++](https://notepad-plus-plus.org) est recommandé pour les utilisateurs de Windows, bien que nullement essentiel pour travailler avec des fichiers .md. Les utilisateurs de Mac ou d'Unix peuvent utiliser [Komodo Edit](https://www.activestate.com/products/komodo-edit/) ou [Text Wrangler](https://www.barebones.com/support/textwrangler/updates.html). * * * * * @@ -117,22 +117,22 @@ L'examen des URL est un bon moyen de réfléchir à la raison pour laquelle la s Les URL utilisés par les sites d’informations ou les blogues en sont un exemple typique. Les URL Wordpress suivent le format suivant : - *nom du site Web*/*année (4 chiffres)*/*mois (2 chiffres)*/*jour (2 chiffres)*/*mots du titre séparés par des traits d'union* -- +- Un usage similaire est utilisé par les agences de presse telles que le journal *The Guardian* : - *nom du site Web*/*section de section*/*année (4 chiffres)*/*mois (3 caractères)*/*jour (2 chiffres)*/*mots-descripteurs-contenus-séparés-par-tirets* -- +- De leur côté, les catalogues d’archives utilisent souvent des URL structurées avec un seul élément de données. 
La British Cartoon Archive structure ses archives en ligne en utilisant le format : - *nom du site*/record/*numéro de référence* -- +- Et l'Old Bailey (la Haute Cour criminelle britannique) utilise le format : - *nom du site*/browse.jsp?ref=*numéro de référence* -- +- Ce que nous apprenons de ces exemples, c’est qu’une combinaison de description parlante et d’éléments de données rend les structures de données cohérentes et intuitives, lisibles à la fois par les humains et par les machines. Appliqué aux données numériques accumulées au cours de recherches historiques, cela facilite la navigation, la recherche et l'interrogation des données de recherche à l'aide des outils standard fournis par nos systèmes d'exploitation (et, comme nous le verrons dans une prochaine leçon, d'outils plus perfectionnés). @@ -245,11 +245,11 @@ blog (17 octobre 2013) Hitchcock, Tim, 'Judging a book by its URLs', Historyonics blog (3 janvier 2014) - + Howard, Sharon, 'Unclean, unclean! What historians can do about sharing our messy research data', Early Modern Notes blog (18 mai 2013) - + Noble, William Stafford, A Quick Guide to Organizing Computational Biology Projects.PLoSComputBiol 5(7): e1000424 (2009) @@ -262,7 +262,7 @@ Information Management: Organising Humanities Material' (2011) Pennock, Maureen, 'The Twelve Principles of Digital Preservation (and a cartridge in a repository…)', British Library Collection Care blog (3 septembre 2013) - + Pritchard, Adam, 'Markdown Cheatsheet' (2013) @@ -271,12 +271,12 @@ Rosenzweig, Roy, 'Scarcity or Abundance? Preserving the Past in a Digital Era', The American Historical Review 108:3 (2003), 735-762. UK Data Archive, 'Documenting your Data' - + [PRINCE2]: https://fr.wikipedia.org/wiki/PRINCE2 [compatibles avec tous les systèmes d'exploitation]: https://fr.wikipedia.org/wiki/Logiciel_multiplateforme [Markdown]: https://fr.wikipedia.org/wiki/Markdown [GitHub] : https://github.com/adam-p/markdown-here - [Notepad++] : http://notepad-plus-plus.org/fr/ + [Notepad++] : https://notepad-plus-plus.org/fr/ [Komodo Edit]: https://www.activestate.com/products/komodo-edit/ [Text Wrangler]: https://www.barebones.com/support/textwrangler/updates.html diff --git a/fr/lecons/redaction-durable-avec-pandoc-et-markdown.md b/fr/lecons/redaction-durable-avec-pandoc-et-markdown.md index 4ecca67bdb..6ec8c631f6 100644 --- a/fr/lecons/redaction-durable-avec-pandoc-et-markdown.md +++ b/fr/lecons/redaction-durable-avec-pandoc-et-markdown.md @@ -57,7 +57,7 @@ C'est ici qu'excelle Markdown. Markdown est une syntaxe qui permet le marquage s Écrire ce cette façon libère l'auteur(e) de son outil. Vous pouvez écrire en Markdown dans n'importe quel éditeur de texte brut, et la syntaxe dispose d'un riche écosystème de logiciels qui peuvent transformer ces textes en de magnifiques documents. C'est pour cette raison que Markdown connaît actuellement une hausse de popularité, non seulement comme outil de rédaction d'articles scientifiques, mais aussi comme norme pour l'édition en général. -[Atom](https://atom.io/) (disponible sur toutes les plateformes) et [Notepad++](http://notepad-plus-plus.org) (Windows seulement) sont parmi les éditeurs de texte brut tout usage les plus populaires. +[Atom](https://atom.io/) (disponible sur toutes les plateformes) et [Notepad++](https://notepad-plus-plus.org) (Windows seulement) sont parmi les éditeurs de texte brut tout usage les plus populaires. Il est important de comprendre que Markdown n'est qu'une convention d'écriture. 
Les fichiers Markdown sont enregistrés en texte brut, ce qui contribue d'autant plus à la flexibilité de ce format. Les fichiers en texte brut existent depuis l'apparition des machines à écrire électroniques. La longévité de cette norme fait du texte brut un format intrinsèquement plus durable et plus stable que les formats propriétaires. Alors que des fichiers créés avec Microsoft Word et Apple Pages il y a à peine dix ans peuvent provoquer des difficultés importantes lorsqu'ils sont affichés dans une version plus récente de ces logiciels, il est encore possible aujourd'hui d'afficher sans problème un fichier créé avec l'un des nombreux éditeurs de texte brut "disparus" depuis quelques décennies : AlphaPlus, Perfect Writer, Text Wizard, Spellbinder, WordStar, ou le préféré d'Isaac Asimov, SCRIPSIT 2.0, créé par la chaîne de magasins d'électronique Radio Shack. Écrire en texte brut permettra à vos fichiers d'être encore lisibles dans 10, 15 ou 20 ans. Cette leçon propose un processus de rédaction qui libère les chercheurs et chercheuses des logiciels de traitement de texte propriétaires et des formats non durables. @@ -315,7 +315,7 @@ Le filtre "citeproc" analysera toutes les clés de citation trouvées dans votre ## Changer de style de citation -Le style de citation par défaut de Pandoc est le Chicago Auteur-Date. On peut spécifier un style différent en utilisant une feuille de style écrite en "Citation Style Language" (une autre convention au format texte, qui décrit les styles de citation) et désignée par l'extension de fichier .csl. Heureusement, le projet CSL maintient un dépôt de styles de citation communs, certains étant même adaptés à des revues spécifiques. Visitez le site pour trouver le fichier .csl de la Modern Language Association (MLA), téléchargez `modern-language-association.csl`, et sauvegardez-le dans le répertoire de votre projet sous le nom `mla.csl`. Maintenant, nous devons signaler à Pandoc d'utiliser la feuille de style MLA au lieu du style par défaut, Chicago. Nous faisons ceci en mettant à jour l'en-tête YAML : +Le style de citation par défaut de Pandoc est le Chicago Auteur-Date. On peut spécifier un style différent en utilisant une feuille de style écrite en "Citation Style Language" (une autre convention au format texte, qui décrit les styles de citation) et désignée par l'extension de fichier .csl. Heureusement, le projet CSL maintient un dépôt de styles de citation communs, certains étant même adaptés à des revues spécifiques. Visitez le site pour trouver le fichier .csl de la Modern Language Association (MLA), téléchargez `modern-language-association.csl`, et sauvegardez-le dans le répertoire de votre projet sous le nom `mla.csl`. Maintenant, nous devons signaler à Pandoc d'utiliser la feuille de style MLA au lieu du style par défaut, Chicago. Nous faisons ceci en mettant à jour l'en-tête YAML : ``` --- @@ -354,13 +354,13 @@ Considérez vos fichiers sources comme une version faisant autorité de votre te ### En anglais : -En cas de problème, il n'y a pas de meilleur endroit pour commencer votre recherche que [le site web de Pandoc](https://pandoc.org/) et sa [liste de discussion](https://groups.google.com/g/pandoc-discuss). Des sites de type "Questions-réponses" peuvent répertorier des questions sur Pandoc, tel [Stack Overflow](http://stackoverflow.com/questions/tagged/pandoc); vous pouvez aussi consulter les archives du site [Digital Humanities Q&A](https://dhanswers.ach.org/) qui était actif de 2010 à 2019. 
Les questions peuvent également être posées en direct, sur la chaîne Pandoc de Freenode IRC, qui est fréquentée par un groupe d'habitué(e)s plutôt sympathiques. Au fur et à mesure que vous en apprendrez davantage sur Pandoc, vous pouvez également explorer l'une de ses fonctionnalités les plus puissantes : les [filtres](https://github.com/jgm/pandoc/wiki/Pandoc-Filters). +En cas de problème, il n'y a pas de meilleur endroit pour commencer votre recherche que [le site web de Pandoc](https://pandoc.org/) et sa [liste de discussion](https://groups.google.com/g/pandoc-discuss). Des sites de type "Questions-réponses" peuvent répertorier des questions sur Pandoc, tel [Stack Overflow](https://stackoverflow.com/questions/tagged/pandoc); vous pouvez aussi consulter les archives du site [Digital Humanities Q&A](https://dhanswers.ach.org/) qui était actif de 2010 à 2019. Les questions peuvent également être posées en direct, sur la chaîne Pandoc de Freenode IRC, qui est fréquentée par un groupe d'habitué(e)s plutôt sympathiques. Au fur et à mesure que vous en apprendrez davantage sur Pandoc, vous pouvez également explorer l'une de ses fonctionnalités les plus puissantes : les [filtres](https://github.com/jgm/pandoc/wiki/Pandoc-Filters). -Bien que nous suggérions de commencer avec un simple éditeur de texte, nombre d'alternatives à MS Word spécifiques à Markdown sont disponibles en ligne, et souvent sans frais (d'après [cette entrée de blogue](http://web.archive.org/web/20140120195538/http://mashable.com/2013/06/24/markdown-tools/) qui date de 2013, il en existait alors plus de 70). Parmi les projets autonomes, nous apprécions particulièrement [Mou](http://mouapp.com/), [Write Monkey](http://writemonkey.com), et [Sublime Text](http://www.sublimetext.com/). Plusieurs plateformes sur le Web ont récemment vu le jour et fournissent des interfaces graphiques élégantes pour l'écriture collaborative et le suivi des versions à l'aide de Markdown. Il s'agit entre autres de [prose.io](http://prose.io), [Authorea](http://www.authorea.com), [Draft](http://www.draftin.com), et [StackEdit](https://stackedit.io). +Bien que nous suggérions de commencer avec un simple éditeur de texte, nombre d'alternatives à MS Word spécifiques à Markdown sont disponibles en ligne, et souvent sans frais (d'après [cette entrée de blogue](https://web.archive.org/web/20140120195538/https://mashable.com/2013/06/24/markdown-tools/) qui date de 2013, il en existait alors plus de 70). Parmi les projets autonomes, nous apprécions particulièrement [Mou](https://mouapp.com/), [Write Monkey](https://writemonkey.com), et [Sublime Text](https://www.sublimetext.com/). Plusieurs plateformes sur le Web ont récemment vu le jour et fournissent des interfaces graphiques élégantes pour l'écriture collaborative et le suivi des versions à l'aide de Markdown. Il s'agit entre autres de [prose.io](https://prose.io), [Authorea](https://www.authorea.com), [Draft](https://www.draftin.com), et [StackEdit](https://stackedit.io). -Cependant, l'écosystème ne se limite pas aux éditeurs de texte. [Gitit](https://github.com/jgm/gitit) et [Ikiwiki](https://github.com/dubiousjim/pandoc-iki) supportent la rédaction en Markdown avec Pandoc comme analyseur de syntaxe. 
À cette liste, nous pouvons ajouter une gamme d'outils qui génèrent des pages web rapides et statiques, [Yst](https://github.com/jgm/yst), [Jekyll](http://github.com/fauno/jekyll-pandoc-multiple-formats), [Hakyll](http://jaspervdj.be/hakyll/), et [bash shell script](https://github.com/wcaleb/website), un projet de l'historien Caleb McDaniel. +Cependant, l'écosystème ne se limite pas aux éditeurs de texte. [Gitit](https://github.com/jgm/gitit) et [Ikiwiki](https://github.com/dubiousjim/pandoc-iki) supportent la rédaction en Markdown avec Pandoc comme analyseur de syntaxe. À cette liste, nous pouvons ajouter une gamme d'outils qui génèrent des pages web rapides et statiques, [Yst](https://github.com/jgm/yst), [Jekyll](https://github.com/fauno/jekyll-pandoc-multiple-formats), [Hakyll](https://jaspervdj.be/hakyll/), et [bash shell script](https://github.com/wcaleb/website), un projet de l'historien Caleb McDaniel. -Enfin, des plates-formes d'édition entières se développent autour de l'utilisation de Markdown. Un marché de l'édition en Markdown, comme le fait déjà [Leanpub](https://leanpub.com), pourrait être une alternative intéressante au modèle d'édition traditionnel. Nous-mêmes expérimentons avec la conception de revues universitaires basées sur GitHub et [readthedocs.org](http://readthedocs.org) (ces outils sont habituellement utilisés pour la documentation technique). +Enfin, des plates-formes d'édition entières se développent autour de l'utilisation de Markdown. Un marché de l'édition en Markdown, comme le fait déjà [Leanpub](https://leanpub.com), pourrait être une alternative intéressante au modèle d'édition traditionnel. Nous-mêmes expérimentons avec la conception de revues universitaires basées sur GitHub et [readthedocs.org](https://readthedocs.org) (ces outils sont habituellement utilisés pour la documentation technique). ### En français (N.D.L.R. : il s’agit de notes ajoutées à la version traduite) : @@ -378,7 +378,7 @@ Pour la gestion des bibliographies, consulter aussi: Raphaël Grolimund, Frédé [^2]: Les documents d'origine peuvent être [téléchargés à partir de GitHub](https://github.com/dhcolumbia/pandoc-workflow). Utilisez l'option "Raw" (brut) lors de la visualisation dans GitHub pour voir la source en Markdown. Les auteurs tiennent à remercier Alex Gil et ses collègues du Digital Humanities Center de Columbia, ainsi que les participants du studio openLab de la Bibliothèque Butler, qui ont testé le code de ce tutoriel sur diverses plateformes. -[^3]: Voir l'excellente réflexion de Charlie Stross sur ce sujet: [Why Microsoft Word Must Die](http://www.antipope.org/charlie/blog-static/2013/10/why-microsoft-word-must-die.html). +[^3]: Voir l'excellente réflexion de Charlie Stross sur ce sujet: [Why Microsoft Word Must Die](https://www.antipope.org/charlie/blog-static/2013/10/why-microsoft-word-must-die.html). [^4]: Il n'y a pas de bonnes solutions pour passer directement de LaTeX à MS Word. diff --git a/fr/lecons/telecharger-des-pages-web-avec-python.md b/fr/lecons/telecharger-des-pages-web-avec-python.md index c7c2e9beef..c0bc39b338 100644 --- a/fr/lecons/telecharger-des-pages-web-avec-python.md +++ b/fr/lecons/telecharger-des-pages-web-avec-python.md @@ -55,7 +55,7 @@ Voyons quelques exemples. http://oldbaileyonline.org ``` -Le type d'URL le plus élémentaire se contente de spécifier le protocole et l'hôte. Si vous fournissez cet URL à votre navigateur, vous obtiendrez la page d'accueil du site [Old Bailey Online](http://www.oldbaileyonline.org/). 
Par défaut, on assume que la page principale dans un répertoire donné est nommée 'index', le plus souvent `index.html`. +Le type d'URL le plus élémentaire se contente de spécifier le protocole et l'hôte. Si vous fournissez cet URL à votre navigateur, vous obtiendrez la page d'accueil du site [Old Bailey Online](https://www.oldbaileyonline.org/). Par défaut, on assume que la page principale dans un répertoire donné est nommée 'index', le plus souvent `index.html`. L'URL peut aussi inclure un *numéro de port* (optionnel). Sans entrer dans les détails, le protocole de communication qui gouverne les échanges d'information sur Internet permet aux ordinateurs de connecter de multiples façons. Les numéros de ports servent à identifier ces différentes manières de se connecter. Puisque le port par défaut pour les connexions HTTP est le 80, l'URL suivant est équivalent au précédent : @@ -95,7 +95,7 @@ En étudiant la structure de l'URL, il est possible d'apprendre plusieurs choses {% include figure.html filename="bowsey-trial-page.png" caption="Page de la transcription du procès de Benjamin Bowsey, 1780" %} -Examinez la page du procès de Benjamin Bowsey pendant quelques minutes. Concentrez-vous sur les caractéristiques de la page plutôt que sur la transcription elle-même. Par exemple, notez la présence du lien [View as XML](http://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes) au bas de la page, qui vous amènera vers une version abondamment balisée du texte qui pourrait être utile pour certains types de recherche. Vous pouvez aussi consulter une [image numérisée du document d'origine](http://www.oldbaileyonline.org/images.jsp?doc=178006280084) qui a été transcrit pour construire cette ressource. +Examinez la page du procès de Benjamin Bowsey pendant quelques minutes. Concentrez-vous sur les caractéristiques de la page plutôt que sur la transcription elle-même. Par exemple, notez la présence du lien [View as XML](https://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes) au bas de la page, qui vous amènera vers une version abondamment balisée du texte qui pourrait être utile pour certains types de recherche. Vous pouvez aussi consulter une [image numérisée du document d'origine](https://www.oldbaileyonline.org/images.jsp?doc=178006280084) qui a été transcrit pour construire cette ressource. Essayons maintenant d'ouvrir cette page en Python. Copiez le programme suivant dans votre éditeur de texte et sauvegardez-le sous le titre `open-webpage.py`. Lorsque vous exécuterez le programme, il ouvrira (`open`) la page du procès, il lira (`read`) son contenu dans une chaîne de caractères Python nommée contenu_web, puis il affichera les 300 premiers caractères du fichier à l'écran. Utilisez la commande `Outils -> Développement web -> Code source de la page` de Firefox (ou son équivalent dans votre propre navigateur) pour vérifier que le code source HTML de la page est bien identique à ce que vous venez de télécharger. Notez aussi que chaque navigateur possède son propre raccourci clavier qui permet d'accéder au code source HTML d'une page ; dans le cas de la version Windows de Firefox, il s'agit de `CTRL+u`. Si vous ne parvenez pas à trouver l'équivalent pour votre propre navigateur, essayez de faire appel à votre moteur de recherche favori pour y arriver. (Consultez la documentation de Python pour en savoir plus au sujet de [urllib](https://docs.python.org/fr/3/library/urllib.html?highlight=urllib).) 
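Le programme `open-webpage.py` décrit au paragraphe précédent tient en quelques lignes. L'esquisse ci-dessous est une reconstruction d'après cette description, et non le code exact de la leçon ; le décodage `utf-8` y est une hypothèse d'illustration.

``` python
# open-webpage.py (esquisse)
import urllib.request

url = 'https://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33'

reponse = urllib.request.urlopen(url)          # ouvre la page du procès
contenu_web = reponse.read().decode('utf-8')   # lit le contenu dans une chaîne de caractères
print(contenu_web[0:300])                      # affiche les 300 premiers caractères
```

Comme le signale l'avertissement cité plus haut, cette URL peut ne plus fonctionner ; il suffit alors de modifier la variable `url`.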
diff --git a/fr/lecons/transcription-automatisee-graphies-non-latines.md b/fr/lecons/transcription-automatisee-graphies-non-latines.md index d02f331793..54693a5ba9 100644 --- a/fr/lecons/transcription-automatisee-graphies-non-latines.md +++ b/fr/lecons/transcription-automatisee-graphies-non-latines.md @@ -436,7 +436,7 @@ Une approche par *baselines* (en rouge sur la figure 10, il s'agit de la li ```xml - + Calfa 2022-08-23T14:48:18+00:00 @@ -838,7 +838,7 @@ Les données générées pour cet article et dans le cadre du projet CGPG sont d [^38]: *Ibid.* -[^39]: Bastien Kindt et Vidal-Gorène Chahan, « From Manuscript to Tagged Corpora. An Automated Process for Ancient Armenian or Other Under-Resourced Languages of the Christian East ». *Armeniaca. International Journal of Armenian Studies* 1, 73-96, 2022. [http://doi.org/10.30687/arm/9372-8175/2022/01/005]( http://doi.org/10.30687/arm/9372-8175/2022/01/005) +[^39]: Bastien Kindt et Vidal-Gorène Chahan, « From Manuscript to Tagged Corpora. An Automated Process for Ancient Armenian or Other Under-Resourced Languages of the Christian East ». *Armeniaca. International Journal of Armenian Studies* 1, 73-96, 2022. [https://doi.org/10.30687/arm/9372-8175/2022/01/005]( https://doi.org/10.30687/arm/9372-8175/2022/01/005) [^40]: Vidal-Gorène, Lucas, Salah, Decours-Perez, et Dupin. « RASAM–A Dataset for the Recognition and Analysis of Scripts in Arabic Maghrebi », 265-281. diff --git a/fr/nos-soutiens.md b/fr/nos-soutiens.md index e373ddb04c..2053957ab4 100644 --- a/fr/nos-soutiens.md +++ b/fr/nos-soutiens.md @@ -29,7 +29,7 @@ Les institutions suivantes font partie de notre programme de [Partenariat instit - [Cambridge Digital Humanities](https://www.cdh.cam.ac.uk/), Royaume-Uni - [Georg-August-Universität Göttingen](https://www.uni-goettingen.de/), Allemagne - [MIT Libraries](https://libraries.mit.edu/), États-Unis -- [Center for Digital Research in the Humanities, University of Nebraska-Lincoln](http://cdrh.unl.edu/), États-Unis +- [Center for Digital Research in the Humanities, University of Nebraska-Lincoln](https://cdrh.unl.edu/), États-Unis - [The National Archives](https://www.nationalarchives.gov.uk/), Royaume-Uni - [College of the Liberal Arts, Penn State University](https://la.psu.edu/), États-Unis - [Purdue University](https://www.purdue.edu/), États-Unis diff --git a/fr/pi.md b/fr/pi.md index 8c5cdd2f13..4776d5e10f 100644 --- a/fr/pi.md +++ b/fr/pi.md @@ -54,7 +54,7 @@ Joindre le programme de Partenariat institutionnel vous donnera accès aux avant
    - + diff --git a/fr/recherche.md b/fr/recherche.md index fd80bc0313..f710f3375c 100644 --- a/fr/recherche.md +++ b/fr/recherche.md @@ -10,17 +10,17 @@ L'équipe du projet et les membres de la communauté plus large qui la compose s ## Édition originale du Programming Historian -* William J. Turkel et Alan MacEachern, [_The Programming Historian_](http://niche-canada.org/wp-content/uploads/2013/09/programming-historian-1.pdf) 1ère édition (Network in Canadian History & Environment: 2007-2008). +* William J. Turkel et Alan MacEachern, [_The Programming Historian_](https://niche-canada.org/wp-content/uploads/2013/09/programming-historian-1.pdf) 1ère édition (Network in Canadian History & Environment: 2007-2008). * Traduction en japonais de William J. Turkel et Alan MacEachern, [_The Programming Historian_](https://www.dh.ku-orcas.kansai-u.ac.jp/?cat=2) 1ère édition (Network in Canadian History & Environment: 2007-2008). ## Comptes-rendus -* Björn Ekström, Elisa Tattersall Wallin and Hana Marčetić, '[_Programming Historian_: Novice-friendly tutorials on digital methods](http://www.diva-portal.org/smash/record.jsf?pid=diva2%3A1508542&dswid=7551)', _Tidskrift för ABM_, Vol. 5, no 1 (2020), pp. 71-75. +* Björn Ekström, Elisa Tattersall Wallin and Hana Marčetić, '[_Programming Historian_: Novice-friendly tutorials on digital methods](https://www.diva-portal.org/smash/record.jsf?pid=diva2%3A1508542&dswid=7551)', _Tidskrift för ABM_, Vol. 5, no 1 (2020), pp. 71-75. * Dries Daems, '[A Review and Roadmap of Online Learning Platforms and Tutorials in Digital Archaeology](https://doi.org/10.1017/aap.2019.47)', _Advances in Archaeological Practice_, vol. 8, issue 1 (2020), pp. 87-92. * Martin Dröge, '[Review of: The Programming Historian](https://www.hsozkult.de/webreview/id/rezwww-184)', _H-Soz-Kult_ (2019). * Priscila Pilatowsky Goñi, '[Reseña a The programming historian](https://revistas.uned.es/index.php/RHD/article/view/22420)', _Revista de Humanidades Digitales_, vol. 2 (2018). * Lincoln Mullen, '[Review of the Programming Historian](https://academic.oup.com/jah/article-abstract/103/1/299/1751315)', _The Journal of American History_, vol. 103, no. 1 (2016), pp. 299-301. -* Cameron Blevins, '[Review of the Programming Historian](http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/)', _The Journal of Interactive Technology & Pedagogy_, vol. 8 (2015). +* Cameron Blevins, '[Review of the Programming Historian](https://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/)', _The Journal of Interactive Technology & Pedagogy_, vol. 8 (2015). ## Publications scientifiques @@ -30,22 +30,22 @@ L'équipe du projet et les membres de la communauté plus large qui la compose s * Jennifer Isasi, Riva Quiroga, Nabeel Sidiqqui, Joana Vieira Paulino, Alex Wermer-Colan, [“A Model for Multilingual and Multicultural Digital Scholarship Methods Publishing"](https://www.taylorfrancis.com/chapters/edit/10.4324/9781003393696-3/model-multilingual-multicultural-digital-scholarship-methods-publishing-jennifer-isasi-riva-quiroga-nabeel-siddiqui-joana-vieira-paulino-alex-wermer-colan), dans _Multilingual Digital Humanities_, edité par Viola, L., & Spence, P., Routledge, 2023. * Adam Crymble & Charlotte M. H. Im, ['Measuring digital humanities learning requirements in Spanish & English-speaking practitioner communities'](https://doi.org/10.1007/s42803-023-00066-x), International Journal of Digital Humanities, (2023). 
* Eric Brasil, '[_pyHDB - Ferramenta Heurística para a Hemeroteca Digital Brasileira: utilizando técnicas de web scraping para a pesquisa em História_'](https://doi.org/10.15848/hh.v15i40.1904), _História Da Historiografia: International Journal of Theory and History of Historiography_, 15(40) (2022), 186–217. -* Matthew Lincoln, Sarah Melton, Jennifer Isasi, François Dominic Laramée, '[Relocating Complexity: The Programming Historian and Multilingual Static Site Generation](http://www.digitalhumanities.org/dhq/vol/16/2/000585/000585.html)', _Digital Humanities Quarterly_ 16, 2 (2022). +* Matthew Lincoln, Sarah Melton, Jennifer Isasi, François Dominic Laramée, '[Relocating Complexity: The Programming Historian and Multilingual Static Site Generation](https://www.digitalhumanities.org/dhq/vol/16/2/000585/000585.html)', _Digital Humanities Quarterly_ 16, 2 (2022). * Jennifer Isasi et Antonio Rojas Castro, ‘[¿Sin equivalencia? Una reflexión sobre la traducción al español de recursos educativos abiertos](https://muse.jhu.edu/article/842253)’, _Hispania_, 104, no. 4 (2021), 613-624. * Adam Crymble et Maria José Afanador Llach, ‘The Globally Unequal Promise of Digital Tools for History: UK and Colombia Case Study’ dans _Teaching History for the Contemporary World_, edité par Adele Nye, 85-98, Springer, 2021. * Daniel Alves, ['Ensinar Humanidades Digitais sem as Humanidades Digitais: um olhar a partir das licenciaturas em História'](https://novaresearch.unl.pt/files/32228034/Ensinar_Humanidades_Digitais.pdf), _Revista EducaOnline_, v. 15, n. 2 (2021). * Adam Crymble, [_Technology & the Historian: Transformations in the Digital Age_](https://www.press.uillinois.edu/books/catalog/57hxp7wr9780252043710.html), (University of Illinois Press, 2021). * Anna-Maria Sichani, James Baker, Maria José Afanador Llach, et Brandon Walsh, [‘Diversity and Inclusion in Digital Scholarship and Pedagogy: The Case of The Programming Historian’](https://doi.org/10.1629/uksg.465), _Insights_, (2019). -* Katrina Navickas et Adam Crymble, ['From Chartist Newspaper to Digital Map of Grass-roots Meetings, 1841-44: Documenting Workflows'](http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179), _Journal of Victorian Culture_, (2017). +* Katrina Navickas et Adam Crymble, ['From Chartist Newspaper to Digital Map of Grass-roots Meetings, 1841-44: Documenting Workflows'](https://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179), _Journal of Victorian Culture_, (2017). * Adam Crymble, ['Identifying and Removing Gender Barriers in Open Learning Communities: The Programming Historian'](https://www.herts.ac.uk/__data/assets/pdf_file/0016/138013/Blip-2016-Autumn-2016-Final-Autumn-2016.pdf), _Blended Learning in Practice_, (2016), 49-60. [[pre-print pdf](/researchpapers/openLearningCommunities2016.pdf)] -* Fred Gibbs, ['Editorial Sustainability and Open Peer Review at Programming Historian',](http://web.archive.org/web/20180713014622/http://dhcommons.org/journal/issue-1/editorial-sustainability-and-open-peer-review-programming-historian) _DH Commons_, Vol. 1 (2015). -* Shawn Graham, Ian Milligan et Scott Weingart, [_Exploring Big Historical Data: The Historian's Macroscope_](http://www.themacroscope.org/2.0/), (Imperial College Press, 2015). +* Fred Gibbs, ['Editorial Sustainability and Open Peer Review at Programming Historian',](https://web.archive.org/web/20180713014622/https://dhcommons.org/journal/issue-1/editorial-sustainability-and-open-peer-review-programming-historian) _DH Commons_, Vol. 1 (2015). 
+* Shawn Graham, Ian Milligan et Scott Weingart, [_Exploring Big Historical Data: The Historian's Macroscope_](https://www.themacroscope.org/2.0/), (Imperial College Press, 2015). ## Rapports * Maria José Afanador-Llach & Andrés Rivera, '[Segundo ciclo de talleres: Herramientas y procesos digitales para la investigación y creación en artes y humanidades](/researchpapers/Informe_final_Talleres%20EHCN_2023-ENG_PH.pdf)', (2023). * Incllewsion et l'équipe du Programming Historian, 'Initial Accessibility Testing: Summary of Findings', (2021). -* Penny Andrews et l'équipe du Programming Historian, ['The Programming Historian: developing and sustaining impact in the Global South'](http://doi.org/10.5281/zenodo.3813763) (2020). +* Penny Andrews et l'équipe du Programming Historian, ['The Programming Historian: developing and sustaining impact in the Global South'](https://doi.org/10.5281/zenodo.3813763) (2020). * Amy Kavanagh et l'équipe du Programming Historian, 'Programming Historian – Access for visually impaired researchers', (n.d.). ## Ateliers et évènements @@ -56,7 +56,7 @@ L'équipe du projet et les membres de la communauté plus large qui la compose s * Alex Wermer-Colan, ['Learning Digital Methods with the _Programming Historian_'](https://charlesstudy.temple.edu/event/11953011), Temple University [En ligne], (22 février 2024). * Carlo Blum, Adam Crymble, Vicky Garnett, Timothée Giraud, Alíz Horváth, Stefan Krebs, Ralph Marschall, Sofia Papastamkou, & Lorella Viola, 'Invisible College of Digital History: Workshop on Multilingual Educational Resources', C²DH [En ligne], (8 novembre 2023). * Nabeel Siddiqui, 'Convolutional Neural Networks for Image Classification', University of Edinburgh [En ligne], (7 novembre 2023). -* Eric Brasil, '[História Digital e História Digital da Educação: Caminhos Cruzados](http://www.iea.usp.br/eventos/historia-digital-educacao-caminhos-cruzados)', Instituto de Estudos Avançados, USP, São Paulo, Brésil, (17 octobre 2023). +* Eric Brasil, '[História Digital e História Digital da Educação: Caminhos Cruzados](https://www.iea.usp.br/eventos/historia-digital-educacao-caminhos-cruzados)', Instituto de Estudos Avançados, USP, São Paulo, Brésil, (17 octobre 2023). * Scott Kleinman, Alex Wermer-Colan, Joana Vieira Paulino, Nabeel Siddiqui, Zoe LeBlanc, 'Developing a Digital Humanities Tutorial', [DH 2023](https://dh2023.adho.org/), Graz, Autriche, (10 juillet 2023). * Daphné Mathelier, 'Atelier Markdown', [11e journées du réseau Medici](https://medici2023.sciencesconf.org/resource/page/id/2), Université de Liège, Belgique, (29 juin 2023). * María José Afanador Llach, Jennifer Isasi, Riva Quiroga, 'Sobre _Programming Historian en español_ y cómo contribuir a la publicación', Semana de Humanidades Digitales 2023 [En ligne], (10 mai 2023). @@ -141,10 +141,10 @@ L'équipe du projet et les membres de la communauté plus large qui la compose s * Adam Crymble, 'Facilitating Making in Digital Humanities', The Archaeology of Making, Université de Londres, Royaume-Uni, 5 mai 2021. * Daniel Alves, Jennifer Isasi, Sarah Melton, Sofia Papastamkou, Jessica Parr, Riva Quiroga, Nabeel Siddiqui, Brandon Walsh, '[The Programming Historian: A Global Case Study in Multilingual Open Access and DH Tutelage/Instruction](https://msuglobaldh.org/abstracts/#programming-historian)' (panel), _Global Digital Humanities Symposium_, Michigan State University, East Lansing, USA, 12 avril, 2021. 
* Jessica Parr, '[Cambridge Cultural Heritage Data School: Final plenary](https://www.cdh.cam.ac.uk/events/cambridge-cultural-heritage-data-school-final-plenary)', University of Cambridge, Royaume-Uni, 30 mars 2021.
-* Jennifer Isasi & Riva Quiroga, ['_Programming Historian_: Un proyecto colaborativo para poner la programación al alcance de los humanistas'](http://ixa2.si.ehu.eus/intele/?q=webinars), _INTELE : INfraestructura de TEcnologías del LEnguaje_, España, 25 mars, 2021.
+* Jennifer Isasi & Riva Quiroga, ['_Programming Historian_: Un proyecto colaborativo para poner la programación al alcance de los humanistas'](https://ixa2.si.ehu.eus/intele/?q=webinars), _INTELE : INfraestructura de TEcnologías del LEnguaje_, España, 25 mars, 2021.
* Sofia Papastamkou, Jessica Parr & Riva Quiroga, 'Challenges for Digital Literacy in the Humanities: The Open, Community-Based and Multilinguistic Approach of _The Programming Historian_', NewsEye’s International Conference, France, 17 mars, 2021.
* Riva Quiroga, ['Multilingual Digital Humanities'](https://mediacentral.ucl.ac.uk/Play/59506), Digital Humanities Long View Seminar, UCLDH, UK & CESTA, USA, 10 mars, 2021.
-* Brandon Walsh, '[The Programming Historian and Editorial Process in Digital Publishing](http://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/)', Modern Languages Association Conference 2021, 7-10 janvier 2021.
+* Brandon Walsh, '[The Programming Historian and Editorial Process in Digital Publishing](https://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/)', Modern Languages Association Conference 2021, 7-10 janvier 2021.
* Sofia Papastamkou, François Dominic Laramée, Martin Grandjean, '[Le Programming Historian en français: quelles ressources éducatives libres pour les méthodes numériques ?](https://zenodo.org/record/3819954)', *Humanistica 2020*, Bordeaux, France, 12-14 mai 2020.
* Sofia Papastamkou, 'A Beating Heart of Digital History: The Programming Historian', [Atelier Teaching Digital History](https://cas.au.dk/en/cedhar/events/show/artikel/teaching-digital-history-workshop), Center for Digital History Aarhus, Université d'Aarhus, Danemark, 23 octobre 2019.
* Jennifer Isasi, Maria José Afanador y Antonio Rojas Castro, 'Retos en la producción de tutoriales de HD en contextos hispanohablantes', Conferencia ACH 2019, The Association for Computers and the Humanities, Pittsburgh, 23-26 juillet 2019.
@@ -157,7 +157,7 @@ L'équipe du projet et les membres de la communauté plus large qui la compose s
* Victor Gayol, 'La investigación del pasado y la historia digital: análisis de datos y cómo aprender (The Programming Historian en español)', _Humanidades Digitales_, IV Feria Internacional de Ciencias Sociales y Humanidades, Centro Universitario de Los Lagos - Universidad de Guadalajara, Lagos de Moreno, Jalisco (9 mars 2017).
* Victor Gayol, 'The Programming Historian: un modelo colaborativo para la investigación y la enseñanza en ciencias sociales y humanidades digitales', _Mesa de Trabajo sobre Ciencias Sociales y Humanidades Digitales_, El Colegio de Michoacán, Mexico (21 février 2017).
* Adam Crymble, 'Bringing Digital Humanities into the University for Free', Université de Cape Town, Afrique du Sud (27-28 juin 2016).
-* Fred Gibbs, 'The Programming Historian' (Poster), _American Historical Association_, New York (janvier 2015).
+* Fred Gibbs, 'The Programming Historian' (Poster), _American Historical Association_, New York (janvier 2015). 
* Adam Crymble, 'The Programming Historian 2', _Digital History Seminar_, Institute of Historical Research, Londres (13 octobre 2013).
* Adam Crymble, 'The Programming Historian 2', _Digital Humanities 2012_, Hamburg (juillet 2012).

@@ -168,11 +168,11 @@ L'équipe du projet et les membres de la communauté plus large qui la compose s
* Matthew Lincoln, 'Multilingual Jekyll: How The Programming Historian Does That', *matthewlincoln.net*, 1er mars 2020, .
* Sue Levine, 'The Early-Stage Ph.D.'s Guide to Summer', _Inside Higher Education_, 10 juin 2019, .
* 'Championing open access with online digital history journal', _University of Sussex Press Office_, 9 octobre 2018, .
-* Adam Crymble, 'A Decade of Programming Historians', _Network in Canadian History & Environment_, 23 mars 2018, .
-* Fred Gibbs, "Sustainable Publishing: Reflections of a Former Programming Historian Editor", FredGibbs.net, 2017, .
-* Anaclet Pons, "The Programming Historian en español", _Clionauta: Blog de historia_, 14 juin 2017, .
+* Adam Crymble, 'A Decade of Programming Historians', _Network in Canadian History & Environment_, 23 mars 2018, .
+* Fred Gibbs, "Sustainable Publishing: Reflections of a Former Programming Historian Editor", FredGibbs.net, 2017, .
+* Anaclet Pons, "The Programming Historian en español", _Clionauta: Blog de historia_, 14 juin 2017, .
* Seth Denbo, “Historian, Program! Self-Help for Digital Neophytes,” _Perspectives on History: The Newsmagazine of the American Historical Association_, mai 2017, .
-* Víctor Gayol, '*The Programming Historian* en español', *Blog de Humanidades Digitales*, 17 mars 2017, .
+* Víctor Gayol, '*The Programming Historian* en español', *Blog de Humanidades Digitales*, 17 mars 2017, .

## Projets utilisant le Programming Historian

diff --git a/htmlproofer-output.txt b/htmlproofer-output.txt
index a2f41a477c..d80221f0da 100644
--- a/htmlproofer-output.txt
+++ b/htmlproofer-output.txt
@@ -2,7 +2,7 @@

Running 3 checks (Images, Links, Scripts) in ["_site"] on *.html files ...

-Checking 6252 external links
+Checking 6147 external links
Checking 5062 internal links
Checking internal link hashes in 311 files
Ran on 657 files!
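The summary block above is the shape of html-proofer's console output: the checks it ran, and the external and internal link counts it visited. A minimal sketch of the equivalent call, assuming the html-proofer 5.x Ruby API this patch upgrades to (the directory path and check names simply mirror the log; all other options are left at their defaults):

    require 'html_proofer'

    # Run the three checks named in the log ("Images, Links, Scripts")
    # over the built Jekyll site. #run raises with a per-file failure
    # list, like the entries in the hunks below, if any check fails.
    HTMLProofer.check_directory(
      './_site',
      checks: ['Links', 'Images', 'Scripts']
    ).run

The hunks that follow remove the "is not an HTTPS link" entries from this expected-output file, matching the http-to-https link updates made elsewhere in the patch.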
@@ -212,46 +212,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:198: - - http://markhadley.com is not an HTTPS link - -* At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:199: - - http://www.sheffield.ac.uk/hri/ is not an HTTPS link - -* At _site/assets/mapping-with-python-leaflet/exercises/exercise00 - original/mymap.html:3: - - http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css is not an HTTPS link - -* At _site/assets/mapping-with-python-leaflet/exercises/exercise01/mymap.html:3: - - http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css is not an HTTPS link - -* At _site/assets/mapping-with-python-leaflet/exercises/exercise02/mymap.html:3: - - http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css is not an HTTPS link - -* At _site/assets/mapping-with-python-leaflet/exercises/exercise03/mymap.html:3: - - http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css is not an HTTPS link - -* At _site/assets/mapping-with-python-leaflet/exercises/exercise04/mymap.html:3: - - http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css is not an HTTPS link - -* At _site/assets/mapping-with-python-leaflet/exercises/exercise05/mymap.html:3: - - http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css is not an HTTPS link - -* At _site/assets/mapping-with-python-leaflet/map/mymap-onepage.html:3: - - http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css is not an HTTPS link - -* At _site/assets/mapping-with-python-leaflet/map/mymap.html:3: - - http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css is not an HTTPS link - * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:81: 'a' tag is missing a reference @@ -300,14 +260,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:198: - - http://markhadley.com is not an HTTPS link - -* At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:199: - - http://www.sheffield.ac.uk/hri/ is not an HTTPS link - * At _site/blog/index.html:86: 'a' tag is missing a reference @@ -456,14 +408,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/blog/page17/index.html:346: - - http://en.wikipedia.org/wiki/Static_web_page is not an HTTPS link - -* At _site/blog/page17/index.html:346: - - http://pages.github.com is not an HTTPS link - * At _site/blog/page2/index.html:86: 'a' tag is missing a reference @@ -608,26 +552,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/about.html:264: - - http://dhawards.org/dhawards2016/results/ is not an HTTPS link - -* At _site/en/about.html:264: - - http://dhawards.org/dhawards2017/results/ is not an HTTPS link - -* At _site/en/about.html:264: - - http://humanidadesdigitaleshispanicas.es/ is not an HTTPS link - -* At _site/en/about.html:264: - - http://dhawards.org/dhawards2022/results/ is not an HTTPS link - -* At _site/en/about.html:278: - - http://digitalhistoryhacks.blogspot.com/2008/01/programming-historian.html is not an HTTPS link - * At _site/en/author-guidelines.html:86: 'a' tag is missing a reference @@ -660,14 +584,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/contribute.html:298: - - http://www.worldcat.org/title/programming-historian/oclc/951537099 is not an HTTPS link - -* At _site/en/contribute.html:300: - 
- http://purdue-primo-prod.hosted.exlibrisgroup.com/primo_library/libweb/action/dlDisplay.do?vid=PURDUE&search_scope=everything&docId=PURDUE_ALMA51671812890001081&fn=permalink is not an HTTPS link - * At _site/en/editor-guidelines.html:86: 'a' tag is missing a reference @@ -684,10 +600,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/editor-guidelines.html:587: - - http://www.loc.gov/maps/collections is not an HTTPS link - * At _site/en/events.html:86: 'a' tag is missing a reference @@ -816,26 +728,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/analyzing-documents-with-tfidf.html:1478: - - http://jonathanstray.com/a-full-text-visualization-of-the-iraq-war-logs is not an HTTPS link - -* At _site/en/lessons/analyzing-documents-with-tfidf.html:1612: - - http://scikit-learn.org/stable/install.html is not an HTTPS link - -* At _site/en/lessons/analyzing-documents-with-tfidf.html:1642: - - http://dhdebates.gc.cuny.edu/debates/text/99 is not an HTTPS link - -* At _site/en/lessons/analyzing-documents-with-tfidf.html:1651: - - http://jonathanstray.com/a-full-text-visualization-of-the-iraq-war-logs is not an HTTPS link - -* At _site/en/lessons/analyzing-documents-with-tfidf.html:1666: - - http://journalofdigitalhumanities.org/2-1/words-alone-by-benjamin-m-schmidt/ is not an HTTPS link - * At _site/en/lessons/analyzing-multilingual-text-nltk-spacy-stanza.html:117: 'a' tag is missing a reference @@ -868,74 +760,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/applied-archival-downloading-with-wget.html:471: - - http://www.activehistory.ca is not an HTTPS link - -* At _site/en/lessons/applied-archival-downloading-with-wget.html:480: - - http://chronicle.com/blogs/profhacker/download-a-sequential-range-of-urls-with-curl/41055 is not an HTTPS link - -* At _site/en/lessons/applied-archival-downloading-with-wget.html:511: - - http://data2.archives.ca/e/e061/e001518029.jpg is not an HTTPS link - -* At _site/en/lessons/applied-archival-downloading-with-wget.html:512: - - http://data2.archives.ca/e/e061/e001518109.jpg is not an HTTPS link - -* At _site/en/lessons/applied-archival-downloading-with-wget.html:526: - - http://en.wikipedia.org/wiki/Leading_zero is not an HTTPS link - -* At _site/en/lessons/applied-archival-downloading-with-wget.html:627: - - http://www.nla.gov.au/apps/cdview/?pi=nla.ms-ms5393-1 is not an HTTPS link - -* At _site/en/lessons/applied-archival-downloading-with-wget.html:631: - - http://nla.gov.au/nla.ms-ms5393-1-s1-v.jpg is not an HTTPS link - -* At _site/en/lessons/applied-archival-downloading-with-wget.html:633: - - http://nla.gov.au/nla.ms-ms5393-1-s127-v.jpg is not an HTTPS link - -* At _site/en/lessons/applied-archival-downloading-with-wget.html:680: - - http://memory.loc.gov/cgi-bin/ampage?collId=mtj1&fileName=mtj1page001.db&recNum=1&itemLink=/ammem/collections/jefferson_papers/mtjser1.html&linkText=6 is not an HTTPS link - -* At _site/en/lessons/applied-archival-downloading-with-wget.html:689: - - http://memory.loc.gov/master/mss/mtj/mtj1/001/0000/ is not an HTTPS link - -* At _site/en/lessons/applied-archival-downloading-with-wget.html:701: - - http://memory.loc.gov/master/mss/mtj/mtj1/001/0000/ is not an HTTPS link - -* At _site/en/lessons/applied-archival-downloading-with-wget.html:703: - - http://memory.loc.gov/master/mss/mtj/mtj1/001/0100/ is not an HTTPS link - -* At 
_site/en/lessons/applied-archival-downloading-with-wget.html:705: - - http://memory.loc.gov/master/mss/mtj/mtj1/001/0200/ is not an HTTPS link - -* At _site/en/lessons/applied-archival-downloading-with-wget.html:709: - - http://memory.loc.gov/master/mss/mtj/mtj1/001/1400 is not an HTTPS link - -* At _site/en/lessons/applied-archival-downloading-with-wget.html:735: - - http://cushing.med.yale.edu/gsdl/collect/mdposter/ is not an HTTPS link - -* At _site/en/lessons/applied-archival-downloading-with-wget.html:748: - - http://cushing.med.yale.edu/images/mdposter/full/poster0001.jpg is not an HTTPS link - -* At _site/en/lessons/applied-archival-downloading-with-wget.html:753: - - http://cushing.med.yale.edu/images/mdposter/full/poster0637.jpg is not an HTTPS link - * At _site/en/lessons/automated-downloading-with-wget.html:117: 'a' tag is missing a reference @@ -952,46 +776,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/automated-downloading-with-wget.html:679: - - http://www.gnu.org/software/wget/ is not an HTTPS link - -* At _site/en/lessons/automated-downloading-with-wget.html:680: - - http://ftp.gnu.org/gnu/wget/ is not an HTTPS link - -* At _site/en/lessons/automated-downloading-with-wget.html:743: - - http://www.gnu.org/software/wget/manual/wget.html is not an HTTPS link - -* At _site/en/lessons/automated-downloading-with-wget.html:748: - - http://activehistory.ca/papers/ is not an HTTPS link - -* At _site/en/lessons/automated-downloading-with-wget.html:751: - - http://activehistory.ca/papers/historypaper-9/ is not an HTTPS link - -* At _site/en/lessons/automated-downloading-with-wget.html:798: - - http://activehistory.ca/papers/ is not an HTTPS link - -* At _site/en/lessons/automated-downloading-with-wget.html:824: - - http://activehistory.ca/papers/ is not an HTTPS link - -* At _site/en/lessons/automated-downloading-with-wget.html:825: - - http://activehistory.ca/papers/historypaper-9/ is not an HTTPS link - -* At _site/en/lessons/automated-downloading-with-wget.html:827: - - http://uwo.ca is not an HTTPS link - -* At _site/en/lessons/automated-downloading-with-wget.html:966: - - http://www.gnu.org/software/wget/manual/wget.html is not an HTTPS link - * At _site/en/lessons/basic-text-processing-in-r.html:123: 'a' tag is missing a reference @@ -1008,14 +792,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/basic-text-processing-in-r.html:1154: - - http://www.presidency.ucsb.edu/sou.php is not an HTTPS link - -* At _site/en/lessons/basic-text-processing-in-r.html:1157: - - http://norvig.com/ngrams/ is not an HTTPS link - * At _site/en/lessons/beginners-guide-to-twitter-data.html:124: 'a' tag is missing a reference @@ -1032,18 +808,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/beginners-guide-to-twitter-data.html:341: - - http://programminghistorian.github.io/ph-submissions/lessons/beginners-guide-to-twitter-data is not an HTTPS link - -* At _site/en/lessons/beginners-guide-to-twitter-data.html:868: - - http://journalofdigitalhumanities.org/1-1/demystifying-networks-by-scott-weingart/ is not an HTTPS link - -* At _site/en/lessons/beginners-guide-to-twitter-data.html:1145: - - http://hdlab.stanford.edu/palladio/ is not an HTTPS link - * At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:117: 'a' tag is missing a reference @@ -1060,10 +824,6 @@ For the Links check, the 
following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:489: - - http://amandavisconti.github.io/JekyllDemo/ is not an HTTPS link - * At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:496: 'a' tag is missing a reference @@ -1220,18 +980,10 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:556: - - http://jekyllrb.com/docs/home/ is not an HTTPS link - * At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:558: 'a' tag is missing a reference -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:570: - - http://jekyllrb.com/docs/home/ is not an HTTPS link - * At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:572: 'a' tag is missing a reference @@ -1260,10 +1012,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:643: - - http://www.barebones.com/products/textwrangler/download.html is not an HTTPS link - * At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:647: 'a' tag is missing a reference @@ -1280,18 +1028,10 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:718: - - http://brew.sh/ is not an HTTPS link - * At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:778: 'a' tag is missing a reference -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:780: - - http://brew.sh/ is not an HTTPS link - * At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:803: 'a' tag is missing a reference @@ -1340,42 +1080,18 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1207: - - http://kramdown.gettalong.org/quickref.html is not an HTTPS link - -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1209: - - http://kramdown.gettalong.org/quickref.html is not an HTTPS link - -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1211: - - http://www.typora.io/ is not an HTTPS link - * At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1213: 'a' tag is missing a reference -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1247: - - http://amandavisconti.github.io/JekyllDemo/resume/ is not an HTTPS link - * At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1249: 'a' tag is missing a reference -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1325: - - http://raw.githubusercontent.com/amandavisconti/JekyllDemo/gh-pages/_posts/2016-02-29-a-post-about-my-research.markdown is not an HTTPS link - * At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1327: 'a' tag is missing a reference -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1448: - - http://amandavisconti.github.io/JekyllDemo/ is not an HTTPS link - * At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1450: 'a' tag is missing a reference @@ -1388,30 +1104,10 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At 
_site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1474: - - http://jekyllthemes.org/ is not an HTTPS link - -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1475: - - http://jekyllthemes.io/ is not an HTTPS link - * At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1478: 'a' tag is missing a reference -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1482: - - http://jekyllrb.com/docs/plugins/ is not an HTTPS link - -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1489: - - http://jekyllrb.com/docs/plugins/ is not an HTTPS link - -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1489: - - http://jekyllrb.com/docs/plugins/ is not an HTTPS link - * At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1506: 'a' tag is missing a reference @@ -1424,10 +1120,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1531: - - http://stackexchange.com/ is not an HTTPS link - * At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1533: 'a' tag is missing a reference @@ -1436,30 +1128,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1544: - - http://jekyllrb.com/docs/home/ is not an HTTPS link - -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1545: - - http://jekyll-windows.juthilo.com/ is not an HTTPS link - -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1547: - - http://literaturegeek.com/2015/12/08/WhyJekyllGitHub is not an HTTPS link - -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1548: - - http://chronicle.com/blogs/profhacker/jekyll1/60913 is not an HTTPS link - -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1550: - - http://ben.balter.com/jekyll-style-guide/ is not an HTTPS link - -* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1551: - - http://prose.io/ is not an HTTPS link - * At _site/en/lessons/calibrating-radiocarbon-dates-r.html:137: 'a' tag is missing a reference @@ -1476,10 +1144,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/calibrating-radiocarbon-dates-r.html:785: - - http://calib.org is not an HTTPS link - * At _site/en/lessons/cleaning-data-with-openrefine.html:121: 'a' tag is missing a reference @@ -1496,54 +1160,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/cleaning-data-with-openrefine.html:528: - - http://openrefine.org is not an HTTPS link - -* At _site/en/lessons/cleaning-data-with-openrefine.html:579: - - http://vis.stanford.edu/papers/wrangler/ is not an HTTPS link - -* At _site/en/lessons/cleaning-data-with-openrefine.html:579: - - http://openrefine.org is not an HTTPS link - -* At _site/en/lessons/cleaning-data-with-openrefine.html:584: - - http://en.wikipedia.org/wiki/Data_profiling is not an HTTPS link - -* At _site/en/lessons/cleaning-data-with-openrefine.html:586: - - http://en.wikipedia.org/wiki/Named-entity_recognition is not an HTTPS link - -* At _site/en/lessons/cleaning-data-with-openrefine.html:590: - - http://www.loc.gov/index.html is not an HTTPS link - -* At 
_site/en/lessons/cleaning-data-with-openrefine.html:590: - - http://www.oclc.org/home.en.html is not an HTTPS link - -* At _site/en/lessons/cleaning-data-with-openrefine.html:608: - - http://creativecommons.org/licenses/by-nc/2.5/au/ is not an HTTPS link - -* At _site/en/lessons/cleaning-data-with-openrefine.html:622: - - http://en.wikipedia.org/wiki/Controlled_vocabulary is not an HTTPS link - -* At _site/en/lessons/cleaning-data-with-openrefine.html:630: - - http://en.wikipedia.org/wiki/Linked_data is not an HTTPS link - -* At _site/en/lessons/cleaning-data-with-openrefine.html:678: - - http://en.wikipedia.org/wiki/Faceted_search is not an HTTPS link - -* At _site/en/lessons/cleaning-data-with-openrefine.html:886: - - http://en.wikipedia.org/wiki/Comma-separated_values is not an HTTPS link - * At _site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html:117: 'a' tag is missing a reference @@ -1560,50 +1176,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html:462: - - http://home.heinonline.org/ is not an HTTPS link - -* At _site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html:502: - - http://www.unixuser.org/~euske/python/pdfminer/index.html is not an HTTPS link - -* At _site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html:617: - - http://krillapps.com/patterns/ is not an HTTPS link - -* At _site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html:627: - - http://docs.python.org/2/library/re.html is not an HTTPS link - -* At _site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html:628: - - http://docs.python.org/2/howto/regex.html#regex-howto is not an HTTPS link - -* At _site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html:636: - - http://www.gnu.org/software/sed/ is not an HTTPS link - -* At _site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html:636: - - http://www.gnu.org/software/grep/ is not an HTTPS link - -* At _site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html:719: - - http://docs.python.org/2/library/re.html#re.search is not an HTTPS link - -* At _site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html:727: - - http://docs.python.org/2/library/re.html#re.sub is not an HTTPS link - -* At _site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html:754: - - http://docs.python.org/2/library/re.html#re.VERBOSE is not an HTTPS link - -* At _site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html:789: - - http://docs.python.org/2/library/re.html#re.compile is not an HTTPS link - * At _site/en/lessons/clustering-visualizing-word-embeddings.html:135: 'a' tag is missing a reference @@ -1672,10 +1244,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/collaborative-blog-with-jekyll-github.html:873: - - http://github.com/join is not an HTTPS link - * At _site/en/lessons/collaborative-blog-with-jekyll-github.html:901: 'a' tag is missing a reference @@ -1684,45 +1252,13 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/collaborative-blog-with-jekyll-github.html:1512: +* At _site/en/lessons/common-similarity-measures.html:133: - http://stackexchange.com/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/collaborative-blog-with-jekyll-github.html:1512: +* At _site/en/lessons/common-similarity-measures.html:152: - 
http://tinyurl.com/DHslack is not an HTTPS link - -* At _site/en/lessons/collaborative-blog-with-jekyll-github.html:1519: - - http://literaturegeek.com/2015/12/08/WhyJekyllGitHub is not an HTTPS link - -* At _site/en/lessons/collaborative-blog-with-jekyll-github.html:1521: - - http://chronicle.com/blogs/profhacker/jekyll1/60913 is not an HTTPS link - -* At _site/en/lessons/collaborative-blog-with-jekyll-github.html:1527: - - http://jekyllrb.com/docs/home/ is not an HTTPS link - -* At _site/en/lessons/collaborative-blog-with-jekyll-github.html:1528: - - http://jekyll-windows.juthilo.com/ is not an HTTPS link - -* At _site/en/lessons/collaborative-blog-with-jekyll-github.html:1530: - - http://ben.balter.com/jekyll-style-guide/ is not an HTTPS link - -* At _site/en/lessons/collaborative-blog-with-jekyll-github.html:1557: - - http://prose.io/ is not an HTTPS link - -* At _site/en/lessons/common-similarity-measures.html:133: - - 'a' tag is missing a reference - -* At _site/en/lessons/common-similarity-measures.html:152: - - 'a' tag is missing a reference + 'a' tag is missing a reference * At _site/en/lessons/common-similarity-measures.html:189: @@ -1780,70 +1316,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/corpus-analysis-with-antconc.html:527: - - http://voyant-tools.org/ is not an HTTPS link - -* At _site/en/lessons/corpus-analysis-with-antconc.html:531: - - http://voyant-tools.org/ is not an HTTPS link - -* At _site/en/lessons/corpus-analysis-with-antconc.html:531: - - http://www.laurenceanthony.net/software/antconc/ is not an HTTPS link - -* At _site/en/lessons/corpus-analysis-with-antconc.html:531: - - http://www.laurenceanthony.net/ is not an HTTPS link - -* At _site/en/lessons/corpus-analysis-with-antconc.html:531: - - http://hfroehli.ch/2014/05/11/intro-bibliography-corpus-linguistics/ is not an HTTPS link - -* At _site/en/lessons/corpus-analysis-with-antconc.html:538: - - http://www.laurenceanthony.net/software/antconc/ is not an HTTPS link - -* At _site/en/lessons/corpus-analysis-with-antconc.html:539: - - http://www.laurenceanthony.net/software/antconc/releases/AntConc324/ is not an HTTPS link - -* At _site/en/lessons/corpus-analysis-with-antconc.html:569: - - http://notepad-plus-plus.org/ is not an HTTPS link - -* At _site/en/lessons/corpus-analysis-with-antconc.html:569: - - http://www.barebones.com/products/textwrangler/ is not an HTTPS link - -* At _site/en/lessons/corpus-analysis-with-antconc.html:578: - - http://www.nltk.org/ is not an HTTPS link - -* At _site/en/lessons/corpus-analysis-with-antconc.html:581: - - http://www.amazon.com/Developing-Linguistic-Corpora-Practice-Guides/dp/1842172050/ref=sr_1_1 is not an HTTPS link - -* At _site/en/lessons/corpus-analysis-with-antconc.html:911: - - http://www.wordfrequency.info/free.asp is not an HTTPS link - -* At _site/en/lessons/corpus-analysis-with-antconc.html:961: - - http://www.lexically.net/downloads/version6/HTML/index.html?keyness_definition.htm is not an HTTPS link - -* At _site/en/lessons/corpus-analysis-with-antconc.html:961: - - http://www.laurenceanthony.net/software/antconc/releases/AntConc335/help.pdf is not an HTTPS link - -* At _site/en/lessons/corpus-analysis-with-antconc.html:1024: - - http://hfroehlich.wordpress.com/2014/05/11/intro-bibliography-corpus-linguistics/ is not an HTTPS link - -* At _site/en/lessons/corpus-analysis-with-antconc.html:1025: - - http://hfroehli.ch/workshops/getting-started-with-antconc/ is not an HTTPS link - * At 
_site/en/lessons/corpus-analysis-with-spacy.html:117: 'a' tag is missing a reference @@ -1876,26 +1348,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/correspondence-analysis-in-R.html:570: - - http://www.ourcommons.ca/Committees/en/Home is not an HTTPS link - -* At _site/en/lessons/correspondence-analysis-in-R.html:665: - - http://factominer.free.fr/ is not an HTTPS link - -* At _site/en/lessons/correspondence-analysis-in-R.html:988: - - http://www.sthda.com/english/wiki/multiple-correspondence-analysis-essentials-interpretation-and-application-to-investigate-the-associations-between-categories-of-multiple-qualitative-variables-r-software-and-data-mining is not an HTTPS link - -* At _site/en/lessons/correspondence-analysis-in-R.html:1034: - - http://davetang.org/file/Singular_Value_Decomposition_Tutorial.pdf is not an HTTPS link - -* At _site/en/lessons/correspondence-analysis-in-R.html:1125: - - http://www.cbc.ca/news/indigenous/mmiw-inquiry-not-reaching-out-to-families-says-advocates-1.4053694 is not an HTTPS link - * At _site/en/lessons/counting-frequencies.html:119: 'a' tag is missing a reference @@ -1912,14 +1364,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/counting-frequencies.html:592: - - http://docs.python.org/tutorial/datastructures.html#list-comprehensions is not an HTTPS link - -* At _site/en/lessons/counting-frequencies.html:765: - - http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words is not an HTTPS link - * At _site/en/lessons/creating-an-omeka-exhibit.html:119: 'a' tag is missing a reference @@ -1952,18 +1396,6 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/creating-and-viewing-html-files-with-python.html:529: - - http://zotero.org is not an HTTPS link - -* At _site/en/lessons/creating-and-viewing-html-files-with-python.html:533: - - http://www.w3schools.com/html/default.asp is not an HTTPS link - -* At _site/en/lessons/creating-and-viewing-html-files-with-python.html:549: - - http://www.w3schools.com/tags/tag_doctype.asp is not an HTTPS link - * At _site/en/lessons/creating-apis-with-python-and-flask.html:120: 'a' tag is missing a reference @@ -1980,15155 +1412,7503 @@ For the Links check, the following failures were found: 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:605: +* At _site/en/lessons/creating-guis-in-python-for-digital-humanities-projects.html:117: - http://chroniclingamerica.loc.gov/about/api/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:607: +* At _site/en/lessons/creating-guis-in-python-for-digital-humanities-projects.html:136: - http://chroniclingamerica.loc.gov/about/api/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:626: +* At _site/en/lessons/creating-guis-in-python-for-digital-humanities-projects.html:173: - http://chroniclingamerica.loc.gov/search/pages/results/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:635: +* At _site/en/lessons/creating-guis-in-python-for-digital-humanities-projects.html:199: - http://chroniclingamerica.loc.gov/search/pages/results/?format=json&proxtext=fire is not an HTTPS link + 'a' tag is missing a reference -* At 
_site/en/lessons/creating-apis-with-python-and-flask.html:667: +* At _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html:117: - http://flask.pocoo.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:725: +* At _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html:136: - http://127.0.0.1:5000/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:749: +* At _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html:173: - http://127.0.0.1:5000/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:844: +* At _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html:199: - http://127.0.0.1:5000/api/v1/resources/books/all is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:921: +* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:117: - http://127.0.0.1:5000/api/v1/resources/books?id=0 is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:922: +* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:136: - http://127.0.0.1:5000/api/v1/resources/books?id=1 is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:923: +* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:173: - http://127.0.0.1:5000/api/v1/resources/books?id=2 is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:924: +* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:199: - http://127.0.0.1:5000/api/v1/resources/books?id=3 is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:930: +* At _site/en/lessons/crowdsourced-data-normalization-with-pandas.html:117: - http://127.0.0.1:5000/api/v1/resources/books is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:1002: +* At _site/en/lessons/crowdsourced-data-normalization-with-pandas.html:136: - http://www.doxygen.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:1002: +* At _site/en/lessons/crowdsourced-data-normalization-with-pandas.html:173: - http://www.sphinx-doc.org/en/stable/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:1004: +* At _site/en/lessons/crowdsourced-data-normalization-with-pandas.html:199: - http://api.repo.nypl.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:1092: +* At _site/en/lessons/data-mining-the-internet-archive.html:117: - http://127.0.0.1:5000/api/v1/resources/books/all is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:1093: +* At _site/en/lessons/data-mining-the-internet-archive.html:136: - http://127.0.0.1:5000/api/v1/resources/books?author=Connie+Willis is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:1094: +* At 
_site/en/lessons/data-mining-the-internet-archive.html:173: - http://127.0.0.1:5000/api/v1/resources/books?author=Connie+Willis&published=1993 is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:1095: +* At _site/en/lessons/data-mining-the-internet-archive.html:199: - http://127.0.0.1:5000/api/v1/resources/books?published=2010 is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:1228: +* At _site/en/lessons/data-wrangling-and-management-in-r.html:117: - http://chroniclingamerica.loc.gov/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-apis-with-python-and-flask.html:1234: +* At _site/en/lessons/data-wrangling-and-management-in-r.html:136: - http://hds.essex.ac.uk/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-guis-in-python-for-digital-humanities-projects.html:117: +* At _site/en/lessons/data-wrangling-and-management-in-r.html:173: 'a' tag is missing a reference -* At _site/en/lessons/creating-guis-in-python-for-digital-humanities-projects.html:136: +* At _site/en/lessons/data-wrangling-and-management-in-r.html:199: 'a' tag is missing a reference -* At _site/en/lessons/creating-guis-in-python-for-digital-humanities-projects.html:173: +* At _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html:117: 'a' tag is missing a reference -* At _site/en/lessons/creating-guis-in-python-for-digital-humanities-projects.html:199: +* At _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html:136: 'a' tag is missing a reference -* At _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html:117: +* At _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html:173: 'a' tag is missing a reference -* At _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html:136: +* At _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html:199: 'a' tag is missing a reference -* At _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html:173: +* At _site/en/lessons/designing-a-timeline-tabletop-simulator.html:117: 'a' tag is missing a reference -* At _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html:199: +* At _site/en/lessons/designing-a-timeline-tabletop-simulator.html:136: 'a' tag is missing a reference -* At _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html:536: +* At _site/en/lessons/designing-a-timeline-tabletop-simulator.html:173: - http://www.gizmag.com/ikea-augmented-reality-catalog-app/28703/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html:556: +* At _site/en/lessons/designing-a-timeline-tabletop-simulator.html:199: - http://web.archive.org/web/20180421163517/http://english.ufl.edu/trace_arcs/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html:616: +* At _site/en/lessons/detecting-text-reuse-with-passim.html:119: - http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html:858: +* At _site/en/lessons/detecting-text-reuse-with-passim.html:138: - http://www.gimp.org/ is not an HTTPS link + 
'a' tag is missing a reference -* At _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html:1205: +* At _site/en/lessons/detecting-text-reuse-with-passim.html:175: - http://developer.Android.com/tools/device.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:117: +* At _site/en/lessons/detecting-text-reuse-with-passim.html:201: 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:136: +* At _site/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.html:119: 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:173: +* At _site/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.html:138: 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:199: +* At _site/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.html:175: 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:486: +* At _site/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.html:201: - http://hdlab.stanford.edu/palladio/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:508: +* At _site/en/lessons/downloading-multiple-records-using-query-strings.html:117: - http://hal.archives-ouvertes.fr/docs/00/64/93/16/PDF/lemercier_A_zg.pdf is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:508: +* At _site/en/lessons/downloading-multiple-records-using-query-strings.html:136: - http://hal.archives-ouvertes.fr/docs/00/64/93/16/PDF/lemercier_A_zg.pdf is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:508: +* At _site/en/lessons/downloading-multiple-records-using-query-strings.html:173: - http://historicalnetworkresearch.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:518: +* At _site/en/lessons/downloading-multiple-records-using-query-strings.html:199: - http://web.archive.org/web/20180422010025/http://www.gdw-berlin.de/fileadmin/bilder/publ/publikationen_in_englischer_sprache/2006_Neuman_eng.pdf is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:518: +* At _site/en/lessons/editing-audio-with-audacity.html:117: - http://martenduering.com/research/covert-networks-during-the-holocaust/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:650: +* At _site/en/lessons/editing-audio-with-audacity.html:136: - http://hdlab.stanford.edu/palladio/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:726: +* At _site/en/lessons/editing-audio-with-audacity.html:173: - http://hdlab.stanford.edu/doc/scenario-simple-map.pdf is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:802: +* At _site/en/lessons/editing-audio-with-audacity.html:199: - http://en.wikipedia.org/wiki/Bipartite_graph#Examples is not an HTTPS link + 'a' tag is missing a reference -* At 
_site/en/lessons/creating-network-diagrams-from-historical-sources.html:892: +* At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:126: - http://en.wikipedia.org/wiki/Directed_graph#Indegree_and_outdegree is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:940: +* At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:145: - http://nodegoat.net/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:940: +* At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:182: - http://nodegoat.net/cms/UPLOAD/AsmallguidebyYanan11082014.pdf is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:942: +* At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:208: - http://www.youtube.com/watch?v=xKhYGRpbwOc is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:944: +* At _site/en/lessons/extracting-illustrated-pages.html:117: - http://www.clementlevallois.net/training.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:944: +* At _site/en/lessons/extracting-illustrated-pages.html:136: - http://www.youtube.com/watch?v=L6hHv6y5GsQ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:948: +* At _site/en/lessons/extracting-illustrated-pages.html:173: - http://pajek.imfm.si/doku.php is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:948: +* At _site/en/lessons/extracting-illustrated-pages.html:199: - http://www.cambridge.org/us/academic/subjects/sociology/research-methods-sociology-and-criminology/exploratory-social-network-analysis-pajek-2nd-edition is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:1035: +* At _site/en/lessons/extracting-keywords.html:117: - http://historicalnetworkresearch.org is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/crowdsourced-data-normalization-with-pandas.html:117: +* At _site/en/lessons/extracting-keywords.html:136: 'a' tag is missing a reference -* At _site/en/lessons/crowdsourced-data-normalization-with-pandas.html:136: +* At _site/en/lessons/extracting-keywords.html:173: 'a' tag is missing a reference -* At _site/en/lessons/crowdsourced-data-normalization-with-pandas.html:173: +* At _site/en/lessons/extracting-keywords.html:199: 'a' tag is missing a reference -* At _site/en/lessons/crowdsourced-data-normalization-with-pandas.html:199: +* At _site/en/lessons/facial-recognition-ai-python.html:119: 'a' tag is missing a reference -* At _site/en/lessons/crowdsourced-data-normalization-with-pandas.html:507: +* At _site/en/lessons/facial-recognition-ai-python.html:138: - http://transcribe-bentham.ucl.ac.uk/td/Transcribe_Bentham is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/crowdsourced-data-normalization-with-pandas.html:562: +* At _site/en/lessons/facial-recognition-ai-python.html:175: - http://menus.nypl.org/ is not an HTTPS link + 'a' tag is missing a reference -* At 
_site/en/lessons/crowdsourced-data-normalization-with-pandas.html:883: +* At _site/en/lessons/facial-recognition-ai-python.html:201: - http://pandas-docs.github.io/pandas-docs-travis/user_guide/timeseries.html#timestamp-limitations is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/crowdsourced-data-normalization-with-pandas.html:990: +* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:117: - http://curatingmenus.org/articles/against-cleaning/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:117: +* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:136: 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:136: +* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:173: 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:173: +* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:199: 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:199: +* At _site/en/lessons/finding-places-world-historical-gazetteer.html:119: 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:490: +* At _site/en/lessons/finding-places-world-historical-gazetteer.html:138: - http://archive.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:494: +* At _site/en/lessons/finding-places-world-historical-gazetteer.html:175: - http://activehistory.ca/2013/09/the-internet-archive-rocks-or-two-million-plus-free-sources-to-explore/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:503: +* At _site/en/lessons/finding-places-world-historical-gazetteer.html:201: - http://archive.org/details/bplscas is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:563: +* At _site/en/lessons/from-html-to-list-of-words-1.html:119: - http://archive.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:568: +* At _site/en/lessons/from-html-to-list-of-words-1.html:138: - http://archive.org/details/lettertowilliaml00doug is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:569: +* At _site/en/lessons/from-html-to-list-of-words-1.html:175: - http://archive.org/stream/lettertowilliaml00doug/39999066767938#page/n0/mode/2up is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:572: +* At _site/en/lessons/from-html-to-list-of-words-1.html:201: - http://archive.org/download/lettertowilliaml00doug is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:573: +* At _site/en/lessons/from-html-to-list-of-words-2.html:119: - http://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_dc.xml is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:573: +* At _site/en/lessons/from-html-to-list-of-words-2.html:138: - http://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_marc.xml is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:574: +* At 
_site/en/lessons/from-html-to-list-of-words-2.html:175: - http://www.loc.gov/marc/bibliographic/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:580: +* At _site/en/lessons/from-html-to-list-of-words-2.html:201: - http://archive.org/search.php?query=collection%3Abplscas&sort=-publicdate is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:612: +* At _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html:117: - http://blog.archive.org/2011/03/31/how-archive-org-items-are-structured/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:622: +* At _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html:136: - http://internetarchive.readthedocs.io/en/latest/quickstart.html#searching is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:647: +* At _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html:173: - http://archive.org/search.php?query=collection%3Abplscas is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:653: +* At _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html:199: - http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading is not an HTTPS link + 'a' tag is missing a reference -* At _site/en/lessons/data-mining-the-internet-archive.html:794: - - http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading is not an HTTPS link - -* At _site/en/lessons/data-mining-the-internet-archive.html:872: - - http://docs.python.org/2/tutorial/errors.html#handling-exceptions is not an HTTPS link - -* At _site/en/lessons/data-mining-the-internet-archive.html:945: - - http://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_marc.xml is not an HTTPS link - -* At _site/en/lessons/data-mining-the-internet-archive.html:952: - - http://www.loc.gov/marc/bibliographic/bd260.html is not an HTTPS link - -* At _site/en/lessons/data-mining-the-internet-archive.html:952: - - http://www.loc.gov/marc/ is not an HTTPS link - -* At _site/en/lessons/data-wrangling-and-management-in-r.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/data-wrangling-and-management-in-r.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/data-wrangling-and-management-in-r.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/data-wrangling-and-management-in-r.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/data-wrangling-and-management-in-r.html:539: - - http://hadley.nz/ is not an HTTPS link - -* At _site/en/lessons/data-wrangling-and-management-in-r.html:563: - - http://tidyverse.org/ is not an HTTPS link - -* At _site/en/lessons/data-wrangling-and-management-in-r.html:574: - - http://magrittr.tidyverse.org is not an HTTPS link - -* At _site/en/lessons/data-wrangling-and-management-in-r.html:576: - - http://ggplot2.tidyverse.org/ is not an HTTPS link - -* At _site/en/lessons/data-wrangling-and-management-in-r.html:577: - - http://www.springer.com/us/book/9780387245447 is not an HTTPS link - -* At _site/en/lessons/data-wrangling-and-management-in-r.html:579: - - http://readr.tidyverse.org is not an HTTPS link - -* At _site/en/lessons/data-wrangling-and-management-in-r.html:582: - - http://tibble.tidyverse.org/ is 
not an HTTPS link - -* At _site/en/lessons/data-wrangling-and-management-in-r.html:709: - - http://stefanbache.dk/ is not an HTTPS link - -* At _site/en/lessons/data-wrangling-and-management-in-r.html:710: - - http://hadley.nz/ is not an HTTPS link - -* At _site/en/lessons/data-wrangling-and-management-in-r.html:797: - - http://lincolnmullen.com/ is not an HTTPS link - -* At _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html:528: - - http://faculty.ucr.edu/~hanneman/nettext/ is not an HTTPS link - -* At _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html:529: - - http://www.insna.org is not an HTTPS link - -* At _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html:985: - - http://localhost:7474/browser/ is not an HTTPS link - -* At _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html:1048: - - http://localhost:7474 is not an HTTPS link - -* At _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html:1097: - - http://jgmackay.com/ is not an HTTPS link - -* At _site/en/lessons/designing-a-timeline-tabletop-simulator.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/designing-a-timeline-tabletop-simulator.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/designing-a-timeline-tabletop-simulator.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/designing-a-timeline-tabletop-simulator.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/detecting-text-reuse-with-passim.html:119: - - 'a' tag is missing a reference - -* At _site/en/lessons/detecting-text-reuse-with-passim.html:138: - - 'a' tag is missing a reference - -* At _site/en/lessons/detecting-text-reuse-with-passim.html:175: - - 'a' tag is missing a reference - -* At _site/en/lessons/detecting-text-reuse-with-passim.html:201: - - 'a' tag is missing a reference - -* At _site/en/lessons/detecting-text-reuse-with-passim.html:542: - - http://www.ccs.neu.edu/home/dasmith/ is not an HTTPS link - -* At _site/en/lessons/detecting-text-reuse-with-passim.html:705: - - http://spark.apache.org/downloads is not an HTTPS link - -* At _site/en/lessons/detecting-text-reuse-with-passim.html:910: - - http://jsonlines.org/ is not an HTTPS link - -* At _site/en/lessons/detecting-text-reuse-with-passim.html:1486: - - http://ceur-ws.org/Vol-2253/paper22.pdf is not an HTTPS link - -* At _site/en/lessons/detecting-text-reuse-with-passim.html:1487: - - http://dx.doi.org/10.1093/alh/ajv029 is not an HTTPS link - -* At _site/en/lessons/detecting-text-reuse-with-passim.html:1488: - - http://dx.doi.org/10.1093/alh/ajv028 is not an HTTPS link - -* At _site/en/lessons/detecting-text-reuse-with-passim.html:1489: - - http://dx.doi.org/10.1080/1461670x.2020.1761865 is not an HTTPS link - -* At _site/en/lessons/detecting-text-reuse-with-passim.html:1491: - - http://dx.doi.org/10.1007/978-3-319-12655-5_11 is not an HTTPS link - -* At 
_site/en/lessons/detecting-text-reuse-with-passim.html:1492: - - http://dx.doi.org/10.22148/16.034 is not an HTTPS link - -* At _site/en/lessons/detecting-text-reuse-with-passim.html:1495: - - http://dx.doi.org/10.1145/2682571.2797068 is not an HTTPS link - -* At _site/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.html:119: - - 'a' tag is missing a reference - -* At _site/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.html:138: - - 'a' tag is missing a reference - -* At _site/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.html:175: - - 'a' tag is missing a reference - -* At _site/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.html:201: - - 'a' tag is missing a reference - -* At _site/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.html:546: - - http://gmail.com is not an HTTPS link - -* At _site/en/lessons/downloading-multiple-records-using-query-strings.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/downloading-multiple-records-using-query-strings.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/downloading-multiple-records-using-query-strings.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/downloading-multiple-records-using-query-strings.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/downloading-multiple-records-using-query-strings.html:517: - - http://www.oldbaileyonline.org/ is not an HTTPS link - -* At _site/en/lessons/downloading-multiple-records-using-query-strings.html:551: - - http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 is not an HTTPS link - -* At _site/en/lessons/downloading-multiple-records-using-query-strings.html:612: - - http://www.oldbaileyonline.org/forms/formMain.jsp is not an HTTPS link - -* At _site/en/lessons/downloading-multiple-records-using-query-strings.html:1303: - - http://www.checkupdown.com/status/E408.html is not an HTTPS link - -* At _site/en/lessons/downloading-multiple-records-using-query-strings.html:1472: - - http://docs.python.org/tutorial/errors.html is not an HTTPS link - -* At _site/en/lessons/downloading-multiple-records-using-query-strings.html:1659: - - http://www.oldbaileyonline.org/static/DocAPI.jsp is not an HTTPS link - -* At _site/en/lessons/downloading-multiple-records-using-query-strings.html:1660: - - http://stackoverflow.com/questions/273192/python-best-way-to-create-directory-if-it-doesnt-exist-for-file-write is not an HTTPS link - -* At _site/en/lessons/editing-audio-with-audacity.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/editing-audio-with-audacity.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/editing-audio-with-audacity.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/editing-audio-with-audacity.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/editing-audio-with-audacity.html:487: - - http://audacityteam.org/ is not an HTTPS link - -* At _site/en/lessons/editing-audio-with-audacity.html:499: - - http://audacityteam.org/ is not an HTTPS link - -* At _site/en/lessons/editing-audio-with-audacity.html:525: - - http://web.archive.org/web/20161119231053/http://www.indiana.edu:80/~emusic/acoustics/amplitude.htm is not an HTTPS link - -* At _site/en/lessons/editing-audio-with-audacity.html:621: - - http://www.diffen.com/difference/Mono_vs_Stereo is not an HTTPS link - -* At _site/en/lessons/editing-audio-with-audacity.html:791: - - 
http://manual.audacityteam.org/man/crossfade_tracks.html is not an HTTPS link - -* At _site/en/lessons/editing-audio-with-audacity.html:821: - - http://www.nch.com.au/acm/formats.html is not an HTTPS link - -* At _site/en/lessons/editing-audio-with-audacity.html:821: - - http://www.w3schools.com/html/html5_audio.asp is not an HTTPS link - -* At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:126: - - 'a' tag is missing a reference - -* At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:145: - - 'a' tag is missing a reference - -* At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:182: - - 'a' tag is missing a reference - -* At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:208: - - 'a' tag is missing a reference - -* At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:559: - - http://docs.python-guide.org/en/latest/starting/installation/ is not an HTTPS link - -* At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:580: - - http://www.oxforddnb.com is not an HTTPS link - -* At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:580: - - http://www.sixdegreesoffrancisbacon.com is not an HTTPS link - -* At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:836: - - http://6dfb.tumblr.com/post/159420498411/ut-tensio-sic-vis-introducing-the-hooke-graph is not an HTTPS link - -* At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:852: - - http://sixdegreesoffrancisbacon.com/ is not an HTTPS link - -* At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:996: - - http://perso.crans.org/aynaud/communities/api.html is not an HTTPS link - -* At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:1043: - - http://pandas.pydata.org/ is not an HTTPS link - -* At _site/en/lessons/extracting-illustrated-pages.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/extracting-illustrated-pages.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/extracting-illustrated-pages.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/extracting-illustrated-pages.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/extracting-illustrated-pages.html:559: - - http://projectaida.org/ is not an HTTPS link - -* At _site/en/lessons/extracting-illustrated-pages.html:559: - - http://www.ccs.neu.edu/home/dasmith/ichneumon-proposal.pdf is not an HTTPS link - -* At _site/en/lessons/extracting-illustrated-pages.html:638: - - http://web.archive.org/web/20190115051900/https://conda.io/docs/_downloads/conda-cheatsheet.pdf is not an HTTPS link - -* At _site/en/lessons/extracting-keywords.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/extracting-keywords.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/extracting-keywords.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/extracting-keywords.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/extracting-keywords.html:502: - - http://en.wikipedia.org/wiki/Gazetteer is not an HTTPS link - -* At _site/en/lessons/extracting-keywords.html:502: - - http://en.wikipedia.org/wiki/Stop_words is not an HTTPS link - -* At _site/en/lessons/extracting-keywords.html:510: - - http://www.british-history.ac.uk/alumni-oxon/1500-1714 is not an HTTPS link - -* At _site/en/lessons/extracting-keywords.html:510: - - 
http://en.wikipedia.org/wiki/Comma-separated_values is not an HTTPS link - -* At _site/en/lessons/extracting-keywords.html:552: - - http://en.wikipedia.org/wiki/Historic_counties_of_England is not an HTTPS link - -* At _site/en/lessons/extracting-keywords.html:603: - - http://stackoverflow.com/questions/3056740/gedit-adds-line-at-end-of-file is not an HTTPS link - -* At _site/en/lessons/extracting-keywords.html:605: - - http://en.wikipedia.org/wiki/Word_processor is not an HTTPS link - -* At _site/en/lessons/extracting-keywords.html:660: - - http://stackoverflow.com/questions/11497376/new-line-python is not an HTTPS link - -* At _site/en/lessons/extracting-keywords.html:682: - - http://en.wikipedia.org/wiki/Carriage_return is not an HTTPS link - -* At _site/en/lessons/extracting-keywords.html:705: - - http://en.wikipedia.org/wiki/Regular_expression is not an HTTPS link - -* At _site/en/lessons/extracting-keywords.html:985: - - http://stackoverflow.com/questions/17315635/csv-new-line-character-seen-in-unquoted-field-error is not an HTTPS link - -* At _site/en/lessons/extracting-keywords.html:1108: - - http://fredgibbs.net/tutorials/extract-geocode-placenames-from-text-file.html is not an HTTPS link - -* At _site/en/lessons/facial-recognition-ai-python.html:119: - - 'a' tag is missing a reference - -* At _site/en/lessons/facial-recognition-ai-python.html:138: - - 'a' tag is missing a reference - -* At _site/en/lessons/facial-recognition-ai-python.html:175: - - 'a' tag is missing a reference - -* At _site/en/lessons/facial-recognition-ai-python.html:201: - - 'a' tag is missing a reference - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:524: - - http://web.archive.org/web/20180129051941/http://data-lessons.github.io/library-openrefine/ is not an HTTPS link - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:524: - - http://www.datacarpentry.org/OpenRefine-ecology-lesson/ is not an HTTPS link - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:566: - - http://www.gutenberg.org/ebooks/1105 is not an HTTPS link - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:566: - - http://www.gutenberg.org/ is not an HTTPS link - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:570: - - http://www.gutenberg.org/wiki/Gutenberg:Feeds is not an HTTPS link - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:1142: - - http://www.json.org/ is not an HTTPS link - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:1282: - - http://www.jython.org/ is not an HTTPS link - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:1285: - - http://www.jython.org/ is not an HTTPS link - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:1362: - - http://text-processing.com/ is not an HTTPS link - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:1362: - - http://www.nltk.org/ is not an HTTPS link - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:1363: - - http://text-processing.com/docs/sentiment.html is not an HTTPS link - -* At 
_site/en/lessons/fetch-and-parse-data-with-openrefine.html:1440: - - http://sentiment.vivekn.com/docs/api/ is not an HTTPS link - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:1465: - - http://www.nltk.org/book/ch06.html is not an HTTPS link - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:1465: - - http://www.nltk.org/book/ is not an HTTPS link - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:1481: - - http://web.archive.org/web/20150528125345/http://davidhuynh.net/spaces/nicar2011/tutorial.pdf is not an HTTPS link - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:1484: - - http://text-processing.com/docs/index.html is not an HTTPS link - -* At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:1487: - - http://text-processing.com/demo/sentiment/ is not an HTTPS link - -* At _site/en/lessons/finding-places-world-historical-gazetteer.html:119: - - 'a' tag is missing a reference - -* At _site/en/lessons/finding-places-world-historical-gazetteer.html:138: - - 'a' tag is missing a reference - -* At _site/en/lessons/finding-places-world-historical-gazetteer.html:175: - - 'a' tag is missing a reference - -* At _site/en/lessons/finding-places-world-historical-gazetteer.html:201: - - 'a' tag is missing a reference - -* At _site/en/lessons/from-html-to-list-of-words-1.html:119: - - 'a' tag is missing a reference - -* At _site/en/lessons/from-html-to-list-of-words-1.html:138: - - 'a' tag is missing a reference - -* At _site/en/lessons/from-html-to-list-of-words-1.html:175: - - 'a' tag is missing a reference - -* At _site/en/lessons/from-html-to-list-of-words-1.html:201: - - 'a' tag is missing a reference - -* At _site/en/lessons/from-html-to-list-of-words-1.html:517: - - http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 is not an HTTPS link - -* At _site/en/lessons/from-html-to-list-of-words-1.html:532: - - http://www.w3schools.com/html/ is not an HTTPS link - -* At _site/en/lessons/from-html-to-list-of-words-2.html:119: - - 'a' tag is missing a reference - -* At _site/en/lessons/from-html-to-list-of-words-2.html:138: - - 'a' tag is missing a reference - -* At _site/en/lessons/from-html-to-list-of-words-2.html:175: - - 'a' tag is missing a reference - -* At _site/en/lessons/from-html-to-list-of-words-2.html:201: - - 'a' tag is missing a reference - -* At _site/en/lessons/from-html-to-list-of-words-2.html:725: - - http://docs.python.org/2.4/lib/typesnumeric.html is not an HTTPS link - -* At _site/en/lessons/from-html-to-list-of-words-2.html:741: - - http://docs.python.org/3/library/types.html is not an HTTPS link - -* At _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html:523: - - http://en.wikipedia.org/wiki/Regular_language is not an HTTPS link - -* At _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html:529: - - http://www.worldcat.org/oclc/17591390 is not an HTTPS link - -* At _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html:685: - - 
http://en.wikipedia.org/wiki/Levenshtein_distance is not an HTTPS link - -* At _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html:1359: - - http://www.worldcat.org/oclc/41238508 is not an HTTPS link - -* At _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html:1387: - - http://www.egenix.com/products/python/mxBase/mxDateTime/ is not an HTTPS link - -* At _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html:1491: - - http://lxml.de/ is not an HTTPS link - -* At _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html:1596: - - http://brat.nlplab.org is not an HTTPS link - -* At _site/en/lessons/geocoding-qgis.html:118: - - 'a' tag is missing a reference - -* At _site/en/lessons/geocoding-qgis.html:137: - - 'a' tag is missing a reference - -* At _site/en/lessons/geocoding-qgis.html:174: - - 'a' tag is missing a reference - -* At _site/en/lessons/geocoding-qgis.html:200: - - 'a' tag is missing a reference - -* At _site/en/lessons/geocoding-qgis.html:517: - - http://www.qgis.org/en/site/ is not an HTTPS link - -* At _site/en/lessons/geocoding-qgis.html:561: - - http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html is not an HTTPS link - -* At _site/en/lessons/geocoding-qgis.html:563: - - http://www.british-history.ac.uk/alumni-oxon/1500-1714 is not an HTTPS link - -* At _site/en/lessons/geocoding-qgis.html:640: - - http://www.qgistutorials.com/en/docs/3/working_with_projections.html is not an HTTPS link - -* At _site/en/lessons/geocoding-qgis.html:643: - - http://www.county-borders.co.uk/ is not an HTTPS link - -* At _site/en/lessons/geocoding-qgis.html:714: - - http://wiki.gis.com/wiki/index.php/Classification is not an HTTPS link - -* At _site/en/lessons/geocoding-qgis.html:728: - - http://www.gazetteer.org.uk/index.php is not an HTTPS link - -* At _site/en/lessons/geocoding-qgis.html:740: - - http://www.county-borders.co.uk/ is not an HTTPS link - -* At _site/en/lessons/geocoding-qgis.html:797: - - http://michaelminn.com/linux/mmqgis/ is not an HTTPS link - -* At _site/en/lessons/geocoding-qgis.html:910: - - http://www.w3schools.com/sql/sql_like.asp is not an HTTPS link - -* At _site/en/lessons/geocoding-qgis.html:910: - - http://www.w3schools.com/sql/ is not an HTTPS link - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:488: - - http://jekyll.inf.ed.ac.uk/geoparser.html is not an HTTPS link - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:652: - - http://www.geonames.org/ is not an HTTPS link - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:652: - - http://groups.inf.ed.ac.uk/geoparser/documentation/v1.3/html/gaz.html is not an HTTPS link - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:659: - - http://groups.inf.ed.ac.uk/geoparser/documentation/v1.3/html/pipeline.html is not an HTTPS link - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:766: - - http://boundingbox.klokantech.com is not an HTTPS link - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:771: - - http://boundingbox.klokantech.com is not an HTTPS 
link - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:887: - - http://groups.inf.ed.ac.uk/geoparser/scripts/run-multiple-files.sh is not an HTTPS link - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:887: - - http://homepages.inf.ed.ac.uk/balex/publications/geoparser-workshop.pdf is not an HTTPS link - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:982: - - http://palimpsest.blogs.edina.ac.uk/ is not an HTTPS link - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:982: - - http://litlong.org/ is not an HTTPS link - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:984: - - http://web.archive.org/web/20170722115758/http://englishplacenames.cerch.kcl.ac.uk/ is not an HTTPS link - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:985: - - http://tradingconsequences.blogs.edina.ac.uk/ is not an HTTPS link - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:986: - - http://www.lancaster.ac.uk/staff/gregoryi/ is not an HTTPS link - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:989: - - http://nrabinowitz.github.io/gapvis/ is not an HTTPS link - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:993: - - http://www.lrec-conf.org/proceedings/lrec2016/pdf/129_Paper.pdf is not an HTTPS link - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:995: - - http://www.euppublishing.com/doi/pdfplus/10.3366/ijhac.2015.0136 is not an HTTPS link - -* At _site/en/lessons/geoparsing-text-with-edinburgh.html:1003: - - http://homepages.inf.ed.ac.uk/grover/papers/PTRS-A-2010-Grover-3875-89.pdf is not an HTTPS link - -* At _site/en/lessons/georeferencing-qgis.html:121: - - 'a' tag is missing a reference - -* At _site/en/lessons/georeferencing-qgis.html:140: - - 'a' tag is missing a reference - -* At _site/en/lessons/georeferencing-qgis.html:177: - - 'a' tag is missing a reference - -* At _site/en/lessons/georeferencing-qgis.html:203: - - 'a' tag is missing a reference - -* At _site/en/lessons/georeferencing-qgis.html:528: - - http://en.wikipedia.org/wiki/Rubbersheeting is not an HTTPS link - -* At _site/en/lessons/georeferencing-qgis.html:578: - - http://en.wikipedia.org/wiki/Spatial_reference_system is not an HTTPS link - -* At _site/en/lessons/georeferencing-qgis.html:605: - - http://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP is not an HTTPS link - -* At _site/en/lessons/georeferencing-qgis.html:775: - - http://books.google.ca/books?id=TqCNZYXWXAUC&dq=tilting&source=gbs_navlinks_s is not an HTTPS link - -* At _site/en/lessons/georeferencing-qgis.html:943: - - http://en.wikipedia.org/wiki/World_file is not an HTTPS link - -* At _site/en/lessons/georeferencing-qgis.html:950: - - http://en.wikipedia.org/wiki/Tagged_Image_File_Format is not an HTTPS link - -* At _site/en/lessons/georeferencing-qgis.html:1129: - - http://geospatialhistorian.wordpress.com/ is not an HTTPS link - -* At _site/en/lessons/geospatial-data-analysis.html:120: - - 'a' tag is missing a reference - -* At _site/en/lessons/geospatial-data-analysis.html:139: - - 'a' tag is missing a reference - -* At _site/en/lessons/geospatial-data-analysis.html:176: - - 'a' tag is missing a reference - -* At _site/en/lessons/geospatial-data-analysis.html:202: - - 'a' tag is missing a reference - -* At _site/en/lessons/geospatial-data-analysis.html:495: - - http://www.ats.ucla.edu/stat/r/default.htm is not an HTTPS link - -* At _site/en/lessons/geospatial-data-analysis.html:570: - - http://geoservices.tamu.edu/Services/Geocode/ is not 
an HTTPS link - -* At _site/en/lessons/geospatial-data-analysis.html:616: - - http://r4ds.had.co.nz/transform.html is not an HTTPS link - -* At _site/en/lessons/geospatial-data-analysis.html:632: - - http://web.archive.org/web/20190922234254/http://strimas.com/r/tidy-sf/ is not an HTTPS link - -* At _site/en/lessons/geospatial-data-analysis.html:722: - - http://www.theanalysisfactor.com/regression-models-for-count-data/ is not an HTTPS link - -* At _site/en/lessons/geospatial-data-analysis.html:766: - - http://web.archive.org/web/20200225021219/https://www.nceas.ucsb.edu/~frazier/RSpatialGuides/OverviewCoordinateReferenceSystems.pdf is not an HTTPS link - -* At _site/en/lessons/geospatial-data-analysis.html:772: - - http://www.statisticshowto.com/probability-and-statistics/normal-distributions/ is not an HTTPS link - -* At _site/en/lessons/geospatial-data-analysis.html:775: - - http://www.sciencedirect.com/science/article/pii/S0031405608000073 is not an HTTPS link - -* At _site/en/lessons/geospatial-data-analysis.html:775: - - http://www.biostathandbook.com/transformation.html is not an HTTPS link - -* At _site/en/lessons/getting-started-with-markdown.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/getting-started-with-markdown.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/getting-started-with-markdown.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/getting-started-with-markdown.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/getting-started-with-markdown.html:528: - - http://daringfireball.net/projects/markdown/ is not an HTTPS link - -* At _site/en/lessons/getting-started-with-markdown.html:532: - - http://github.com is not an HTTPS link - -* At _site/en/lessons/getting-started-with-markdown.html:727: - - http://programminghistorian.org/ is not an HTTPS link - -* At _site/en/lessons/getting-started-with-mysql-using-r.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/getting-started-with-mysql-using-r.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/getting-started-with-mysql-using-r.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/getting-started-with-mysql-using-r.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/getting-started-with-mysql-using-r.html:482: - - http://www.jason-french.com/blog/2014/07/03/using-r-with-mysql-databases/ is not an HTTPS link - -* At _site/en/lessons/getting-started-with-mysql-using-r.html:595: - - http://newspapers.library.wales is not an HTTPS link - -* At _site/en/lessons/getting-started-with-mysql-using-r.html:848: - - http://dev.mysql.com/downloads/workbench/ is not an HTTPS link - -* At _site/en/lessons/getting-started-with-mysql-using-r.html:1147: - - http://newspapers.library.wales/view/4121281/4121288/94/ is not an HTTPS link - -* At _site/en/lessons/getting-started-with-mysql-using-r.html:1686: - - http://www.jeffblackadar.ca/graham_fellowship/corpus_entities_equity/ is not an HTTPS link - -* At _site/en/lessons/getting-started-with-mysql-using-r.html:1689: - - http://web.archive.org/web/20171228130133/https://www.ntu.edu.sg/home/ehchua/programming/sql/MySQL_Beginner.html is not an HTTPS link - -* At _site/en/lessons/getting-started-with-mysql-using-r.html:1696: - - http://grahamresearchfellow.org/ is not an HTTPS link - -* At _site/en/lessons/getting-started-with-mysql-using-r.html:1710: - - http://www.jason-french.com/blog/2014/07/03/using-r-with-mysql-databases/ is not an 
HTTPS link - -* At _site/en/lessons/googlemaps-googleearth.html:121: - - 'a' tag is missing a reference - -* At _site/en/lessons/googlemaps-googleearth.html:140: - - 'a' tag is missing a reference - -* At _site/en/lessons/googlemaps-googleearth.html:177: - - 'a' tag is missing a reference - -* At _site/en/lessons/googlemaps-googleearth.html:203: - - 'a' tag is missing a reference - -* At _site/en/lessons/googlemaps-googleearth.html:1181: - - http://www.davidrumsey.com/ is not an HTTPS link - -* At _site/en/lessons/googlemaps-googleearth.html:1444: - - http://niche-canada.org/2011/12/14/mobile-mapping-and-historical-gis-in-the-field/ is not an HTTPS link - -* At _site/en/lessons/googlemaps-googleearth.html:1593: - - http://geospatialhistorian.wordpress.com/ is not an HTTPS link - -* At _site/en/lessons/gravity-model.html:133: - - 'a' tag is missing a reference - -* At _site/en/lessons/gravity-model.html:152: - - 'a' tag is missing a reference - -* At _site/en/lessons/gravity-model.html:189: - - 'a' tag is missing a reference - -* At _site/en/lessons/gravity-model.html:215: - - 'a' tag is missing a reference - -* At _site/en/lessons/gravity-model.html:637: - - http://doi.org/10.5334/johd.1 is not an HTTPS link - -* At _site/en/lessons/gravity-model.html:661: - - http://www.migrants.adamcrymble.org/the-project/ is not an HTTPS link - -* At _site/en/lessons/gravity-model.html:664: - - http://doi.org/10.5334/johd.1 is not an HTTPS link - -* At _site/en/lessons/gravity-model.html:842: - - http://www.statisticshowto.com/population-mean/ is not an HTTPS link - -* At _site/en/lessons/gravity-model.html:1489: - - http://www.mathematica-journal.com/2013/06/negative-binomial-regression/ is not an HTTPS link - -* At _site/en/lessons/image-classification-neural-networks.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/image-classification-neural-networks.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/image-classification-neural-networks.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/image-classification-neural-networks.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/index.html:86: - - 'a' tag is missing a reference - -* At _site/en/lessons/index.html:105: - - 'a' tag is missing a reference - -* At _site/en/lessons/index.html:142: - - 'a' tag is missing a reference - -* At _site/en/lessons/index.html:168: - - 'a' tag is missing a reference - -* At _site/en/lessons/installing-omeka.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/installing-omeka.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/installing-omeka.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/installing-omeka.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/installing-omeka.html:469: - - http://omeka.net is not an HTTPS link - -* At _site/en/lessons/installing-omeka.html:484: - - http://aws.amazon.com/free/ is not an HTTPS link - -* At _site/en/lessons/installing-omeka.html:484: - - http://www.hostgator.com/ is not an HTTPS link - -* At _site/en/lessons/installing-omeka.html:484: - - http://www.dreamhost.com is not an HTTPS link - -* At _site/en/lessons/installing-omeka.html:486: - - http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-launch-instance_linux.html is not an HTTPS link - -* At _site/en/lessons/installing-omeka.html:492: - - http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AccessingInstancesLinux.html is not an HTTPS link - -* At 
_site/en/lessons/installing-omeka.html:500: - - http://support.hostgator.com/articles/hosting-guide/lets-get-started/how-do-i-get-and-use-ssh-access is not an HTTPS link - -* At _site/en/lessons/installing-python-modules-pip.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/installing-python-modules-pip.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/installing-python-modules-pip.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/installing-python-modules-pip.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/installing-python-modules-pip.html:531: - - http://www.thegeekstuff.com/2012/04/curl-examples/ is not an HTTPS link - -* At _site/en/lessons/installing-python-modules-pip.html:578: - - http://stackoverflow.com/questions/4750806/how-to-install-pip-on-windows is not an HTTPS link - -* At _site/en/lessons/interactive-data-visualization-dashboard.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/interactive-data-visualization-dashboard.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/interactive-data-visualization-dashboard.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/interactive-data-visualization-dashboard.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/interactive-text-games-using-twine.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/interactive-text-games-using-twine.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/interactive-text-games-using-twine.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/interactive-text-games-using-twine.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/interactive-text-games-using-twine.html:576: - - http://www.depressionquest.com/ is not an HTTPS link - -* At _site/en/lessons/interactive-text-games-using-twine.html:643: - - http://twinery.org/ is not an HTTPS link - -* At _site/en/lessons/interactive-text-games-using-twine.html:1076: - - http://www.depressionquest.com/ is not an HTTPS link - -* At _site/en/lessons/interactive-visualization-with-plotly.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/interactive-visualization-with-plotly.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/interactive-visualization-with-plotly.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/interactive-visualization-with-plotly.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/interrogating-national-narrative-gpt.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/interrogating-national-narrative-gpt.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/interrogating-national-narrative-gpt.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/interrogating-national-narrative-gpt.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-bash.html:119: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-bash.html:138: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-bash.html:175: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-bash.html:201: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-bash.html:528: - - http://www.tldp.org/LDP/Bash-Beginners-Guide/html/chap_01.html is not an HTTPS link - -* At _site/en/lessons/intro-to-bash.html:530: - - http://en.wikipedia.org/wiki/Syntax is not an HTTPS link - 
-* At _site/en/lessons/intro-to-bash.html:532: - - http://en.wikipedia.org/wiki/Unix_shell is not an HTTPS link - -* At _site/en/lessons/intro-to-bash.html:532: - - http://en.wikipedia.org/wiki/Unix is not an HTTPS link - -* At _site/en/lessons/intro-to-bash.html:578: - - http://ethanschoonover.com/solarized is not an HTTPS link - -* At _site/en/lessons/intro-to-bash.html:714: - - http://en.wikipedia.org/wiki/Tree_structure is not an HTTPS link - -* At _site/en/lessons/intro-to-bash.html:738: - - http://www.viemu.com/a-why-vi-vim.html is not an HTTPS link - -* At _site/en/lessons/intro-to-bash.html:750: - - http://www.gutenberg.org/ebooks/2600 is not an HTTPS link - -* At _site/en/lessons/intro-to-bash.html:824: - - http://en.wikipedia.org/wiki/Vim_%28text_editor%29 is not an HTTPS link - -* At _site/en/lessons/intro-to-bash.html:848: - - http://vimdoc.sourceforge.net/htmldoc/quickref.html is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:118: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-linked-data.html:137: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-linked-data.html:174: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-linked-data.html:200: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-linked-data.html:531: - - http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:560: - - http://www.oxforddnb.com is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:572: - - http://www.geonames.org/ is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:673: - - http://www.history.ac.uk/projects/digital/tobias is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:707: - - http://semanticweb.org/wiki/Main_Page.html is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:709: - - http://web.archive.org/web/20170715094229/http://www.musicontology.com/ is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:709: - - http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:827: - - http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:862: - - http://www.history.ac.uk/projects/digital/tobias is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:883: - - http://www.easyrdf.org/converter is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:895: - - http://dbpedia.org/snorql/ is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:995: - - http://dbpedia.org/class/yago/WikicatBritishHistorians is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:995: - - http://dbpedia.org/class/yago/WikicatWomenHistorians is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:1024: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-linked-data.html:1024: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-linked-data.html:1034: - - http://www.snee.com/bobdc.blog/ is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:1038: - - http://linkeddata.org/guides-and-tutorials is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:1040: - - 
http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:1046: - - http://www.history.ac.uk/projects/digital/tobias is not an HTTPS link - -* At _site/en/lessons/intro-to-linked-data.html:1046: - - http://www.ahrc.ac.uk/ is not an HTTPS link - -* At _site/en/lessons/intro-to-powershell.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-powershell.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-powershell.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-powershell.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-twitterbots.html:121: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-twitterbots.html:140: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-twitterbots.html:177: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-twitterbots.html:203: - - 'a' tag is missing a reference - -* At _site/en/lessons/intro-to-twitterbots.html:518: - - http://tracery.io is not an HTTPS link - -* At _site/en/lessons/intro-to-twitterbots.html:518: - - http://cheapbotsdonequick.com/ is not an HTTPS link - -* At _site/en/lessons/intro-to-twitterbots.html:521: - - http://www.sciencedirect.com/science/article/pii/S0747563213003129 is not an HTTPS link - -* At _site/en/lessons/intro-to-twitterbots.html:521: - - http://www.docnow.io/ is not an HTTPS link - -* At _site/en/lessons/intro-to-twitterbots.html:589: - - http://json.org/ is not an HTTPS link - -* At _site/en/lessons/intro-to-twitterbots.html:593: - - http://twitter.com/tinyarchae is not an HTTPS link - -* At _site/en/lessons/intro-to-twitterbots.html:593: - - http://web.archive.org/web/20180131161516/https://publicarchaeologyconference.wordpress.com/ is not an HTTPS link - -* At _site/en/lessons/intro-to-twitterbots.html:603: - - http://tracery.io is not an HTTPS link - -* At _site/en/lessons/intro-to-twitterbots.html:769: - - http://apps.twitter.com is not an HTTPS link - -* At _site/en/lessons/intro-to-twitterbots.html:771: - - http://cheapbotsdonequick.com/ is not an HTTPS link - -* At _site/en/lessons/intro-to-twitterbots.html:821: - - http://tinysubversions.com/2013/03/basic-twitter-bot-etiquette/ is not an HTTPS link - -* At _site/en/lessons/intro-to-twitterbots.html:845: - - http://unicode.org/emoji/charts/full-emoji-list.html is not an HTTPS link - -* At _site/en/lessons/intro-to-twitterbots.html:868: - - http://www.crystalcodepalace.com/traceryTut.html is not an HTTPS link - -* At _site/en/lessons/intro-to-twitterbots.html:884: - - http://cheapbotsdonequick.com/ is not an HTTPS link - -* At _site/en/lessons/intro-to-twitterbots.html:923: - - http://cheapbotsdonequick.com/source/softlandscapes is not an HTTPS link - -* At _site/en/lessons/intro-to-twitterbots.html:942: - - http://www.codingblocks.net/videos/generating-music-in-javascript/ is not an HTTPS link - -* At _site/en/lessons/intro-to-twitterbots.html:947: - - http://www.zachwhalen.net/posts/how-to-make-a-twitter-bot-with-google-spreadsheets-version-04/ is not an HTTPS link - -* At _site/en/lessons/introduction-and-installation.html:119: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-and-installation.html:138: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-and-installation.html:175: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-and-installation.html:201: - - 
'a' tag is missing a reference - -* At _site/en/lessons/introduction-and-installation.html:513: - - http://www.python.org/ is not an HTTPS link - -* At _site/en/lessons/introduction-and-installation.html:514: - - http://www.crummy.com/software/BeautifulSoup/ is not an HTTPS link - -* At _site/en/lessons/introduction-and-installation.html:515: - - http://www.activestate.com/komodo-edit is not an HTTPS link - -* At _site/en/lessons/introduction-and-installation.html:517: - - http://wiki.python.org/moin/PythonEditors/ is not an HTTPS link - -* At _site/en/lessons/introduction-and-installation.html:526: - - http://sebastianraschka.com/Articles/2014_python_2_3_key_diff.html is not an HTTPS link - -* At _site/en/lessons/introduction-map-warper.html:121: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-map-warper.html:140: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-map-warper.html:177: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-map-warper.html:203: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-to-ffmpeg.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-to-ffmpeg.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-to-ffmpeg.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-to-ffmpeg.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-to-ffmpeg.html:574: - - http://linuxbrew.sh/ is not an HTTPS link - -* At _site/en/lessons/introduction-to-ffmpeg.html:621: - - http://linuxbrew.sh/ is not an HTTPS link - -* At _site/en/lessons/introduction-to-ffmpeg.html:1081: - - http://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm is not an HTTPS link - -* At _site/en/lessons/introduction-to-ffmpeg.html:1082: - - http://tldp.org/HOWTO/Bash-Prog-Intro-HOWTO-5.html is not an HTTPS link - -* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:120: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:139: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:176: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:202: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:512: - - http://php.net/ is not an HTTPS link - -* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:636: - - http://jsonviewer.stack.hu/ is not an HTTPS link - -* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:661: - - http://json.org/ is not an HTTPS link - -* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:717: - - http://uurl.kbr.be/1017835 is not an HTTPS link - -* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:732: - - http://www.europeana.eu/portal/record/90402/RP_P_OB_84_508.html is not an HTTPS link - -* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:741: - - http://dublincore.org/documents/dcmi-terms/ is not an HTTPS link - -* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:821: - - http://localhost/dashboard is not an HTTPS link - -* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:859: - - http://localhost/helloworld.php is not an 
HTTPS link - -* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:1245: - - http://developer.nytimes.com/ is not an HTTPS link - -* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:1248: - - http://www.geonames.org/export/web-services.html is not an HTTPS link - -* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:1251: - - http://museum-api.pbworks.com/w/page/21933420/Museum%C2%A0APIs is not an HTTPS link - -* At _site/en/lessons/introduction-to-stylometry-with-python.html:120: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-to-stylometry-with-python.html:139: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-to-stylometry-with-python.html:176: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-to-stylometry-with-python.html:202: - - 'a' tag is missing a reference - -* At _site/en/lessons/introduction-to-stylometry-with-python.html:568: - - http://www.gutenberg.org/cache/epub/1404/pg1404.txt is not an HTTPS link - -* At _site/en/lessons/introduction-to-stylometry-with-python.html:584: - - http://www.nltk.org/book/ is not an HTTPS link - -* At _site/en/lessons/introduction-to-stylometry-with-python.html:588: - - http://www.nltk.org/book/ is not an HTTPS link - -* At _site/en/lessons/introduction-to-stylometry-with-python.html:724: - - http://jupyter.org/ is not an HTTPS link - -* At _site/en/lessons/introduction-to-stylometry-with-python.html:724: - - http://jupyterlab.readthedocs.io/en/stable/getting_started/installation.html is not an HTTPS link - -* At _site/en/lessons/introduction-to-stylometry-with-python.html:935: - - http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/ is not an HTTPS link - -* At _site/en/lessons/json-and-jq.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/json-and-jq.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/json-and-jq.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/json-and-jq.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/json-and-jq.html:520: - - http://www.json.org/ is not an HTTPS link - -* At _site/en/lessons/json-and-jq.html:731: - - http://stackoverflow.com/questions/3135325/why-do-vector-indices-in-r-start-with-1-instead-of-0 is not an HTTPS link - -* At _site/en/lessons/json-and-jq.html:731: - - http://skillcrush.com/2013/01/17/why-programmers-start-counting-at-zero/ is not an HTTPS link - -* At _site/en/lessons/json-and-jq.html:1416: - - http://brew.sh/ is not an HTTPS link - -* At _site/en/lessons/json-and-jq.html:1467: - - http://stackoverflow.com/questions/tagged/jq is not an HTTPS link - -* At _site/en/lessons/json-and-jq.html:1468: - - http://stackoverflow.com/help/how-to-ask is not an HTTPS link - -* At _site/en/lessons/json-and-jq.html:1468: - - http://stackoverflow.com/help/mcve is not an HTTPS link - -* At _site/en/lessons/jupyter-notebooks.html:121: - - 'a' tag is missing a reference - -* At _site/en/lessons/jupyter-notebooks.html:140: - - 'a' tag is missing a reference - -* At _site/en/lessons/jupyter-notebooks.html:177: - - 'a' tag is missing a reference - -* At _site/en/lessons/jupyter-notebooks.html:203: - - 'a' tag is missing a reference - -* At _site/en/lessons/jupyter-notebooks.html:651: - - 'a' tag is missing a reference - -* At _site/en/lessons/jupyter-notebooks.html:879: - - 'a' tag is missing a reference - -* At _site/en/lessons/keywords-in-context-using-n-grams.html:120: - - 'a' tag is 
missing a reference - -* At _site/en/lessons/keywords-in-context-using-n-grams.html:139: - - 'a' tag is missing a reference - -* At _site/en/lessons/keywords-in-context-using-n-grams.html:176: - - 'a' tag is missing a reference - -* At _site/en/lessons/keywords-in-context-using-n-grams.html:202: - - 'a' tag is missing a reference - -* At _site/en/lessons/linear-regression.html:133: - - 'a' tag is missing a reference - -* At _site/en/lessons/linear-regression.html:152: - - 'a' tag is missing a reference - -* At _site/en/lessons/linear-regression.html:189: - - 'a' tag is missing a reference - -* At _site/en/lessons/linear-regression.html:215: - - 'a' tag is missing a reference - -* At _site/en/lessons/linux-installation.html:119: - - 'a' tag is missing a reference - -* At _site/en/lessons/linux-installation.html:138: - - 'a' tag is missing a reference - -* At _site/en/lessons/linux-installation.html:175: - - 'a' tag is missing a reference - -* At _site/en/lessons/linux-installation.html:201: - - 'a' tag is missing a reference - -* At _site/en/lessons/logistic-regression.html:133: - - 'a' tag is missing a reference - -* At _site/en/lessons/logistic-regression.html:152: - - 'a' tag is missing a reference - -* At _site/en/lessons/logistic-regression.html:189: - - 'a' tag is missing a reference - -* At _site/en/lessons/logistic-regression.html:215: - - 'a' tag is missing a reference - -* At _site/en/lessons/logistic-regression.html:1993: - - http://scikit-learn.org/stable/install.html is not an HTTPS link - -* At _site/en/lessons/mac-installation.html:119: - - 'a' tag is missing a reference - -* At _site/en/lessons/mac-installation.html:138: - - 'a' tag is missing a reference - -* At _site/en/lessons/mac-installation.html:175: - - 'a' tag is missing a reference - -* At _site/en/lessons/mac-installation.html:201: - - 'a' tag is missing a reference - -* At _site/en/lessons/mac-installation.html:498: - - http://support.apple.com/kb/ht1427 is not an HTTPS link - -* At _site/en/lessons/mac-installation.html:504: - - http://www.python.org/ is not an HTTPS link - -* At _site/en/lessons/mac-installation.html:517: - - http://wiki.python.org/moin/PythonEditors/ is not an HTTPS link - -* At _site/en/lessons/manipulating-strings-in-python.html:119: - - 'a' tag is missing a reference - -* At _site/en/lessons/manipulating-strings-in-python.html:138: - - 'a' tag is missing a reference - -* At _site/en/lessons/manipulating-strings-in-python.html:175: - - 'a' tag is missing a reference - -* At _site/en/lessons/manipulating-strings-in-python.html:201: - - 'a' tag is missing a reference - -* At _site/en/lessons/mapping-with-python-leaflet.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/mapping-with-python-leaflet.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/mapping-with-python-leaflet.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/mapping-with-python-leaflet.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/mapping-with-python-leaflet.html:511: - - http://pip.readthedocs.org/en/stable/ is not an HTTPS link - -* At _site/en/lessons/mapping-with-python-leaflet.html:511: - - http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe is not an HTTPS link - -* At _site/en/lessons/mapping-with-python-leaflet.html:512: - - http://leafletjs.com/ is not an HTTPS link - -* At _site/en/lessons/mapping-with-python-leaflet.html:513: - - http://geojson.io/ is not an HTTPS link - -* At 
_site/en/lessons/mapping-with-python-leaflet.html:522: - - http://www.barebones.com/products/textwrangler/ is not an HTTPS link - -* At _site/en/lessons/mapping-with-python-leaflet.html:522: - - http://www.sublimetext.com/ is not an HTTPS link - -* At _site/en/lessons/mapping-with-python-leaflet.html:524: - - http://docs.python-guide.org/en/latest/dev/virtualenvs/ is not an HTTPS link - -* At _site/en/lessons/mapping-with-python-leaflet.html:535: - - http://data.london.gov.uk/dataset/historic-census-population is not an HTTPS link - -* At _site/en/lessons/mapping-with-python-leaflet.html:555: - - http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe is not an HTTPS link - -* At _site/en/lessons/mapping-with-python-leaflet.html:557: - - http://pip.readthedocs.org/en/stable/ is not an HTTPS link - -* At _site/en/lessons/mapping-with-python-leaflet.html:561: - - http://pip.readthedocs.org/en/stable/installing/ is not an HTTPS link - -* At _site/en/lessons/mapping-with-python-leaflet.html:573: - - http://pandas.pydata.org/pandas-docs/stable/install.html#dependencies is not an HTTPS link - -* At _site/en/lessons/mapping-with-python-leaflet.html:611: - - http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html is not an HTTPS link - -* At _site/en/lessons/mapping-with-python-leaflet.html:661: - - http://geopy.readthedocs.org/ is not an HTTPS link - -* At _site/en/lessons/mapping-with-python-leaflet.html:682: - - http://geopy.readthedocs.io/en/latest/#module-geopy.geocoders is not an HTTPS link - -* At _site/en/lessons/mapping-with-python-leaflet.html:682: - - http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.apply.html is not an HTTPS link - -* At _site/en/lessons/mapping-with-python-leaflet.html:792: - - http://geojson.io is not an HTTPS link - -* At _site/en/lessons/mapping-with-python-leaflet.html:896: - - http://geojson.io is not an HTTPS link - -* At _site/en/lessons/mapping-with-python-leaflet.html:1185: - - http://leafletjs.com/SlavaUkraini/reference-1.2.0.html#geojson-oneachfeature is not an HTTPS link - -* At _site/en/lessons/naive-bayesian.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/naive-bayesian.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/naive-bayesian.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/naive-bayesian.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/naive-bayesian.html:493: - - http://digitalhistoryhacks.blogspot.com/2008/05/naive-bayesian-in-old-bailey-part-1.html is not an HTTPS link - -* At _site/en/lessons/naive-bayesian.html:501: - - http://www.oldbaileyonline.org/ is not an HTTPS link - -* At _site/en/lessons/naive-bayesian.html:557: - - http://www.crummy.com/software/BeautifulSoup/ is not an HTTPS link - -* At _site/en/lessons/naive-bayesian.html:562: - - http://www.oldbaileyonline.org/ is not an HTTPS link - -* At _site/en/lessons/naive-bayesian.html:581: - - http://www.oldbaileyonline.org/forms/formMain.jsp is not an HTTPS link - -* At _site/en/lessons/naive-bayesian.html:593: - - http://en.wikipedia.org/wiki/Statistical_classification is not an HTTPS link - -* At _site/en/lessons/naive-bayesian.html:593: - - http://home.deib.polimi.it/matteucc/Clustering/tutorial_html/ is not an HTTPS link - -* At _site/en/lessons/naive-bayesian.html:616: - - http://www.paulgraham.com/spam.html is not an HTTPS link - -* At _site/en/lessons/naive-bayesian.html:696: - - http://www.yudkowsky.net/rational/bayes is not an HTTPS link - -* 
At _site/en/lessons/naive-bayesian.html:897: - - http://betterexplained.com/articles/using-logs-in-the-real-world/ is not an HTTPS link - -* At _site/en/lessons/naive-bayesian.html:915: - - http://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introbayes_sect004.htm is not an HTTPS link - -* At _site/en/lessons/naive-bayesian.html:1082: - - http://www.oldbaileyonline.org/static/DocAPI.jsp is not an HTTPS link - -* At _site/en/lessons/naive-bayesian.html:1098: - - http://www.oldbaileyonline.org/forms/formMain.jsp is not an HTTPS link - -* At _site/en/lessons/naive-bayesian.html:1310: - - http://pypy.org/ is not an HTTPS link - -* At _site/en/lessons/naive-bayesian.html:2014: - - http://snowball.tartarus.org/ is not an HTTPS link - -* At _site/en/lessons/naive-bayesian.html:2024: - - http://stevenloria.com/finding-important-words-in-a-document-using-tf-idf/ is not an HTTPS link - -* At _site/en/lessons/normalizing-data.html:119: - - 'a' tag is missing a reference - -* At _site/en/lessons/normalizing-data.html:138: - - 'a' tag is missing a reference - -* At _site/en/lessons/normalizing-data.html:175: - - 'a' tag is missing a reference - -* At _site/en/lessons/normalizing-data.html:201: - - 'a' tag is missing a reference - -* At _site/en/lessons/normalizing-data.html:538: - - http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 is not an HTTPS link - -* At _site/en/lessons/normalizing-data.html:724: - - http://unicode.org/ is not an HTTPS link - -* At _site/en/lessons/ocr-with-google-vision-and-tesseract.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/ocr-with-google-vision-and-tesseract.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/ocr-with-google-vision-and-tesseract.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/ocr-with-google-vision-and-tesseract.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/output-data-as-html-file.html:119: - - 'a' tag is missing a reference - -* At _site/en/lessons/output-data-as-html-file.html:138: - - 'a' tag is missing a reference - -* At _site/en/lessons/output-data-as-html-file.html:175: - - 'a' tag is missing a reference - -* At _site/en/lessons/output-data-as-html-file.html:201: - - 'a' tag is missing a reference - -* At _site/en/lessons/output-keywords-in-context-in-html-file.html:119: - - 'a' tag is missing a reference - -* At _site/en/lessons/output-keywords-in-context-in-html-file.html:138: - - 'a' tag is missing a reference - -* At _site/en/lessons/output-keywords-in-context-in-html-file.html:175: - - 'a' tag is missing a reference - -* At _site/en/lessons/output-keywords-in-context-in-html-file.html:201: - - 'a' tag is missing a reference - -* At _site/en/lessons/preserving-your-research-data.html:117: - - 'a' tag is missing a reference - -* At _site/en/lessons/preserving-your-research-data.html:136: - - 'a' tag is missing a reference - -* At _site/en/lessons/preserving-your-research-data.html:173: - - 'a' tag is missing a reference - -* At _site/en/lessons/preserving-your-research-data.html:199: - - 'a' tag is missing a reference - -* At _site/en/lessons/preserving-your-research-data.html:580: - - http://en.wikipedia.org/wiki/PRINCE2 is not an HTTPS link - -* At _site/en/lessons/preserving-your-research-data.html:621: - - http://en.wikipedia.org/wiki/Cross-platform is not an HTTPS link - -* At _site/en/lessons/preserving-your-research-data.html:644: - - http://en.wikipedia.org/wiki/Markdown is not an HTTPS 
link - -* At _site/en/lessons/preserving-your-research-data.html:648: - - http://notepad-plus-plus.org/ is not an HTTPS link - -* At _site/en/lessons/preserving-your-research-data.html:650: - - http://komodoide.com/komodo-edit/ is not an HTTPS link - -* At _site/en/lessons/preserving-your-research-data.html:699: - - http://cradledincaricature.com/2014/02/06/comic-art-beyond-the-print-shop/ is not an HTTPS link - -* At _site/en/lessons/preserving-your-research-data.html:709: - - http://www.theguardian.com/uk-news/2014/feb/20/rebekah-brooks-rupert-murdoch-phone-hacking-trial is not an HTTPS link - -* At _site/en/lessons/preserving-your-research-data.html:719: - - http://www.cartoons.ac.uk/record/SBD0931 is not an HTTPS link - -* At _site/en/lessons/preserving-your-research-data.html:726: - - http://www.oldbaileyonline.org/browse.jsp?ref=OA16780417 is not an HTTPS link - -* At _site/en/lessons/preserving-your-research-data.html:901: - - http://historyonics.blogspot.co.uk/2014/01/judging-book-by-its-url.html is not an HTTPS link - -* At _site/en/lessons/preserving-your-research-data.html:905: - - http://earlymodernnotes.wordpress.com/2013/05/18/unclean-unclean-what-historians-can-do-about-sharing-our-messy-research-data/ is not an HTTPS link - -* At _site/en/lessons/preserving-your-research-data.html:918: - - http://britishlibrary.typepad.co.uk/collectioncare/2013/09/the-twelve-principles-of-digital-preservation.html is not an HTTPS link - -* At _site/en/lessons/preserving-your-research-data.html:927: - - http://data-archive.ac.uk/create-manage/document is not an HTTPS link - -* At _site/en/lessons/qgis-layers.html:121: - - 'a' tag is missing a reference - -* At _site/en/lessons/qgis-layers.html:140: - - 'a' tag is missing a reference - -* At _site/en/lessons/qgis-layers.html:177: - - 'a' tag is missing a reference - -* At _site/en/lessons/qgis-layers.html:203: - - 'a' tag is missing a reference - -* At _site/en/lessons/qgis-layers.html:529: - - http://qgis.org/en/site/forusers/download.html is not an HTTPS link - -* At _site/en/lessons/qgis-layers.html:539: - - http://www.kyngchaos.com/software/qgis is not an HTTPS link - -* At _site/en/lessons/qgis-layers.html:541: - - http://www.kyngchaos.com/software/archive is not an HTTPS link - -* At _site/en/lessons/qgis-layers.html:609: - - http://www.gov.pe.ca/gis/download.php3?name=coastline&file_format=SHP is not an HTTPS link - -* At _site/en/lessons/qgis-layers.html:610: - - http://www.gov.pe.ca/gis/download.php3?name=lot_town&file_format=SHP is not an HTTPS link - -* At _site/en/lessons/qgis-layers.html:611: - - http://www.gov.pe.ca/gis/download.php3?name=hydronetwork&file_format=SHP is not an HTTPS link - -* At _site/en/lessons/qgis-layers.html:612: - - http://www.gov.pe.ca/gis/download.php3?name=forest_35&file_format=SHP is not an HTTPS link - -* At _site/en/lessons/qgis-layers.html:613: - - http://www.gov.pe.ca/gis/download.php3?name=nat_parks&file_format=SHP is not an HTTPS link - -* At _site/en/lessons/qgis-layers.html:634: - - http://en.wikipedia.org/wiki/Spatial_reference_system is not an HTTPS link - -* At _site/en/lessons/qgis-layers.html:642: - - http://www.gov.pe.ca/gis/index.php3?number=77865&lang=E is not an HTTPS link - -* At _site/en/lessons/qgis-layers.html:737: - - http://web.archive.org/web/20180807132308/http://qgis.spatialthoughts.com/2012/04/tutorial-working-with-projections-in.html is not an HTTPS link - -* At _site/en/lessons/qgis-layers.html:768: - - http://www.gislounge.com/geodatabases-explored-vector-and-raster-data/ 
* At _site/en/lessons/qgis-layers.html:1312: http://en.wikipedia.org/wiki/Orthophoto is not an HTTPS link
* At _site/en/lessons/qgis-layers.html:1463: http://geospatialhistorian.wordpress.com/ is not an HTTPS link
* At _site/en/lessons/r-basics-with-tabular-data.html:117: 'a' tag is missing a reference
* At _site/en/lessons/r-basics-with-tabular-data.html:136: 'a' tag is missing a reference
* At _site/en/lessons/r-basics-with-tabular-data.html:173: 'a' tag is missing a reference
* At _site/en/lessons/r-basics-with-tabular-data.html:199: 'a' tag is missing a reference
* At _site/en/lessons/r-basics-with-tabular-data.html:1016: http://web.archive.org/web/20191015004305/https://www.nceas.ucsb.edu/files/scicomp/Dloads/RProgramming/BestFirstRTutorial.pdf is not an HTTPS link
* At _site/en/lessons/r-basics-with-tabular-data.html:1020: http://dh-r.lincolnmullen.com/ is not an HTTPS link
* At _site/en/lessons/research-data-with-unix.html:119: 'a' tag is missing a reference
* At _site/en/lessons/research-data-with-unix.html:138: 'a' tag is missing a reference
* At _site/en/lessons/research-data-with-unix.html:175: 'a' tag is missing a reference
* At _site/en/lessons/research-data-with-unix.html:201: 'a' tag is missing a reference
* At _site/en/lessons/research-data-with-unix.html:506: http://msysgit.github.io/ is not an HTTPS link
* At _site/en/lessons/research-data-with-unix.html:514: http://www.7-zip.org/ is not an HTTPS link
* At _site/en/lessons/research-data-with-unix.html:536: http://en.wikipedia.org/wiki/Tab-separated_values is not an HTTPS link
* At _site/en/lessons/research-data-with-unix.html:538: http://en.wikipedia.org/wiki/Comma-separated_values is not an HTTPS link
* At _site/en/lessons/research-data-with-unix.html:597: http://williamjturkel.net/2013/06/15/basic-text-analysis-with-command-line-tools-in-linux/ is not an HTTPS link
* At _site/en/lessons/research-data-with-unix.html:598: http://williamjturkel.net/2013/06/20/pattern-matching-and-permuted-term-indexing-with-command-line-tools-in-linux/ is not an HTTPS link
* At _site/en/lessons/retired/OCR-and-Machine-Translation.html:87: 'a' tag is missing a reference
* At _site/en/lessons/retired/OCR-and-Machine-Translation.html:106: 'a' tag is missing a reference
* At _site/en/lessons/retired/OCR-and-Machine-Translation.html:143: 'a' tag is missing a reference
* At _site/en/lessons/retired/OCR-and-Machine-Translation.html:169: 'a' tag is missing a reference
* At _site/en/lessons/retired/OCR-and-Machine-Translation.html:523: http://www.fmwconcepts.com/imagemagick/textcleaner/index.php is not an HTTPS link
* At _site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html:87: 'a' tag is missing a reference
* At _site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html:106: 'a' tag is missing a reference
* At _site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html:143: 'a' tag is missing a reference
* At _site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html:169: 'a' tag is missing a reference
* At _site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html:444: 'a' tag is missing a reference
* At _site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html:488: 'a' tag is missing a reference
* At _site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html:662: 'a' tag is missing a reference
* At _site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html:667: http://www.7-zip.org/ is not an HTTPS link
* At _site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html:788: 'a' tag is missing a reference
* At _site/en/lessons/retired/counting-frequencies-from-zotero-items.html:87: 'a' tag is missing a reference
* At _site/en/lessons/retired/counting-frequencies-from-zotero-items.html:106: 'a' tag is missing a reference
* At _site/en/lessons/retired/counting-frequencies-from-zotero-items.html:143: 'a' tag is missing a reference
* At _site/en/lessons/retired/counting-frequencies-from-zotero-items.html:169: 'a' tag is missing a reference
* At _site/en/lessons/retired/creating-new-items-in-zotero.html:87: 'a' tag is missing a reference
* At _site/en/lessons/retired/creating-new-items-in-zotero.html:106: 'a' tag is missing a reference
* At _site/en/lessons/retired/creating-new-items-in-zotero.html:143: 'a' tag is missing a reference
* At _site/en/lessons/retired/creating-new-items-in-zotero.html:169: 'a' tag is missing a reference
* At _site/en/lessons/retired/getting-started-with-github-desktop.html:87: 'a' tag is missing a reference
* At _site/en/lessons/retired/getting-started-with-github-desktop.html:106: 'a' tag is missing a reference
* At _site/en/lessons/retired/getting-started-with-github-desktop.html:143: 'a' tag is missing a reference
* At _site/en/lessons/retired/getting-started-with-github-desktop.html:169: 'a' tag is missing a reference
* At _site/en/lessons/retired/getting-started-with-github-desktop.html:449: http://swcarpentry.github.io/git-novice/ is not an HTTPS link
* At _site/en/lessons/retired/getting-started-with-github-desktop.html:611: http://flight-manual.atom.io/ is not an HTTPS link
* At _site/en/lessons/retired/getting-started-with-github-desktop.html:728: http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:87: 'a' tag is missing a reference
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:106: 'a' tag is missing a reference
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:143: 'a' tag is missing a reference
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:169: 'a' tag is missing a reference
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:514: http://collection.britishmuseum.org is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:514: http://labs.europeana.eu/api/linked-open-data-introduction is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:515: http://americanart.si.edu is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:515: http://britishart.yale.edu/collections/using-collections/technology/linked-open-data is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:516: http://vocab.getty.edu is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:589: http://plot.ly is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:590: http://palladio.designhumanities.org/ is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:794: http://collection.britishmuseum.org/sparql is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:816: http://collection.britishmuseum.org/id/object/PPA82633 is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:830: http://collection.britishmuseum.org/sparql?query=SELECT+*%0D%0AWHERE+%7B%0D%0A++%3Chttp%3A%2F%2Fcollection.britishmuseum.org%2Fid%2Fobject%2FPPA82633%3E+%3Fp+%3Fo+.%0D%0A++%7D&_implicit=false&_equivalent=false&_form=%2Fsparql is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:1104: http://sparql.europeana.eu/ is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:1106: http://wiki.dbpedia.org/ is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:1107: http://sws.geonames.org/ is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:1111: http://sparql.europeana.eu/ is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:1186: http://openrefine.org/ is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:1189: http://stedolan.github.io/jq/download/ is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:1200: http://palladio.designhumanities.org/ is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:1272: http://en.wikibooks.org/wiki/XQuery/SPARQL_Tutorial is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:1281: http://labs.europeana.eu/api/linked-open-data-SPARQL-endpoint is not an HTTPS link
* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:1282: http://vocab.getty.edu/queries is not an HTTPS link
* At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:87: 'a' tag is missing a reference
* At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:106: 'a' tag is missing a reference
* At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:143: 'a' tag is missing a reference
* At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:169: 'a' tag is missing a reference
* At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:490: http://www.gizmag.com/ikea-augmented-reality-catalog-app/28703/ is not an HTTPS link
* At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:490: http://www.digi-capital.com/news/2015/04/augmentedvirtual-reality-to-hit-150-billion-disrupting-mobile-by-2020/#.VbetCU1VhHw is not an HTTPS link
* At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:508: http://www.tamikothiel.com/AR/clouding-green.html is not an HTTPS link
* At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:510: http://web.archive.org/web/20180421163517/http://english.ufl.edu/trace_arcs/ is not an HTTPS link
* At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:516: http://docs.unity3d.com/Manual/LearningtheInterface.html is not an HTTPS link
* At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:649: http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html is not an HTTPS link
* At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:1046: http://www.gimp.org/ is not an HTTPS link
* At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:1335: http://docs.unity3d.com/Manual/Transforms.html is not an HTTPS link
* At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:1392: http://developer.android.com/tools/device.html is not an HTTPS link
* At _site/en/lessons/retired/intro-to-beautiful-soup.html:87: 'a' tag is missing a reference
* At _site/en/lessons/retired/intro-to-beautiful-soup.html:106: 'a' tag is missing a reference
* At _site/en/lessons/retired/intro-to-beautiful-soup.html:143: 'a' tag is missing a reference
* At _site/en/lessons/retired/intro-to-beautiful-soup.html:169: 'a' tag is missing a reference
* At _site/en/lessons/retired/intro-to-beautiful-soup.html:456: http://praxis.scholarslab.org/resources/bash/ is not an HTTPS link
* At _site/en/lessons/retired/intro-to-beautiful-soup.html:463: http://www.crummy.com/software/BeautifulSoup/bs4/doc/ is not an HTTPS link
* At _site/en/lessons/retired/intro-to-beautiful-soup.html:475: http://www.crummy.com/software/BeautifulSoup/bs4/doc/ is not an HTTPS link
* At _site/en/lessons/retired/intro-to-beautiful-soup.html:601: http://urllib3.readthedocs.org/en/latest/ is not an HTTPS link
* At _site/en/lessons/retired/intro-to-beautiful-soup.html:609: http://bioguide.congress.gov/biosearch/biosearch.asp is not an HTTPS link
* At _site/en/lessons/retired/intro-to-the-zotero-api.html:87: 'a' tag is missing a reference
* At _site/en/lessons/retired/intro-to-the-zotero-api.html:106: 'a' tag is missing a reference
* At _site/en/lessons/retired/intro-to-the-zotero-api.html:143: 'a' tag is missing a reference
* At _site/en/lessons/retired/intro-to-the-zotero-api.html:169: 'a' tag is missing a reference
* At _site/en/lessons/retired/intro-to-the-zotero-api.html:449: http://zotero.org is not an HTTPS link
* At _site/en/lessons/scalable-reading-of-structured-data.html:125: 'a' tag is missing a reference
* At _site/en/lessons/scalable-reading-of-structured-data.html:144: 'a' tag is missing a reference
* At _site/en/lessons/scalable-reading-of-structured-data.html:181: 'a' tag is missing a reference
* At _site/en/lessons/scalable-reading-of-structured-data.html:207: 'a' tag is missing a reference
* At _site/en/lessons/sentiment-analysis-syuzhet.html:119: 'a' tag is missing a reference
* At _site/en/lessons/sentiment-analysis-syuzhet.html:138: 'a' tag is missing a reference
* At _site/en/lessons/sentiment-analysis-syuzhet.html:175: 'a' tag is missing a reference
* At _site/en/lessons/sentiment-analysis-syuzhet.html:201: 'a' tag is missing a reference
* At _site/en/lessons/sentiment-analysis.html:120: 'a' tag is missing a reference
* At _site/en/lessons/sentiment-analysis.html:139: 'a' tag is missing a reference
* At _site/en/lessons/sentiment-analysis.html:176: 'a' tag is missing a reference
* At _site/en/lessons/sentiment-analysis.html:202: 'a' tag is missing a reference
* At _site/en/lessons/sentiment-analysis.html:506: http://www.nltk.org/ is not an HTTPS link
* At _site/en/lessons/sentiment-analysis.html:549: http://journals.sagepub.com/doi/abs/10.1177/1749975514542486 is not an HTTPS link
* At _site/en/lessons/sentiment-analysis.html:549: http://www.emeraldinsight.com/doi/abs/10.1108/S0733-558X%282014%290000040001 is not an HTTPS link
* At _site/en/lessons/sentiment-analysis.html:567: http://www.nltk.org/install.html is not an HTTPS link
* At _site/en/lessons/sentiment-analysis.html:579: http://www.nltk.org/_modules/nltk/sentiment/vader.html is not an HTTPS link
* At _site/en/lessons/sentiment-analysis.html:602: http://www.nltk.org/_modules/nltk/sentiment/vader.html is not an HTTPS link
* At _site/en/lessons/shiny-leaflet-newspaper-map-tutorial.html:117: 'a' tag is missing a reference
* At _site/en/lessons/shiny-leaflet-newspaper-map-tutorial.html:136: 'a' tag is missing a reference
* At _site/en/lessons/shiny-leaflet-newspaper-map-tutorial.html:173: 'a' tag is missing a reference
* At _site/en/lessons/shiny-leaflet-newspaper-map-tutorial.html:199: 'a' tag is missing a reference
* At _site/en/lessons/simulating-historical-communication-networks-python.html:123: 'a' tag is missing a reference
* At _site/en/lessons/simulating-historical-communication-networks-python.html:142: 'a' tag is missing a reference
* At _site/en/lessons/simulating-historical-communication-networks-python.html:179: 'a' tag is missing a reference
* At _site/en/lessons/simulating-historical-communication-networks-python.html:205: 'a' tag is missing a reference
* At _site/en/lessons/simulating-historical-communication-networks-python.html:1500: http://arxiv.org/abs/2112.04336 is not an HTTPS link
* At _site/en/lessons/sonification.html:117: 'a' tag is missing a reference
* At _site/en/lessons/sonification.html:136: 'a' tag is missing a reference
* At _site/en/lessons/sonification.html:173: 'a' tag is missing a reference
* At _site/en/lessons/sonification.html:199: 'a' tag is missing a reference
* At _site/en/lessons/sonification.html:517: http://web.archive.org/web/20190203083307/http://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html is not an HTTPS link
* At _site/en/lessons/sonification.html:521: http://blog.taracopplestone.co.uk/making-things-photobashing-as-archaeological-remediation/ is not an HTTPS link
* At _site/en/lessons/sonification.html:521: http://www.samplereality.com/2012/05/02/notes-towards-a-deformed-humanities/ is not an HTTPS link
* At _site/en/lessons/sonification.html:521: http://nowviskie.org/2013/resistance-in-the-materials/ is not an HTTPS link
* At _site/en/lessons/sonification.html:521: http://nooart.org/post/73353953758/temkin-glitchhumancomputerinteraction is not an HTTPS link
* At _site/en/lessons/sonification.html:533: http://musicalgorithms.org/ is not an HTTPS link
* At _site/en/lessons/sonification.html:535: http://sonic-pi.net/ is not an HTTPS link
* At _site/en/lessons/sonification.html:548: http://www.icad.org/Proceedings/2008/Hermann2008.pdf is not an HTTPS link
* At _site/en/lessons/sonification.html:559: http://waxy.org/2015/12/if_drake_was_born_a_piano/ is not an HTTPS link
* At _site/en/lessons/sonification.html:571: http://musicalgorithms.org/ is not an HTTPS link
* At _site/en/lessons/sonification.html:571: http://musicalgorithms.org/3.0/index.html is not an HTTPS link
* At _site/en/lessons/sonification.html:625: http://www.lynda.com/GarageBand-tutorials/Importing-audio-tracks/156620/164050-4.html is not an HTTPS link
* At _site/en/lessons/sonification.html:663: http://musicalgorithms.org/3.0/index.html is not an HTTPS link
* At _site/en/lessons/sonification.html:706: http://www.ethanhein.com/wp/2010/scales-and-emotions/ is not an HTTPS link
* At _site/en/lessons/sonification.html:771: http://docs.python-guide.org/en/latest/starting/install/win/ is not an HTTPS link
* At _site/en/lessons/sonification.html:775: http://www.howtogeek.com/235101/10-ways-to-open-the-command-prompt-in-windows-10/ is not an HTTPS link
* At _site/en/lessons/sonification.html:836: http://abcnotation.com/wiki/abc:standard:v2.1 is not an HTTPS link
* At _site/en/lessons/sonification.html:836: http://trillian.mit.edu/~jc/music/abc/ABCcontrib.html is not an HTTPS link
* At _site/en/lessons/sonification.html:840: http://themacroscope.org is not an HTTPS link
* At _site/en/lessons/sonification.html:991: http://sonic-pi.net is not an HTTPS link
* At _site/en/lessons/sonification.html:997: http://puffin.creighton.edu/jesuit/relations/ is not an HTTPS link
* At _site/en/lessons/sonification.html:1057: http://library.gwu.edu/scholarly-technology-group/posts/sound-library-work is not an HTTPS link
* At _site/en/lessons/sonification.html:1060: http://www.lilypond.org/ is not an HTTPS link
* At _site/en/lessons/sonification.html:1064: http://www.trevorowens.org/2012/11/discovery-and-justification-are-different-notes-on-sciencing-the-humanities/ is not an HTTPS link
* At _site/en/lessons/sonification.html:1069: 'a' tag is missing a reference
* At _site/en/lessons/sonification.html:1070: 'a' tag is missing a reference
* At _site/en/lessons/sonification.html:1071: 'a' tag is missing a reference
* At _site/en/lessons/sonification.html:1072: 'a' tag is missing a reference
* At _site/en/lessons/sonification.html:1073: 'a' tag is missing a reference
* At _site/en/lessons/sonification.html:1074: 'a' tag is missing a reference
* At _site/en/lessons/sonification.html:1075: 'a' tag is missing a reference
* At _site/en/lessons/sonification.html:1079: 'a' tag is missing a reference
* At _site/en/lessons/sonification.html:1079: http://waxy.org/2015/12/if_drake_was_born_a_piano/ is not an HTTPS link
* At _site/en/lessons/sonification.html:1081: 'a' tag is missing a reference
* At _site/en/lessons/sonification.html:1081: http://web.archive.org/web/20190203083307/http://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html is not an HTTPS link
* At _site/en/lessons/sonification.html:1083: 'a' tag is missing a reference
* At _site/en/lessons/sonification.html:1083: http://www.jstor.org/stable/734136 is not an HTTPS link
* At _site/en/lessons/sonification.html:1085: 'a' tag is missing a reference
* At _site/en/lessons/sonification.html:1085: http://www.icad.org/Proceedings/2008/Hermann2008.pdf is not an HTTPS link
* At _site/en/lessons/sonification.html:1087: 'a' tag is missing a reference
* At _site/en/lessons/sonification.html:1089: 'a' tag is missing a reference
* At _site/en/lessons/space-place-gazetteers.html:119: 'a' tag is missing a reference
* At _site/en/lessons/space-place-gazetteers.html:138: 'a' tag is missing a reference
* At _site/en/lessons/space-place-gazetteers.html:175: 'a' tag is missing a reference
* At _site/en/lessons/space-place-gazetteers.html:201: 'a' tag is missing a reference
* At _site/en/lessons/space-place-gazetteers.html:547: http://bombsight.org/#17/51.50595/-0.10680 is not an HTTPS link
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:119: 'a' tag is missing a reference
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:138: 'a' tag is missing a reference
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:175: 'a' tag is missing a reference
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:201: 'a' tag is missing a reference
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:588: http://notepad-plus-plus.org is not an HTTPS link
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:892: http://daringfireball.net/projects/markdown/dingus is not an HTTPS link
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1145: http://editor.citationstyles.org/about/ is not an HTTPS link
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1207: http://stackoverflow.com/questions/tagged/pandoc is not an HTTPS link
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1208: http://web.archive.org/web/20190203062832/http://digitalhumanities.org/answers/ is not an HTTPS link
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1214: http://web.archive.org/web/20140120195538/http://mashable.com/2013/06/24/markdown-tools/ is not an HTTPS link
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1218: http://mouapp.com/ is not an HTTPS link
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1218: http://writemonkey.com is not an HTTPS link
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1219: http://www.sublimetext.com/ is not an HTTPS link
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1222: http://prose.io is not an HTTPS link
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1223: http://www.authorea.com is not an HTTPS link
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1224: http://www.draftin.com is not an HTTPS link
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1227: http://gitit.net/ is not an HTTPS link
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1232: http://github.com/fauno/jekyll-pandoc-multiple-formats is not an HTTPS link
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1233: http://jaspervdj.be/hakyll/ is not an HTTPS link
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1242: http://readthedocs.org is not an HTTPS link
* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1254: http://www.antipope.org/charlie/blog-static/2013/10/why-microsoft-word-must-die.html is not an HTTPS link
* At _site/en/lessons/temporal-network-analysis-with-r.html:121: 'a' tag is missing a reference
* At _site/en/lessons/temporal-network-analysis-with-r.html:140: 'a' tag is missing a reference
* At _site/en/lessons/temporal-network-analysis-with-r.html:177: 'a' tag is missing a reference
* At _site/en/lessons/temporal-network-analysis-with-r.html:203: 'a' tag is missing a reference
* At _site/en/lessons/temporal-network-analysis-with-r.html:1193: http://www.epimodel.org/ is not an HTTPS link
* At _site/en/lessons/text-mining-with-extracted-features.html:136: 'a' tag is missing a reference
* At _site/en/lessons/text-mining-with-extracted-features.html:155: 'a' tag is missing a reference
* At _site/en/lessons/text-mining-with-extracted-features.html:192: 'a' tag is missing a reference
* At _site/en/lessons/text-mining-with-extracted-features.html:218: 'a' tag is missing a reference
* At _site/en/lessons/text-mining-with-extracted-features.html:551: http://mimno.infosci.cornell.edu/wordsim/nearest.html is not an HTTPS link
* At _site/en/lessons/text-mining-with-extracted-features.html:630: http://stackoverflow.com/a/19350234/233577 is not an HTTPS link
* At _site/en/lessons/text-mining-with-extracted-features.html:630: http://pandas.pydata.org/pandas-docs/version/0.15.2/install.html#recommended-dependencies is not an HTTPS link
* At _site/en/lessons/text-mining-with-extracted-features.html:930: http://hdl.handle.net/2027/nyp.33433074811310 is not an HTTPS link
* At _site/en/lessons/text-mining-with-extracted-features.html:1055: http://htrc.github.io/htrc-feature-reader/htrc_features/feature_reader.m.html#htrc_features.feature_reader.Volume.tokenlist is not an HTTPS link
* At _site/en/lessons/text-mining-with-extracted-features.html:1671: http://pandas.pydata.org/pandas-docs/stable/groupby.html is not an HTTPS link
* At _site/en/lessons/text-mining-with-extracted-features.html:1800: http://htrc.github.io/htrc-feature-reader/htrc_features/feature_reader.m.html is not an HTTPS link
* At _site/en/lessons/text-mining-with-extracted-features.html:1860: http://data.analytics.hathitrust.org/genre/fiction_paths.txt is not an HTTPS link
* At _site/en/lessons/text-mining-with-extracted-features.html:1860: http://data.analytics.hathitrust.org/genre/drama_paths.txt is not an HTTPS link
* At _site/en/lessons/text-mining-with-extracted-features.html:1860: http://data.analytics.hathitrust.org/genre/poetry_paths.txt is not an HTTPS link
* At _site/en/lessons/text-mining-youtube-comments.html:121: 'a' tag is missing a reference
* At _site/en/lessons/text-mining-youtube-comments.html:140: 'a' tag is missing a reference
* At _site/en/lessons/text-mining-youtube-comments.html:177: 'a' tag is missing a reference
* At _site/en/lessons/text-mining-youtube-comments.html:203: 'a' tag is missing a reference
* At _site/en/lessons/text-mining-youtube-comments.html:876: http://www.Wordfish.org/software.html is not an HTTPS link
* At _site/en/lessons/text-mining-youtube-comments.html:876: http://www.wordfish.org/ is not an HTTPS link
* At _site/en/lessons/text-mining-youtube-comments.html:906: http://www.wordfish.org/ is not an HTTPS link
* At _site/en/lessons/text-mining-youtube-comments.html:906: http://www.Wordfish.org/software.html is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:121: 'a' tag is missing a reference
* At _site/en/lessons/topic-modeling-and-mallet.html:140: 'a' tag is missing a reference
* At _site/en/lessons/topic-modeling-and-mallet.html:177: 'a' tag is missing a reference
* At _site/en/lessons/topic-modeling-and-mallet.html:203: 'a' tag is missing a reference
* At _site/en/lessons/topic-modeling-and-mallet.html:529: http://mallet.cs.umass.edu/mailinglist.php is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:542: http://www.cs.umbc.edu/~hillol/NGDM07/abstracts/talks/MKirschenbaum.pdf is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:545: http://www.worldcat.org/title/reading-machines-toward-an-algorithmic-criticism/oclc/708761605&referer=brief_results is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:550: http://voyant-tools.org is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:597: http://arxiv.org/abs/1003.6087/ is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:606: http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:616: http://dsl.richmond.edu/dispatch/ is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:629: http://mallet.cs.umass.edu/index.php is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:630: http://en.wikipedia.org/wiki/Gibbs_sampling is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:657: http://mallet.cs.umass.edu/index.php is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:657: http://mallet.cs.umass.edu/download.php is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:658: http://www.oracle.com/technetwork/java/javase/downloads/index.html is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:836: http://mallet.cs.umass.edu/download.php is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:837: http://www.oracle.com/technetwork/java/javase/downloads/index.html is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:1177: http://dsl.richmond.edu/dispatch/ is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:1205: http://electricarchaeology.ca/2012/07/09/mining-a-day-of-archaeology/ is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:1211: http://electricarchaeology.ca/2012/06/08/mining-the-open-web-with-looted-heritage-draft/ is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:1222: http://tedunderwood.wordpress.com/2012/04/07/topic-modeling-made-just-simple-enough/ is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:1225: http://web.archive.org/web/20160704150726/http://www.lisarhody.com:80/some-assembly-required/ is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:1228: http://dl.acm.org/citation.cfm?id=944937 is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:1230: http://mimno.infosci.cornell.edu/topics.html is not an HTTPS link
* At _site/en/lessons/topic-modeling-and-mallet.html:1233: http://www.perseus.tufts.edu/publications/02-jocch-mimno.pdf is not an HTTPS link
* At _site/en/lessons/transcribing-handwritten-text-with-python-and-azure.html:117: 'a' tag is missing a reference
* At _site/en/lessons/transcribing-handwritten-text-with-python-and-azure.html:136: 'a' tag is missing a reference
* At _site/en/lessons/transcribing-handwritten-text-with-python-and-azure.html:173: 'a' tag is missing a reference
* At _site/en/lessons/transcribing-handwritten-text-with-python-and-azure.html:199: 'a' tag is missing a reference
* At _site/en/lessons/transforming-xml-with-xsl.html:117: 'a' tag is missing a reference
* At _site/en/lessons/transforming-xml-with-xsl.html:136: 'a' tag is missing a reference
* At _site/en/lessons/transforming-xml-with-xsl.html:173: 'a' tag is missing a reference
* At _site/en/lessons/transforming-xml-with-xsl.html:199: 'a' tag is missing a reference
* At _site/en/lessons/transforming-xml-with-xsl.html:728: http://scissors-and-paste.net is not an HTTPS link
* At _site/en/lessons/transforming-xml-with-xsl.html:823: http://www.w3.org/ is not an HTTPS link
* At _site/en/lessons/transliterating.html:117: 'a' tag is missing a reference
* At _site/en/lessons/transliterating.html:136: 'a' tag is missing a reference
* At _site/en/lessons/transliterating.html:173: 'a' tag is missing a reference
* At _site/en/lessons/transliterating.html:199: 'a' tag is missing a reference
* At _site/en/lessons/transliterating.html:476: http://en.wikipedia.org/wiki/Ascii is not an HTTPS link
* At _site/en/lessons/transliterating.html:481: http://lists.memo.ru is not an HTTPS link
* At _site/en/lessons/transliterating.html:481: http://en.wikipedia.org/wiki/Cyrillic_script is not an HTTPS link
* At _site/en/lessons/transliterating.html:481: http://en.wikipedia.org/wiki/Latin_script is not an HTTPS link
* At _site/en/lessons/transliterating.html:483: http://en.wikipedia.org/wiki/Unicode is not an HTTPS link
* At _site/en/lessons/transliterating.html:507: http://en.wikipedia.org/wiki/ALA-LC_romanization_for_Russian is not an HTTPS link
* At _site/en/lessons/transliterating.html:511: http://www.crummy.com/software/BeautifulSoup/ is not an HTTPS link
* At _site/en/lessons/transliterating.html:519: http://en.wikipedia.org/wiki/Glasnost is not an HTTPS link
* At _site/en/lessons/transliterating.html:522: http://lists.memo.ru is not an HTTPS link
* At _site/en/lessons/transliterating.html:530: http://lists.memo.ru/d1/f1.htm is not an HTTPS link
* At _site/en/lessons/transliterating.html:573: http://www.unicode.org/standard/WhatIsUnicode.html is not an HTTPS link
* At _site/en/lessons/transliterating.html:592: http://en.wikipedia.org/wiki/Comma-separated_values is not an HTTPS link
* At _site/en/lessons/transliterating.html:657: http://web.archive.org/web/20170312041508/http://www.lcweb.loc.gov/catdir/cpso/romanization/russian.pdf is not an HTTPS link
* At _site/en/lessons/transliterating.html:664: http://en.wikipedia.org/wiki/Cyrillic_script_in_Unicode is not an HTTPS link
* At _site/en/lessons/transliterating.html:665: http://www.unicode.org/charts/ is not an HTTPS link
* At _site/en/lessons/transliterating.html:837: http://www.w3schools.com/css/ is not an HTTPS link
* At _site/en/lessons/understanding-creating-word-embeddings.html:121: 'a' tag is missing a reference
* At _site/en/lessons/understanding-creating-word-embeddings.html:140: 'a' tag is missing a reference
* At _site/en/lessons/understanding-creating-word-embeddings.html:177: 'a' tag is missing a reference
* At _site/en/lessons/understanding-creating-word-embeddings.html:203: 'a' tag is missing a reference
* At _site/en/lessons/understanding-regular-expressions.html:117: 'a' tag is missing a reference
* At _site/en/lessons/understanding-regular-expressions.html:136: 'a' tag is missing a reference
* At _site/en/lessons/understanding-regular-expressions.html:173: 'a' tag is missing a reference
* At _site/en/lessons/understanding-regular-expressions.html:199: 'a' tag is missing a reference
* At _site/en/lessons/understanding-regular-expressions.html:579: http://www.libreoffice.org/download is not an HTTPS link
* At _site/en/lessons/understanding-regular-expressions.html:626: http://archive.org/details/jstor-4560629/ is not an HTTPS link
* At _site/en/lessons/understanding-regular-expressions.html:628: http://archive.org/stream/jstor-4560629/4560629#page/n0/mode/2up is not an HTTPS link
* At _site/en/lessons/understanding-regular-expressions.html:646: http://archive.org/stream/jstor-4560629/4560629_djvu.txt is not an HTTPS link
* At _site/en/lessons/understanding-regular-expressions.html:1412: http://en.wikipedia.org/wiki/Regular_expressions is not an HTTPS link
* At _site/en/lessons/understanding-regular-expressions.html:1425: http://rubular.com/ is not an HTTPS link
* At _site/en/lessons/understanding-regular-expressions.html:1430: http://dh.obdurodon.org/regex.html is not an HTTPS link
* At _site/en/lessons/up-and-running-with-omeka.html:117: 'a' tag is missing a reference
* At _site/en/lessons/up-and-running-with-omeka.html:136: 'a' tag is missing a reference
* At _site/en/lessons/up-and-running-with-omeka.html:173: 'a' tag is missing a reference
* At _site/en/lessons/up-and-running-with-omeka.html:199: 'a' tag is missing a reference
* At _site/en/lessons/up-and-running-with-omeka.html:507: http://www.omeka.net is not an HTTPS link
* At _site/en/lessons/up-and-running-with-omeka.html:527: http://www.omeka.net is not an HTTPS link
* At _site/en/lessons/up-and-running-with-omeka.html:906: http://info.omeka.net is not an HTTPS link
* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:119: 'a' tag is missing a reference
* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:138: 'a' tag is missing a reference
* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:175: 'a' tag is missing a reference
* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:201: 'a' tag is missing a reference
* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:488: http://ggplot2.tidyverse.org is not an HTTPS link
* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:530: http://www.ggplot2-exts.org/ is not an HTTPS link
* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:946: http://colorbrewer2.org is not an HTTPS link
* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:1149: http://www.ggplot2-exts.org/ is not an HTTPS link
* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:1200: http://r4ds.hadley.nz/ is not an HTTPS link
* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:1218: http://www.ggplot2-exts.org/gallery/ is not an HTTPS link
* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:1227: http://www.cookbook-r.com/Graphs/ is not an HTTPS link
* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:1227: http://shop.oreilly.com/product/0636920023135.do is not an HTTPS link
* At _site/en/lessons/using-javascript-to-create-maps.html:123: 'a' tag is missing a reference
* At _site/en/lessons/using-javascript-to-create-maps.html:142: 'a' tag is missing a reference
* At _site/en/lessons/using-javascript-to-create-maps.html:179: 'a' tag is missing a reference
* At _site/en/lessons/using-javascript-to-create-maps.html:205: 'a' tag is missing a reference
* At _site/en/lessons/using-javascript-to-create-maps.html:496: http://wcm1.web.rice.edu/mining-bpl-antislavery.html is not an HTTPS link
* At _site/en/lessons/using-javascript-to-create-maps.html:496: http://leafletjs.com/ is not an HTTPS link
* At _site/en/lessons/using-javascript-to-create-maps.html:498: http://postgis.net/ is not an HTTPS link
* At _site/en/lessons/using-javascript-to-create-maps.html:498: http://postgis.net/ is not an HTTPS link
* At _site/en/lessons/using-javascript-to-create-maps.html:498: http://dp.la is not an HTTPS link
* At _site/en/lessons/using-javascript-to-create-maps.html:502: http://hdlab.stanford.edu/palladio/ is not an HTTPS link
* At _site/en/lessons/using-javascript-to-create-maps.html:518: http://leafletjs.com/ is not an HTTPS link
* At _site/en/lessons/using-javascript-to-create-maps.html:518: http://jqueryui.com/ is not an HTTPS link
* At _site/en/lessons/using-javascript-to-create-maps.html:534: http://zotero.org is not an HTTPS link
* At _site/en/lessons/using-javascript-to-create-maps.html:558: http://www.gpsvisualizer.com/geocoder/ is not an HTTPS link
* At _site/en/lessons/using-javascript-to-create-maps.html:633: http:///www.mapbox.com is not an HTTPS link
* At _site/en/lessons/using-javascript-to-create-maps.html:744: http://stackoverflow.com/questions/16151018/npm-throws-error-without-sudo/24404451#24404451 is not an HTTPS link
* At _site/en/lessons/vector-layers-qgis.html:121: 'a' tag is missing a reference
* At _site/en/lessons/vector-layers-qgis.html:140: 'a' tag is missing a reference
* At _site/en/lessons/vector-layers-qgis.html:177: 'a' tag is missing a reference
* At _site/en/lessons/vector-layers-qgis.html:203: 'a' tag is missing a reference
* At _site/en/lessons/vector-layers-qgis.html:683: http://www.lib.uwaterloo.ca/locations/umd/digital/clump_classes.html is not an HTTPS link
* At _site/en/lessons/vector-layers-qgis.html:878: http://en.wikipedia.org/wiki/Prince_Royalty,_Prince_Edward_Island is not an HTTPS link
* At _site/en/lessons/vector-layers-qgis.html:1233: http://geospatialhistorian.wordpress.com/ is not an HTTPS link
* At _site/en/lessons/viewing-html-files.html:119: 'a' tag is missing a reference
* At _site/en/lessons/viewing-html-files.html:138: 'a' tag is missing a reference
* At _site/en/lessons/viewing-html-files.html:175: 'a' tag is missing a reference
* At _site/en/lessons/viewing-html-files.html:201: 'a' tag is missing a reference
* At _site/en/lessons/viewing-html-files.html:557: http://www.w3schools.com/html/default.asp is not an HTTPS link
* At _site/en/lessons/viewing-html-files.html:634: http://www.w3schools.com/html/default.asp is not an HTTPS link
* At _site/en/lessons/viewing-html-files.html:635: http://www.w3schools.com/html/html5_intro.asp is not an HTTPS link
* At _site/en/lessons/visualizing-with-bokeh.html:117: 'a' tag is missing a reference
* At _site/en/lessons/visualizing-with-bokeh.html:136: 'a' tag is missing a reference
* At _site/en/lessons/visualizing-with-bokeh.html:173: 'a' tag is missing a reference
* At _site/en/lessons/visualizing-with-bokeh.html:199: 'a' tag is missing a reference
* At _site/en/lessons/visualizing-with-bokeh.html:626: http://jupyter.org is not an HTTPS link
* At _site/en/lessons/visualizing-with-bokeh.html:1101: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases is not an HTTPS link
* At _site/en/lessons/visualizing-with-bokeh.html:1190: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases is not an HTTPS link
* At _site/en/lessons/windows-installation.html:119: 'a' tag is missing a reference
* At _site/en/lessons/windows-installation.html:138: 'a' tag is missing a reference
* At _site/en/lessons/windows-installation.html:175: 'a' tag is missing a reference
* At _site/en/lessons/windows-installation.html:201: 'a' tag is missing a reference
* At _site/en/lessons/windows-installation.html:508: http://www.python.org/ is not an HTTPS link
* At _site/en/lessons/windows-installation.html:521: http://wiki.python.org/moin/PythonEditors/ is not an HTTPS link
* At _site/en/lessons/windows-installation.html:579: http://en.wikipedia.org/wiki/UTF-8 is not an HTTPS link
* At _site/en/lessons/working-with-batches-of-pdf-files.html:117: 'a' tag is missing a reference
* At _site/en/lessons/working-with-batches-of-pdf-files.html:136: 'a' tag is missing a reference
* At _site/en/lessons/working-with-batches-of-pdf-files.html:173: 'a' tag is missing a reference
* At _site/en/lessons/working-with-batches-of-pdf-files.html:199: 'a' tag is missing a reference
* At _site/en/lessons/working-with-text-files.html:119: 'a' tag is missing a reference
* At _site/en/lessons/working-with-text-files.html:138: 'a' tag is missing a reference
* At _site/en/lessons/working-with-text-files.html:175: 'a' tag is missing a reference
* At _site/en/lessons/working-with-text-files.html:201: 'a' tag is missing a reference
* At _site/en/lessons/working-with-text-files.html:622: http://docs.python.org/release/2.5.4/ref/keywords.html is not an HTTPS link
* At _site/en/lessons/working-with-text-files.html:734: http://en.wikibooks.org/wiki/Non-Programmer%27s_Tutorial_for_Python_2.6/Hello,_World is not an HTTPS link
* At _site/en/lessons/working-with-web-pages.html:119: 'a' tag is missing a reference
* At _site/en/lessons/working-with-web-pages.html:138: 'a' tag is missing a reference
* At _site/en/lessons/working-with-web-pages.html:175: 'a' tag is missing a reference
* At _site/en/lessons/working-with-web-pages.html:201: 'a' tag is missing a reference
* At _site/en/lessons/working-with-web-pages.html:545: http://www.oldbaileyonline.org/ is not an HTTPS link
* At _site/en/lessons/working-with-web-pages.html:612: http://en.wikipedia.org/wiki/Gordon_Riots is not an HTTPS link
* At _site/en/lessons/working-with-web-pages.html:645: http://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes is not an HTTPS link
* At _site/en/lessons/working-with-web-pages.html:647: http://www.oldbaileyonline.org/images.jsp?doc=178006280084 is not an HTTPS link
* At _site/en/privacy-policy.html:86: 'a' tag is missing a reference
* At _site/en/privacy-policy.html:105: 'a' tag is missing a reference
* At _site/en/privacy-policy.html:142: 'a' tag is missing a reference
* At _site/en/privacy-policy.html:168: 'a' tag is missing a reference
* At _site/en/project-team.html:86: 'a' tag is missing a reference
* At _site/en/project-team.html:105: 'a' tag is missing a reference
* At _site/en/project-team.html:142: 'a' tag is missing a reference
* At _site/en/project-team.html:168: 'a' tag is missing a reference
* At _site/en/project-team.html:308: http://twitter.com/maxcarlons is not an HTTPS link
* At _site/en/project-team.html:311: http://github.com/carlonim is not an HTTPS link
* At _site/en/project-team.html:412: http://github.com/lachapot is not an HTTPS link
* At _site/en/project-team.html:510: http://twitter.com/cosovschi is not an HTTPS link
* At _site/en/project-team.html:513: http://github.com/digitalkosovski is not an HTTPS link
* At _site/en/project-team.html:616: http://github.com/caiocmello is not an HTTPS link
* At _site/en/project-team.html:1180: http://github.com/semanticnoodles is not an HTTPS link
* At _site/en/project-team.html:1276: http://twitter.com/nabsiddiqui is not an HTTPS link
* At _site/en/project-team.html:1279: http://github.com/nabsiddiqui is not an HTTPS link
* At _site/en/project-team.html:1629: http://twitter.com/giulia_taurino is not an HTTPS link
* At _site/en/project-team.html:1632: http://github.com/giuliataurino is not an HTTPS link
* At _site/en/project-team.html:1802: http://www.alexwermercolan.com/ is not an HTTPS link
* At _site/en/project-team.html:1808: http://twitter.com/alexwermercolan is not an HTTPS link
* At _site/en/project-team.html:1811: http://github.com/hawc2 is not an HTTPS link
* At _site/en/project-team.html:2057: http://www.mariajoseafanador.com is not an HTTPS link
* At _site/en/project-team.html:2063: http://twitter.com/mariajoafana is not an HTTPS link
* At _site/en/project-team.html:2066: http://github.com/mariajoafana is not an HTTPS link
* At _site/en/project-team.html:2532: http://twitter.com/IsaGribomont is not an HTTPS link
* At _site/en/project-team.html:2535: http://github.com/isag91 is not an HTTPS link
* At _site/en/project-team.html:2743: http://twitter.com/espejolento is not an HTTPS link
* At _site/en/project-team.html:2746: http://github.com/silviaegt is not an HTTPS link
* At _site/en/project-team.html:3034: http://twitter.com/jenniferisve is not an HTTPS link
* At _site/en/project-team.html:3037: http://github.com/jenniferisasi is not an HTTPS link
* At _site/en/project-team.html:3359: http://twitter.com/enetreseles is not an HTTPS link
* At _site/en/project-team.html:3362: http://github.com/nllano is not an HTTPS link
* At _site/en/project-team.html:3566: http://twitter.com/jgob is not an HTTPS link
* At _site/en/project-team.html:3569: http://github.com/joshuagob is not an HTTPS link
* At _site/en/project-team.html:3861: http://twitter.com/rivaquiroga is not an HTTPS link
* At _site/en/project-team.html:3864: http://github.com/rivaquiroga is not an HTTPS link
* At _site/en/project-team.html:4155: http://github.com/nivaca is not an HTTPS link
* At _site/en/project-team.html:4368: http://github.com/marie-flesch is not an HTTPS link
* At _site/en/project-team.html:4511: http://github.com/matgille is not an HTTPS link
* At _site/en/project-team.html:4744: http://github.com/mhersent is not an HTTPS link
* At _site/en/project-team.html:4802: http://twitter.com/superHH is not an HTTPS link
* At _site/en/project-team.html:5054: http://github.com/DMathelier is not an HTTPS link
* At _site/en/project-team.html:5188: http://twitter.com/emilienschultz is not an HTTPS link
* At _site/en/project-team.html:5191: http://github.com/emilienschultz is not an HTTPS link
* At _site/en/project-team.html:5315: http://twitter.com/davvalent is not an HTTPS link
* At _site/en/project-team.html:5318: http://github.com/davvalent is not an HTTPS link
* At _site/en/project-team.html:5447: http://github.com/AlexandreWa is not an HTTPS link
* At _site/en/project-team.html:5582: http://github.com/josircg is not an HTTPS link
* At _site/en/project-team.html:5840: http://twitter.com/danielalvesfcsh is not an HTTPS link
* At _site/en/project-team.html:5843: http://github.com/DanielAlvesLABDH is not an HTTPS link
* At _site/en/project-team.html:6105: http://twitter.com/ericbrasiln is not an HTTPS link
* At _site/en/project-team.html:6108: http://github.com/ericbrasiln is not an HTTPS link
* At _site/en/project-team.html:6299: http://github.com/luisferla is not an HTTPS link
* At _site/en/project-team.html:6541: http://twitter.com/jimmy_medeiros is not an HTTPS link
* At _site/en/project-team.html:6544: http://github.com/JimmyMedeiros82 is not an HTTPS link
* At _site/en/project-team.html:6779: http://github.com/joanacvp is not an HTTPS link
* At _site/en/project-team.html:7025: http://twitter.com/araceletorres is not an HTTPS link
* At _site/en/project-team.html:7028: http://github.com/aracele is not an HTTPS link
* At _site/en/project-team.html:7284: http://twitter.com/j_w_baker is not an HTTPS link
* At _site/en/project-team.html:7287: http://github.com/drjwbaker is not an HTTPS link
* At _site/en/project-team.html:7719: http://adamcrymble.org is not an HTTPS link
* At _site/en/project-team.html:7725: http://twitter.com/Adam_Crymble is not an HTTPS link
* At _site/en/project-team.html:7728: http://github.com/acrymble is not an HTTPS link
* At _site/en/project-team.html:8196: http://github.com/adamfarquhar is not an HTTPS link
* At _site/en/project-team.html:8256: http://twitter.com/jenniferisve is not an HTTPS link
* At _site/en/project-team.html:8259: http://github.com/jenniferisasi is not an HTTPS link
* At _site/en/project-team.html:8587: http://twitter.com/rivaquiroga is not an HTTPS link
* At _site/en/project-team.html:8590: http://github.com/rivaquiroga is not an HTTPS link
* At _site/en/project-team.html:8876: http://twitter.com/amsichani is not an HTTPS link
* At _site/en/project-team.html:8879: http://github.com/amsichani is not an HTTPS link
* At _site/en/project-team.html:9219: http://twitter.com/AnisaHawes is not an HTTPS link
* At _site/en/project-team.html:9222: http://github.com/anisa-hawes is not an HTTPS link
* At _site/en/project-team.html:9431: http://github.com/charlottejmc is not an HTTPS link
* At _site/en/research.html:86: 'a' tag is missing a reference
* At _site/en/research.html:105: 'a' tag is missing a reference
* At _site/en/research.html:142: 'a' tag is missing a reference
* At _site/en/research.html:168: 'a' tag is missing a reference
* At _site/en/research.html:253: http://niche-canada.org/wp-content/uploads/2013/09/programming-historian-1.pdf is not an HTTPS link
* At _site/en/research.html:260: http://www.diva-portal.org/smash/record.jsf?pid=diva2%3A1508542&dswid=7551 is not an HTTPS link
* At _site/en/research.html:265: http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ is not an HTTPS link
* At _site/en/research.html:277: http://www.digitalhumanities.org/dhq/vol/16/2/000585/000585.html is not an HTTPS link
* At _site/en/research.html:283: http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 is not an HTTPS link
* At _site/en/research.html:285: http://web.archive.org/web/20180713014622/http://dhcommons.org/journal/issue-1/editorial-sustainability-and-open-peer-review-programming-historian is not an HTTPS link
* At _site/en/research.html:286: http://www.themacroscope.org/2.0/ is not an HTTPS link
* At _site/en/research.html:294: http://doi.org/10.5281/zenodo.3813763 is not an HTTPS link
* At _site/en/research.html:307: http://www.iea.usp.br/eventos/historia-digital-educacao-caminhos-cruzados is not an HTTPS link
* At _site/en/research.html:395: http://ixa2.si.ehu.eus/intele/?q=webinars is not an HTTPS link
* At _site/en/research.html:398: http://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/ is not an HTTPS link
* At _site/en/research.html:411: http://fredgibbs.net/assets/images/ph-poster/final-board.png is not an HTTPS link
* At _site/en/research.html:425: http://niche-canada.org/2018/03/23/a-decade-of-programming-historians/ is not an HTTPS link
* At _site/en/research.html:426: http://fredgibbs.net/posts/reflections-former-PH-editor is not an HTTPS link
* At _site/en/research.html:427: http://clionauta.hypotheses.org/16979 is not an HTTPS link
* At _site/en/research.html:429: http://humanidadesdigitales.net/blog/2017/03/17/the-programming-historian-en-espanol/ is not an HTTPS link
* At _site/en/reviewer-guidelines.html:86: 'a' tag is missing a reference
* At _site/en/reviewer-guidelines.html:105: 'a' tag is missing a reference
* At _site/en/reviewer-guidelines.html:142: 'a' tag is missing a reference
* At _site/en/reviewer-guidelines.html:168: 'a' tag is missing a reference
* At _site/en/supporters.html:86: 'a' tag is missing a reference
* At _site/en/supporters.html:105: 'a' tag is missing a reference
* At _site/en/supporters.html:142: 'a' tag is missing a reference
* At _site/en/supporters.html:168: 'a' tag is missing a reference
* At _site/en/supporters.html:273: http://cdrh.unl.edu/ is not an HTTPS link
* At _site/en/translator-guidelines.html:86: 'a' tag is missing a reference
* At _site/en/translator-guidelines.html:105: 'a' tag is missing a reference
* At _site/en/translator-guidelines.html:142: 'a' tag is missing a reference
* At _site/en/translator-guidelines.html:168: 'a' tag is missing a reference
* At _site/en/vacancies.html:86: 'a' tag is missing a reference
* At _site/en/vacancies.html:105: 'a' tag is missing a reference
* At _site/en/vacancies.html:142: 'a' tag is missing a reference
* At _site/en/vacancies.html:168: 'a' tag is missing a reference
* At _site/es/acerca-de.html:89: 'a' tag is missing a reference
* At _site/es/acerca-de.html:108: 'a' tag is missing a reference
* At _site/es/acerca-de.html:145: 'a' tag is missing a reference
* At _site/es/acerca-de.html:182: 'a' tag is missing a reference
* At _site/es/acerca-de.html:269: http://dhawards.org/dhawards2016/results/ is not an HTTPS link
* At _site/es/acerca-de.html:269: http://dhawards.org/dhawards2017/results/ is not an HTTPS link
* At _site/es/acerca-de.html:269: http://humanidadesdigitaleshispanicas.es/ is not an HTTPS link
* At _site/es/acerca-de.html:269: http://dhawards.org/dhawards2022/results/ is not an HTTPS link
* At _site/es/acerca-de.html:282: http://digitalhistoryhacks.blogspot.com/2008/01/programming-historian.html is not an HTTPS link
* At _site/es/colaboradores.html:88: 'a' tag is missing a reference
* At _site/es/colaboradores.html:107: 'a' tag is missing a reference
* At _site/es/colaboradores.html:144: 'a' tag is missing a reference
* At _site/es/colaboradores.html:181: 'a' tag is missing a reference
* At _site/es/colaboradores.html:274: http://cdrh.unl.edu/ is not an HTTPS link
* At _site/es/contribuciones.html:88: 'a' tag is missing a reference
* At _site/es/contribuciones.html:107: 'a' tag is missing a reference
* At _site/es/contribuciones.html:144: 'a' tag is missing a reference
* At _site/es/contribuciones.html:181: 'a' tag is missing a reference
* At _site/es/contribuciones.html:258: http://vocabularios.caicyt.gov.ar/portalthes/index.php?v=42 is not an HTTPS link
* At _site/es/contribuciones.html:258: http://www.mecd.gob.es/planes-nacionales/dam/jcr:f20a4ba1-0ed2-445d-9be9-b8b0382562ea/mex-glosario-interpares-total0112.pdf is not an HTTPS link
* At _site/es/contribuciones.html:303: http://www.worldcat.org/title/programming-historian/oclc/951537099 is not an HTTPS link
* At _site/es/contribuciones.html:305: http://purdue-primo-prod.hosted.exlibrisgroup.com/primo_library/libweb/action/dlDisplay.do?vid=PURDUE&search_scope=everything&docId=PURDUE_ALMA51671812890001081&fn=permalink is not an HTTPS link
* At _site/es/donaciones.html:88: 'a' tag is missing a reference
* At _site/es/donaciones.html:107: 'a' tag is missing a reference
* At _site/es/donaciones.html:144: 'a' tag is missing a reference
* At _site/es/donaciones.html:181: 'a' tag is missing a reference
* At _site/es/equipo-de-proyecto.html:88: 'a' tag is missing a reference
* At _site/es/equipo-de-proyecto.html:107: 'a' tag is missing a reference
* At _site/es/equipo-de-proyecto.html:144: 'a' tag is missing a reference
* At _site/es/equipo-de-proyecto.html:181: 'a' tag is missing a reference
* At _site/es/equipo-de-proyecto.html:306: http://twitter.com/maxcarlons is not an HTTPS link
* At _site/es/equipo-de-proyecto.html:309: http://github.com/carlonim is not an HTTPS link
* At _site/es/equipo-de-proyecto.html:410: http://github.com/lachapot is not an HTTPS link
* At _site/es/equipo-de-proyecto.html:508: http://twitter.com/cosovschi is not an HTTPS link
* At _site/es/equipo-de-proyecto.html:511: http://github.com/digitalkosovski is not an HTTPS link
* At _site/es/equipo-de-proyecto.html:614: http://github.com/caiocmello is not an HTTPS link
* At _site/es/equipo-de-proyecto.html:1178: http://github.com/semanticnoodles is not an HTTPS link
* At _site/es/equipo-de-proyecto.html:1274: http://twitter.com/nabsiddiqui is not an HTTPS link
* At _site/es/equipo-de-proyecto.html:1277: http://github.com/nabsiddiqui is not an HTTPS link
* At _site/es/equipo-de-proyecto.html:1627: http://twitter.com/giulia_taurino is not an HTTPS link
* At _site/es/equipo-de-proyecto.html:1630: http://github.com/giuliataurino is not an HTTPS link
_site/es/equipo-de-proyecto.html:1800: - - http://www.alexwermercolan.com/ is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:1806: - - http://twitter.com/alexwermercolan is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:1809: - - http://github.com/hawc2 is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:2055: - - http://www.mariajoseafanador.com is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:2061: - - http://twitter.com/mariajoafana is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:2064: - - http://github.com/mariajoafana is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:2530: - - http://twitter.com/IsaGribomont is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:2533: - - http://github.com/isag91 is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:2741: - - http://twitter.com/espejolento is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:2744: - - http://github.com/silviaegt is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:3032: - - http://twitter.com/jenniferisve is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:3035: - - http://github.com/jenniferisasi is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:3357: - - http://twitter.com/enetreseles is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:3360: - - http://github.com/nllano is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:3564: - - http://twitter.com/jgob is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:3567: - - http://github.com/joshuagob is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:3859: - - http://twitter.com/rivaquiroga is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:3862: - - http://github.com/rivaquiroga is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:4153: - - http://github.com/nivaca is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:4366: - - http://github.com/marie-flesch is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:4509: - - http://github.com/matgille is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:4742: - - http://github.com/mhersent is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:4800: - - http://twitter.com/superHH is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:5052: - - http://github.com/DMathelier is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:5186: - - http://twitter.com/emilienschultz is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:5189: - - http://github.com/emilienschultz is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:5313: - - http://twitter.com/davvalent is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:5316: - - http://github.com/davvalent is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:5445: - - http://github.com/AlexandreWa is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:5580: - - http://github.com/josircg is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:5838: - - http://twitter.com/danielalvesfcsh is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:5841: - - http://github.com/DanielAlvesLABDH is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:6103: - - http://twitter.com/ericbrasiln is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:6106: - - http://github.com/ericbrasiln is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:6297: - - http://github.com/luisferla is not an HTTPS link - 
-* At _site/es/equipo-de-proyecto.html:6539: - - http://twitter.com/jimmy_medeiros is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:6542: - - http://github.com/JimmyMedeiros82 is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:6777: - - http://github.com/joanacvp is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:7023: - - http://twitter.com/araceletorres is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:7026: - - http://github.com/aracele is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:7282: - - http://twitter.com/j_w_baker is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:7285: - - http://github.com/drjwbaker is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:7717: - - http://adamcrymble.org is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:7723: - - http://twitter.com/Adam_Crymble is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:7726: - - http://github.com/acrymble is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:8194: - - http://github.com/adamfarquhar is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:8254: - - http://twitter.com/jenniferisve is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:8257: - - http://github.com/jenniferisasi is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:8585: - - http://twitter.com/rivaquiroga is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:8588: - - http://github.com/rivaquiroga is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:8874: - - http://twitter.com/amsichani is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:8877: - - http://github.com/amsichani is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:9217: - - http://twitter.com/AnisaHawes is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:9220: - - http://github.com/anisa-hawes is not an HTTPS link - -* At _site/es/equipo-de-proyecto.html:9429: - - http://github.com/charlottejmc is not an HTTPS link - -* At _site/es/eventos.html:88: - - 'a' tag is missing a reference - -* At _site/es/eventos.html:107: - - 'a' tag is missing a reference - -* At _site/es/eventos.html:144: - - 'a' tag is missing a reference - -* At _site/es/eventos.html:181: - - 'a' tag is missing a reference - -* At _site/es/guia-editor.html:88: - - 'a' tag is missing a reference - -* At _site/es/guia-editor.html:107: - - 'a' tag is missing a reference - -* At _site/es/guia-editor.html:144: - - 'a' tag is missing a reference - -* At _site/es/guia-editor.html:181: - - 'a' tag is missing a reference - -* At _site/es/guia-editor.html:617: - - http://www.europeana.eu/portal/en is not an HTTPS link - -* At _site/es/guia-editor.html:621: - - http://www.loc.gov/maps/collections is not an HTTPS link - -* At _site/es/guia-para-autores.html:88: - - 'a' tag is missing a reference - -* At _site/es/guia-para-autores.html:107: - - 'a' tag is missing a reference - -* At _site/es/guia-para-autores.html:144: - - 'a' tag is missing a reference - -* At _site/es/guia-para-autores.html:181: - - 'a' tag is missing a reference - -* At _site/es/guia-para-revisores.html:88: - - 'a' tag is missing a reference - -* At _site/es/guia-para-revisores.html:107: - - 'a' tag is missing a reference - -* At _site/es/guia-para-revisores.html:144: - - 'a' tag is missing a reference - -* At _site/es/guia-para-revisores.html:181: - - 'a' tag is missing a reference - -* At _site/es/guia-para-revisores.html:336: - - 
http://vocabularios.caicyt.gov.ar/portalthes/index.php?v=42 is not an HTTPS link - -* At _site/es/guia-para-revisores.html:336: - - http://www.mecd.gob.es/planes-nacionales/dam/jcr:f20a4ba1-0ed2-445d-9be9-b8b0382562ea/mex-glosario-interpares-total0112.pdf is not an HTTPS link - -* At _site/es/guia-para-traductores.html:88: - - 'a' tag is missing a reference - -* At _site/es/guia-para-traductores.html:107: - - 'a' tag is missing a reference - -* At _site/es/guia-para-traductores.html:144: - - 'a' tag is missing a reference - -* At _site/es/guia-para-traductores.html:181: - - 'a' tag is missing a reference - -* At _site/es/index.html:90: - - 'a' tag is missing a reference - -* At _site/es/index.html:109: - - 'a' tag is missing a reference - -* At _site/es/index.html:146: - - 'a' tag is missing a reference - -* At _site/es/index.html:183: - - 'a' tag is missing a reference - -* At _site/es/investigacion.html:88: - - 'a' tag is missing a reference - -* At _site/es/investigacion.html:107: - - 'a' tag is missing a reference - -* At _site/es/investigacion.html:144: - - 'a' tag is missing a reference - -* At _site/es/investigacion.html:181: - - 'a' tag is missing a reference - -* At _site/es/investigacion.html:254: - - http://niche-canada.org/wp-content/uploads/2013/09/programming-historian-1.pdf is not an HTTPS link - -* At _site/es/investigacion.html:260: - - http://www.diva-portal.org/smash/record.jsf?pid=diva2%3A1508542&dswid=7551 is not an HTTPS link - -* At _site/es/investigacion.html:264: - - http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian is not an HTTPS link - -* At _site/es/investigacion.html:265: - - http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian is not an HTTPS link - -* At _site/es/investigacion.html:277: - - http://www.digitalhumanities.org/dhq/vol/16/2/000585/000585.html is not an HTTPS link - -* At _site/es/investigacion.html:283: - - http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 is not an HTTPS link - -* At _site/es/investigacion.html:285: - - http://web.archive.org/web/20180713014622/http://dhcommons.org/journal/issue-1/editorial-sustainability-and-open-peer-review-programming-historian is not an HTTPS link - -* At _site/es/investigacion.html:286: - - http://www.themacroscope.org/2.0/ is not an HTTPS link - -* At _site/es/investigacion.html:294: - - http://doi.org/10.5281/zenodo.3813763 is not an HTTPS link - -* At _site/es/investigacion.html:307: - - http://www.iea.usp.br/eventos/historia-digital-educacao-caminhos-cruzados is not an HTTPS link - -* At _site/es/investigacion.html:394: - - http://ixa2.si.ehu.eus/intele/?q=webinars is not an HTTPS link - -* At _site/es/investigacion.html:397: - - http://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/ is not an HTTPS link - -* At _site/es/investigacion.html:410: - - http://fredgibbs.net/assets/images/ph-poster/final-board.png is not an HTTPS link - -* At _site/es/investigacion.html:426: - - http://niche-canada.org/2018/03/23/a-decade-of-programming-historians/ is not an HTTPS link - -* At _site/es/investigacion.html:427: - - http://fredgibbs.net/posts/reflections-former-PH-editor is not an HTTPS link - -* At _site/es/investigacion.html:428: - - http://clionauta.hypotheses.org/16979 is not an HTTPS link - -* At _site/es/investigacion.html:430: - - http://humanidadesdigitales.net/blog/2017/03/17/the-programming-historian-en-espanol/ is not an HTTPS link - -* At _site/es/jisc-tna-colaboracion.html:88: - - 'a' tag is missing a reference 
- -* At _site/es/jisc-tna-colaboracion.html:107: - - 'a' tag is missing a reference - -* At _site/es/jisc-tna-colaboracion.html:144: - - 'a' tag is missing a reference - -* At _site/es/jisc-tna-colaboracion.html:181: - - 'a' tag is missing a reference - -* At _site/es/lecciones/administracion-de-datos-en-r.html:119: - - 'a' tag is missing a reference - -* At _site/es/lecciones/administracion-de-datos-en-r.html:138: - - 'a' tag is missing a reference - -* At _site/es/lecciones/administracion-de-datos-en-r.html:175: - - 'a' tag is missing a reference - -* At _site/es/lecciones/administracion-de-datos-en-r.html:212: - - 'a' tag is missing a reference - -* At _site/es/lecciones/administracion-de-datos-en-r.html:564: - - http://hadley.nz is not an HTTPS link - -* At _site/es/lecciones/administracion-de-datos-en-r.html:585: - - http://academica-e.unavarra.es/bitstream/handle/2454/15785/Gramática.pdf?sequence=1 is not an HTTPS link - -* At _site/es/lecciones/administracion-de-datos-en-r.html:675: - - http://stefanbache.dk is not an HTTPS link - -* At _site/es/lecciones/administracion-de-datos-en-r.html:675: - - http://hadley.nz/ is not an HTTPS link - -* At _site/es/lecciones/administracion-de-datos-en-r.html:1042: - - http://academica-e.unavarra.es/bitstream/handle/2454/15785/Gramática.pdf?sequence=1 is not an HTTPS link - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:119: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:138: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:175: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:212: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:576: - - http://voyant-tools.org/ is not an HTTPS link - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:580: - - http://voyant-tools.org/ is not an HTTPS link - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:580: - - http://www.laurenceanthony.net/software/antconc/ is not an HTTPS link - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:580: - - http://www.laurenceanthony.net/ is not an HTTPS link - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:588: - - http://www.laurenceanthony.net/software/antconc/ is not an HTTPS link - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:590: - - http://www.laurenceanthony.net/software/antconc/releases/AntConc324/ is not an HTTPS link - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:619: - - http://notepad-plus-plus.org/ is not an HTTPS link - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:619: - - http://www.barebones.com/products/textwrangler/ is not an HTTPS link - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:628: - - http://www.nltk.org/ is not an HTTPS link - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:630: - - http://www.amazon.com/Developing-Linguistic-Corpora-Practice-Guides/dp/1842172050/ref=sr_1_1 is not an HTTPS link - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:984: - - http://www.wordfrequency.info/free.asp is not an HTTPS link - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:1047: - - http://www.lexically.net/downloads/version6/HTML/index.html?keyness_definition.htm is not an HTTPS link - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:1047: - - 
http://www.laurenceanthony.net/software/antconc/releases/AntConc335/help.pdf is not an HTTPS link - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:1115: - - http://hfroehlich.wordpress.com/2014/05/11/intro-bibliography-corpus-linguistics/ is not an HTTPS link - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:1117: - - http://hfroehli.ch/workshops/getting-started-with-antconc/ is not an HTTPS link - -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:1132: - - http://elies.rediris.es/elies18/ is not an HTTPS link - -* At _site/es/lecciones/analisis-de-correspondencia-en-r.html:139: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-de-correspondencia-en-r.html:158: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-de-correspondencia-en-r.html:195: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-de-correspondencia-en-r.html:232: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-de-correspondencia-en-r.html:739: - - http://doi.org/10.5281/zenodo.889846 is not an HTTPS link - -* At _site/es/lecciones/analisis-de-sentimientos-r.html:117: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-de-sentimientos-r.html:136: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-de-sentimientos-r.html:173: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-de-sentimientos-r.html:210: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-de-sentimientos-r.html:541: - - http://saifmohammad.com/WebPages/NRC-Emotion-Lexicon.htm is not an HTTPS link - -* At _site/es/lecciones/analisis-de-sentimientos-r.html:545: - - http://www.matthewjockers.net/page/2/ is not an HTTPS link - -* At _site/es/lecciones/analisis-redes-sociales-teatro-1.html:117: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-redes-sociales-teatro-1.html:136: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-redes-sociales-teatro-1.html:173: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-redes-sociales-teatro-1.html:210: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-redes-sociales-teatro-2.html:117: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-redes-sociales-teatro-2.html:136: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-redes-sociales-teatro-2.html:173: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-redes-sociales-teatro-2.html:210: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-redes-sociales-teatro-2.html:1089: - - http://hdlab.stanford.edu/palladio/ is not an HTTPS link - -* At _site/es/lecciones/analisis-temporal-red.html:122: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-temporal-red.html:141: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-temporal-red.html:178: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-temporal-red.html:215: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-temporal-red.html:1187: - - http://www.epimodel.org is not an HTTPS link - -* At _site/es/lecciones/analisis-voyant-tools.html:120: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-voyant-tools.html:139: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-voyant-tools.html:176: - - 'a' tag is missing a reference - -* At 
_site/es/lecciones/analisis-voyant-tools.html:213: - - 'a' tag is missing a reference - -* At _site/es/lecciones/analisis-voyant-tools.html:550: - - http://vocabularios.caicyt.gov.ar/portalthes/42/term/26 is not an HTTPS link - -* At _site/es/lecciones/analisis-voyant-tools.html:552: - - http://vocabularios.caicyt.gov.ar/portalthes/42/term/178 is not an HTTPS link - -* At _site/es/lecciones/analisis-voyant-tools.html:1260: - - http://voyant-tools.org/ is not an HTTPS link - -* At _site/es/lecciones/analisis-voyant-tools.html:1262: - - http://melissaterras.blogspot.com/2013/10/for-ada-lovelace-day-father-busas.html is not an HTTPS link - -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:120: - - 'a' tag is missing a reference - -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:139: - - 'a' tag is missing a reference - -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:176: - - 'a' tag is missing a reference - -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:213: - - 'a' tag is missing a reference - -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:555: - - http://www.rubenalcaraz.es/manual-omeka/ is not an HTTPS link - -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:592: - - http://localhost/ is not an HTTPS link - -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:592: - - http://127.0.0.1 is not an HTTPS link - -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:622: - - http://localhost/phpmyadmin/ is not an HTTPS link - -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:692: - - http://localhost/phpmyadmin is not an HTTPS link - -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:791: - - 'a' tag is missing a reference - -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:914: - - http://omeka.org/codex/Plugin_Writing_Best_Practices#Plugin_Directory_Structure is not an HTTPS link - -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:967: - - http://omeka.org/codex/Plugins/DublinCoreExtended_2.0 is not an HTTPS link - -* At _site/es/lecciones/contar-frecuencias.html:121: - - 'a' tag is missing a reference - -* At _site/es/lecciones/contar-frecuencias.html:140: - - 'a' tag is missing a reference - -* At _site/es/lecciones/contar-frecuencias.html:177: - - 'a' tag is missing a reference - -* At _site/es/lecciones/contar-frecuencias.html:214: - - 'a' tag is missing a reference - -* At _site/es/lecciones/contar-frecuencias.html:624: - - http://docs.python.org/tutorial/datastructures.html#list-comprehensions is not an HTTPS link - -* At _site/es/lecciones/contar-frecuencias.html:751: - - http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words is not an HTTPS link - -* At _site/es/lecciones/corpus-paralelo-lfaligner.html:121: - - 'a' tag is missing a reference - -* At _site/es/lecciones/corpus-paralelo-lfaligner.html:140: - - 'a' tag is missing a reference - -* At _site/es/lecciones/corpus-paralelo-lfaligner.html:177: - - 'a' tag is missing a reference - -* At _site/es/lecciones/corpus-paralelo-lfaligner.html:214: - - 'a' tag is missing a reference - -* At _site/es/lecciones/corpus-paralelo-lfaligner.html:489: - - http://vocabularios.caicyt.gov.ar/portalthes/42/term/134 is not an HTTPS link - -* At _site/es/lecciones/corpus-paralelo-lfaligner.html:499: - - http://vocabularios.caicyt.gov.ar/portalthes/42/term/136 is not an HTTPS link - -* At _site/es/lecciones/corpus-paralelo-lfaligner.html:501: - - http://utils.mucattu.com/iso_639-1.html is 
not an HTTPS link - -* At _site/es/lecciones/corpus-paralelo-lfaligner.html:1100: - - http://www.laurenceanthony.net/software/antpconc/ is not an HTTPS link - -* At _site/es/lecciones/creacion-de-aplicacion-shiny.html:119: - - 'a' tag is missing a reference - -* At _site/es/lecciones/creacion-de-aplicacion-shiny.html:138: - - 'a' tag is missing a reference - -* At _site/es/lecciones/creacion-de-aplicacion-shiny.html:175: - - 'a' tag is missing a reference - -* At _site/es/lecciones/creacion-de-aplicacion-shiny.html:212: - - 'a' tag is missing a reference - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:119: - - 'a' tag is missing a reference - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:138: - - 'a' tag is missing a reference - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:175: - - 'a' tag is missing a reference - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:212: - - 'a' tag is missing a reference - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:530: - - http://hdlab.stanford.edu/palladio/ is not an HTTPS link - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:552: - - http://hal.archives-ouvertes.fr/docs/00/64/93/16/PDF/lemercier_A_zg.pdf is not an HTTPS link - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:552: - - http://hal.archives-ouvertes.fr/docs/00/64/93/16/PDF/lemercier_A_zg.pdf is not an HTTPS link - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:552: - - http://historicalnetworkresearch.org/ is not an HTTPS link - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:562: - - http://web.archive.org/web/20180422010025/http://www.gdw-berlin.de/fileadmin/bilder/publ/publikationen_in_englischer_sprache/2006_Neuman_eng.pdf is not an HTTPS link - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:562: - - http://martenduering.com/research/covert-networks-during-the-holocaust/ is not an HTTPS link - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:695: - - http://hdlab.stanford.edu/palladio/ is not an HTTPS link - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:771: - - http://hdlab.stanford.edu/doc/scenario-simple-map.pdf is not an HTTPS link - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:937: - - http://en.wikipedia.org/wiki/Directed_graph#Indegree_and_outdegree is not an HTTPS link - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:985: - - http://nodegoat.net/ is not an HTTPS link - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:985: - - http://nodegoat.net/cms/UPLOAD/AsmallguidebyYanan11082014.pdf is not an HTTPS link - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:987: - - http://www.youtube.com/watch?v=xKhYGRpbwOc is not an HTTPS link - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:989: - - http://www.clementlevallois.net/training.html is not an HTTPS link - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:989: - - http://www.youtube.com/watch?v=L6hHv6y5GsQ is not an HTTPS link - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:993: - - 
http://pajek.imfm.si/doku.php is not an HTTPS link - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:993: - - http://www.cambridge.org/us/academic/subjects/sociology/research-methods-sociology-and-criminology/exploratory-social-network-analysis-pajek-2nd-edition is not an HTTPS link - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:1081: - - http://historicalnetworkresearch.org is not an HTTPS link - -* At _site/es/lecciones/crear-exposicion-con-omeka.html:121: - - 'a' tag is missing a reference - -* At _site/es/lecciones/crear-exposicion-con-omeka.html:140: - - 'a' tag is missing a reference - -* At _site/es/lecciones/crear-exposicion-con-omeka.html:177: - - 'a' tag is missing a reference - -* At _site/es/lecciones/crear-exposicion-con-omeka.html:214: - - 'a' tag is missing a reference - -* At _site/es/lecciones/crear-y-ver-archivos-html-con-python.html:121: - - 'a' tag is missing a reference - -* At _site/es/lecciones/crear-y-ver-archivos-html-con-python.html:140: - - 'a' tag is missing a reference - -* At _site/es/lecciones/crear-y-ver-archivos-html-con-python.html:177: - - 'a' tag is missing a reference - -* At _site/es/lecciones/crear-y-ver-archivos-html-con-python.html:214: - - 'a' tag is missing a reference - -* At _site/es/lecciones/crear-y-ver-archivos-html-con-python.html:572: - - http://zotero.org is not an HTTPS link - -* At _site/es/lecciones/crear-y-ver-archivos-html-con-python.html:574: - - http://www.w3schools.com/html/default.asp is not an HTTPS link - -* At _site/es/lecciones/crear-y-ver-archivos-html-con-python.html:580: - - http://www.w3schools.com/tags/tag_doctype.asp is not an HTTPS link - -* At _site/es/lecciones/datos-abiertos-enlazados-wikidata.html:119: - - 'a' tag is missing a reference - -* At _site/es/lecciones/datos-abiertos-enlazados-wikidata.html:138: - - 'a' tag is missing a reference - -* At _site/es/lecciones/datos-abiertos-enlazados-wikidata.html:175: - - 'a' tag is missing a reference - -* At _site/es/lecciones/datos-abiertos-enlazados-wikidata.html:212: - - 'a' tag is missing a reference - -* At _site/es/lecciones/datos-de-investigacion-con-unix.html:121: - - 'a' tag is missing a reference - -* At _site/es/lecciones/datos-de-investigacion-con-unix.html:140: - - 'a' tag is missing a reference - -* At _site/es/lecciones/datos-de-investigacion-con-unix.html:177: - - 'a' tag is missing a reference - -* At _site/es/lecciones/datos-de-investigacion-con-unix.html:214: - - 'a' tag is missing a reference - -* At _site/es/lecciones/datos-de-investigacion-con-unix.html:556: - - http://msysgit.github.io/ is not an HTTPS link - -* At _site/es/lecciones/datos-de-investigacion-con-unix.html:564: - - http://www.7-zip.org/ is not an HTTPS link - -* At _site/es/lecciones/datos-de-investigacion-con-unix.html:580: - - http://en.wikipedia.org/wiki/Tab-separated_values is not an HTTPS link - -* At _site/es/lecciones/datos-de-investigacion-con-unix.html:582: - - http://en.wikipedia.org/wiki/Comma-separated_values is not an HTTPS link - -* At _site/es/lecciones/datos-de-investigacion-con-unix.html:641: - - http://williamjturkel.net/2013/06/15/basic-text-analysis-with-command-line-tools-in-linux/ is not an HTTPS link - -* At _site/es/lecciones/datos-de-investigacion-con-unix.html:642: - - http://williamjturkel.net/2013/06/20/pattern-matching-and-permuted-term-indexing-with-command-line-tools-in-linux/ is not an HTTPS link - -* At _site/es/lecciones/datos-tabulares-en-r.html:119: - - 'a' tag is missing a 
reference - -* At _site/es/lecciones/datos-tabulares-en-r.html:138: - - 'a' tag is missing a reference - -* At _site/es/lecciones/datos-tabulares-en-r.html:175: - - 'a' tag is missing a reference - -* At _site/es/lecciones/datos-tabulares-en-r.html:212: - - 'a' tag is missing a reference - -* At _site/es/lecciones/datos-tabulares-en-r.html:1046: - - http://web.archive.org/web/20191015004305/https://www.nceas.ucsb.edu/files/scicomp/Dloads/RProgramming/BestFirstRTutorial.pdf is not an HTTPS link - -* At _site/es/lecciones/datos-tabulares-en-r.html:1050: - - http://dh-r.lincolnmullen.com is not an HTTPS link - -* At _site/es/lecciones/de-html-a-lista-de-palabras-1.html:121: - - 'a' tag is missing a reference - -* At _site/es/lecciones/de-html-a-lista-de-palabras-1.html:140: - - 'a' tag is missing a reference - -* At _site/es/lecciones/de-html-a-lista-de-palabras-1.html:177: - - 'a' tag is missing a reference - -* At _site/es/lecciones/de-html-a-lista-de-palabras-1.html:214: - - 'a' tag is missing a reference - -* At _site/es/lecciones/de-html-a-lista-de-palabras-1.html:561: - - http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 is not an HTTPS link - -* At _site/es/lecciones/de-html-a-lista-de-palabras-1.html:565: - - http://www.w3schools.com/html/ is not an HTTPS link - -* At _site/es/lecciones/de-html-a-lista-de-palabras-2.html:121: - - 'a' tag is missing a reference - -* At _site/es/lecciones/de-html-a-lista-de-palabras-2.html:140: - - 'a' tag is missing a reference - -* At _site/es/lecciones/de-html-a-lista-de-palabras-2.html:177: - - 'a' tag is missing a reference - -* At _site/es/lecciones/de-html-a-lista-de-palabras-2.html:214: - - 'a' tag is missing a reference - -* At _site/es/lecciones/de-html-a-lista-de-palabras-2.html:697: - - http://docs.python.org/2.4/lib/typesnumeric.html is not an HTTPS link - -* At _site/es/lecciones/de-html-a-lista-de-palabras-2.html:709: - - http://docs.python.org/3/library/types.html is not an HTTPS link - -* At _site/es/lecciones/descarga-automatizada-con-wget.html:119: - - 'a' tag is missing a reference - -* At _site/es/lecciones/descarga-automatizada-con-wget.html:138: - - 'a' tag is missing a reference - -* At _site/es/lecciones/descarga-automatizada-con-wget.html:175: - - 'a' tag is missing a reference - -* At _site/es/lecciones/descarga-automatizada-con-wget.html:212: - - 'a' tag is missing a reference - -* At _site/es/lecciones/descarga-automatizada-con-wget.html:663: - - http://www.gnu.org/software/wget/ is not an HTTPS link - -* At _site/es/lecciones/descarga-automatizada-con-wget.html:663: - - http://ftp.gnu.org/gnu/wget/ is not an HTTPS link - -* At _site/es/lecciones/descarga-automatizada-con-wget.html:701: - - http://www.gnu.org/software/wget/manual/wget.html is not an HTTPS link - -* At _site/es/lecciones/descarga-automatizada-con-wget.html:703: - - http://activehistory.ca/papers/ is not an HTTPS link - -* At _site/es/lecciones/descarga-automatizada-con-wget.html:703: - - http://activehistory.ca/papers/historypaper-9/ is not an HTTPS link - -* At _site/es/lecciones/descarga-automatizada-con-wget.html:735: - - http://activehistory.ca/papers/ is not an HTTPS link - -* At _site/es/lecciones/descarga-automatizada-con-wget.html:749: - - http://activehistory.ca/papers/ is not an HTTPS link - -* At _site/es/lecciones/descarga-automatizada-con-wget.html:749: - - http://activehistory.ca/papers/historypaper-9/ is not an HTTPS link - -* At _site/es/lecciones/descarga-automatizada-con-wget.html:749: - - http://uwo.ca is not an 
HTTPS link - -* At _site/es/lecciones/descarga-automatizada-con-wget.html:822: - - http://www.gnu.org/software/wget/manual/wget.html is not an HTTPS link - -* At _site/es/lecciones/descarga-multiples-registros-usando-cadenas-de-consulta.html:119: - - 'a' tag is missing a reference - -* At _site/es/lecciones/descarga-multiples-registros-usando-cadenas-de-consulta.html:138: - - 'a' tag is missing a reference - -* At _site/es/lecciones/descarga-multiples-registros-usando-cadenas-de-consulta.html:175: - - 'a' tag is missing a reference - -* At _site/es/lecciones/descarga-multiples-registros-usando-cadenas-de-consulta.html:212: - - 'a' tag is missing a reference - -* At _site/es/lecciones/editar-audio-con-audacity.html:119: - - 'a' tag is missing a reference - -* At _site/es/lecciones/editar-audio-con-audacity.html:138: - - 'a' tag is missing a reference - -* At _site/es/lecciones/editar-audio-con-audacity.html:175: - - 'a' tag is missing a reference - -* At _site/es/lecciones/editar-audio-con-audacity.html:212: - - 'a' tag is missing a reference - -* At _site/es/lecciones/editar-audio-con-audacity.html:531: - - http://www.audacityteam.org/ is not an HTTPS link - -* At _site/es/lecciones/editar-audio-con-audacity.html:543: - - http://www.audacityteam.org/ is not an HTTPS link - -* At _site/es/lecciones/editar-audio-con-audacity.html:555: - - http://web.archive.org/web/20161119231053/http://www.indiana.edu:80/~emusic/acoustics/amplitude.htm is not an HTTPS link - -* At _site/es/lecciones/editar-audio-con-audacity.html:601: - - http://www.diffen.com/difference/Mono_vs_Stereo/ is not an HTTPS link - -* At _site/es/lecciones/editar-audio-con-audacity.html:673: - - http://manual.audacityteam.org/man/crossfade_clips.html is not an HTTPS link - -* At _site/es/lecciones/editar-audio-con-audacity.html:689: - - http://www.nch.com.au/acm/formats.html is not an HTTPS link - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:121: - - 'a' tag is missing a reference - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:140: - - 'a' tag is missing a reference - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:177: - - 'a' tag is missing a reference - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:214: - - 'a' tag is missing a reference - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:577: - - http://notepad-plus-plus.org is not an HTTPS link - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:735: - - http://daringfireball.net/projects/markdown/dingus is not an HTTPS link - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:855: - - http://editor.citationstyles.org/about/ is not an HTTPS link - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:887: - - http://stackoverflow.com/questions/tagged/pandoc is not an HTTPS link - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:887: - - http://web.archive.org/web/20190203062832/http://digitalhumanities.org/answers/ is not an HTTPS link - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:889: - - http://web.archive.org/web/20140120195538/http://mashable.com/2013/06/24/markdown-tools/ is not an HTTPS link - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:889: - - http://mouapp.com/ is not an HTTPS link - -* At 
_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:889: - - http://writemonkey.com is not an HTTPS link - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:889: - - http://www.sublimetext.com/ is not an HTTPS link - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:889: - - http://prose.io is not an HTTPS link - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:889: - - http://www.authorea.com is not an HTTPS link - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:889: - - http://www.draftin.com is not an HTTPS link - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:891: - - http://gitit.net/ is not an HTTPS link - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:891: - - http://github.com/fauno/jekyll-pandoc-multiple-formats is not an HTTPS link - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:891: - - http://jaspervdj.be/hakyll/ is not an HTTPS link - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:893: - - http://readthedocs.org is not an HTTPS link - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:904: - - http://www.antipope.org/charlie/blog-static/2013/10/why-microsoft-word-must-die.html is not an HTTPS link - -* At _site/es/lecciones/exhibicion-con-collection-builder.html:117: - - 'a' tag is missing a reference - -* At _site/es/lecciones/exhibicion-con-collection-builder.html:136: - - 'a' tag is missing a reference - -* At _site/es/lecciones/exhibicion-con-collection-builder.html:173: - - 'a' tag is missing a reference - -* At _site/es/lecciones/exhibicion-con-collection-builder.html:210: - - 'a' tag is missing a reference - -* At _site/es/lecciones/exhibicion-con-collection-builder.html:584: - - http://dna.nust.na/heritage_week/ is not an HTTPS link - -* At _site/es/lecciones/exhibicion-con-collection-builder.html:592: - - http://www.gimp.org.es/descargar-gimp.html is not an HTTPS link - -* At _site/es/lecciones/generadores-aventura.html:117: - - 'a' tag is missing a reference - -* At _site/es/lecciones/generadores-aventura.html:136: - - 'a' tag is missing a reference - -* At _site/es/lecciones/generadores-aventura.html:173: - - 'a' tag is missing a reference - -* At _site/es/lecciones/generadores-aventura.html:210: - - 'a' tag is missing a reference - -* At _site/es/lecciones/generadores-aventura.html:534: - - http://www.spoonbill.org/n+7/ is not an HTTPS link - -* At _site/es/lecciones/generadores-aventura.html:729: - - http://clementinagrillo.com/sobremesadigital/flare.json is not an HTTPS link - -* At _site/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.html:119: - - 'a' tag is missing a reference - -* At _site/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.html:138: - - 'a' tag is missing a reference - -* At _site/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.html:175: - - 'a' tag is missing a reference - -* At _site/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.html:212: - - 'a' tag is missing a reference - -* At _site/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.html:774: - - http://visone.ethz.ch/html/download.html is not an HTTPS link - -* At _site/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.html:1033: - - http://dx.doi.org/10.3828/mlo.v0i0.299 is not an HTTPS link - -* At 
_site/es/lecciones/georreferenciar-qgis.html:126: - - 'a' tag is missing a reference - -* At _site/es/lecciones/georreferenciar-qgis.html:145: - - 'a' tag is missing a reference - -* At _site/es/lecciones/georreferenciar-qgis.html:182: - - 'a' tag is missing a reference - -* At _site/es/lecciones/georreferenciar-qgis.html:219: - - 'a' tag is missing a reference - -* At _site/es/lecciones/georreferenciar-qgis.html:550: - - http://en.wikipedia.org/wiki/Rubbersheeting is not an HTTPS link - -* At _site/es/lecciones/georreferenciar-qgis.html:617: - - http://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP is not an HTTPS link - -* At _site/es/lecciones/georreferenciar-qgis.html:908: - - http://en.wikipedia.org/wiki/Tagged_Image_File_Format is not an HTTPS link - -* At _site/es/lecciones/georreferenciar-qgis.html:1059: - - http://geospatialhistorian.wordpress.com/ is not an HTTPS link - -* At _site/es/lecciones/index.html:88: - - 'a' tag is missing a reference - -* At _site/es/lecciones/index.html:107: - - 'a' tag is missing a reference - -* At _site/es/lecciones/index.html:144: - - 'a' tag is missing a reference - -* At _site/es/lecciones/index.html:181: - - 'a' tag is missing a reference - -* At _site/es/lecciones/instalacion-linux.html:121: - - 'a' tag is missing a reference - -* At _site/es/lecciones/instalacion-linux.html:140: - - 'a' tag is missing a reference - -* At _site/es/lecciones/instalacion-linux.html:177: - - 'a' tag is missing a reference - -* At _site/es/lecciones/instalacion-linux.html:214: - - 'a' tag is missing a reference - -* At _site/es/lecciones/instalacion-linux.html:569: - - http://komodoide.com/komodo-edit/ is not an HTTPS link - -* At _site/es/lecciones/instalacion-mac.html:121: - - 'a' tag is missing a reference - -* At _site/es/lecciones/instalacion-mac.html:140: - - 'a' tag is missing a reference - -* At _site/es/lecciones/instalacion-mac.html:177: - - 'a' tag is missing a reference - -* At _site/es/lecciones/instalacion-mac.html:214: - - 'a' tag is missing a reference - -* At _site/es/lecciones/instalacion-windows.html:121: - - 'a' tag is missing a reference - -* At _site/es/lecciones/instalacion-windows.html:140: - - 'a' tag is missing a reference - -* At _site/es/lecciones/instalacion-windows.html:177: - - 'a' tag is missing a reference - -* At _site/es/lecciones/instalacion-windows.html:214: - - 'a' tag is missing a reference - -* At _site/es/lecciones/instalar-modulos-python-pip.html:119: - - 'a' tag is missing a reference - -* At _site/es/lecciones/instalar-modulos-python-pip.html:138: - - 'a' tag is missing a reference - -* At _site/es/lecciones/instalar-modulos-python-pip.html:175: - - 'a' tag is missing a reference - -* At _site/es/lecciones/instalar-modulos-python-pip.html:212: - - 'a' tag is missing a reference - -* At _site/es/lecciones/instalar-modulos-python-pip.html:561: - - http://www.thegeekstuff.com/2012/04/curl-examples/ is not an HTTPS link - -* At _site/es/lecciones/instalar-modulos-python-pip.html:587: - - http://stackoverflow.com/questions/4750806/how-to-install-pip-on-windows is not an HTTPS link - -* At _site/es/lecciones/intro-a-google-maps-y-google-earth.html:126: - - 'a' tag is missing a reference - -* At _site/es/lecciones/intro-a-google-maps-y-google-earth.html:145: - - 'a' tag is missing a reference - -* At _site/es/lecciones/intro-a-google-maps-y-google-earth.html:182: - - 'a' tag is missing a reference - -* At _site/es/lecciones/intro-a-google-maps-y-google-earth.html:219: - - 'a' tag is missing a reference - 
-* At _site/es/lecciones/intro-a-google-maps-y-google-earth.html:1248: - - http://www.davidrumsey.com/ is not an HTTPS link - -* At _site/es/lecciones/intro-a-google-maps-y-google-earth.html:1516: - - http://niche-canada.org/2011/12/14/mobile-mapping-and-historical-gis-in-the-field/ is not an HTTPS link - -* At _site/es/lecciones/introduccion-a-bash.html:121: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-bash.html:140: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-bash.html:177: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-bash.html:214: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-bash.html:577: - - http://www.tldp.org/LDP/Bash-Beginners-Guide/html/chap_01.html is not an HTTPS link - -* At _site/es/lecciones/introduccion-a-bash.html:627: - - http://ethanschoonover.com/solarized is not an HTTPS link - -* At _site/es/lecciones/introduccion-a-bash.html:779: - - http://www.viemu.com/a-why-vi-vim.html is not an HTTPS link - -* At _site/es/lecciones/introduccion-a-bash.html:791: - - http://www.gutenberg.org/ebooks/2600 is not an HTTPS link - -* At _site/es/lecciones/introduccion-a-bash.html:887: - - http://vimdoc.sourceforge.net/htmldoc/quickref.html is not an HTTPS link - -* At _site/es/lecciones/introduccion-a-ffmpeg.html:119: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-ffmpeg.html:138: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-ffmpeg.html:175: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-ffmpeg.html:212: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-ffmpeg.html:668: - - http://linuxbrew.sh/ is not an HTTPS link - -* At _site/es/lecciones/introduccion-a-ffmpeg.html:1130: - - http://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm is not an HTTPS link - -* At _site/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.html:121: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.html:140: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.html:177: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.html:214: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.html:523: - - http://lab.culturalanalytics.info/ is not an HTTPS link - -* At _site/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.html:533: - - http://ladi.lib.utexas.edu/ is not an HTTPS link - -* At _site/es/lecciones/introduccion-a-markdown.html:122: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-markdown.html:141: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-markdown.html:178: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-markdown.html:215: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-markdown.html:541: - - http://daringfireball.net/projects/markdown/ is not an HTTPS link - -* At _site/es/lecciones/introduccion-a-markdown.html:748: - - http://programminghistorian.org/ is not an HTTPS link - -* At _site/es/lecciones/introduccion-a-powershell.html:119: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-powershell.html:138: - - 
'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-powershell.html:175: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-powershell.html:212: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-tei-1.html:117: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-tei-1.html:136: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-tei-1.html:173: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-tei-1.html:210: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-tei-2.html:117: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-tei-2.html:136: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-tei-2.html:173: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-a-tei-2.html:210: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-al-web-scraping-usando-r.html:117: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-al-web-scraping-usando-r.html:136: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-al-web-scraping-usando-r.html:173: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-al-web-scraping-usando-r.html:210: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-al-web-scraping-usando-r.html:677: - - http://www.memoriachilena.gob.cl/ is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:119: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:138: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:175: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:212: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:556: - - http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:591: - - http://www.oxforddnb.com is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:603: - - http://www.geonames.org/ is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:701: - - http://www.history.ac.uk/projects/digital/tobias is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:733: - - http://semanticweb.org/wiki/Main_Page.html is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:738: - - http://web.archive.org/web/20170715094229/http://www.musicontology.com/ is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:738: - - http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:748: - - http://wiki.dbpedia.org is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:853: - - http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:887: - - http://www.history.ac.uk/projects/digital/tobias is not an HTTPS link - -* At 
_site/es/lecciones/introduccion-datos-abiertos-enlazados.html:907: - - http://www.easyrdf.org/converter is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:919: - - http://dbpedia.org/snorql/ is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:1019: - - http://dbpedia.org/class/yago/WikicatBritishHistorians is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:1019: - - http://dbpedia.org/class/yago/WikicatWomenHistorians is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:1048: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:1048: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:1065: - - http://www.snee.com/bobdc.blog/ is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:1077: - - http://linkeddata.org/guides-and-tutorials is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:1083: - - http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:1089: - - http://datos.bcn.cl/es/informacion/que-es is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:1098: - - http://www.history.ac.uk/projects/digital/tobias is not an HTTPS link - -* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:1098: - - http://www.ahrc.ac.uk/ is not an HTTPS link - -* At _site/es/lecciones/introduccion-e-instalacion.html:121: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-e-instalacion.html:140: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-e-instalacion.html:177: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-e-instalacion.html:214: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-e-instalacion.html:557: - - http://komodoide.com/komodo-edit/ is not an HTTPS link - -* At _site/es/lecciones/introduccion-map-warper.html:122: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-map-warper.html:141: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-map-warper.html:178: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-map-warper.html:215: - - 'a' tag is missing a reference - -* At _site/es/lecciones/introduccion-map-warper.html:521: - - http://bibliotecanacional.gov.co/es-co/colecciones/biblioteca-digital/mapoteca is not an HTTPS link - -* At _site/es/lecciones/introduccion-map-warper.html:521: - - http://cartografia.bogotaendocumentos.com/ is not an HTTPS link - -* At _site/es/lecciones/introduccion-map-warper.html:586: - - http://catalogoenlinea.bibliotecanacional.gov.co/custom/web/content/mapoteca/fmapoteca_984_figac_16/fmapoteca_984_figac_16.html is not an HTTPS link - -* At _site/es/lecciones/lectura-escalable-de-datos-estructurados.html:127: - - 'a' tag is missing a reference - -* At _site/es/lecciones/lectura-escalable-de-datos-estructurados.html:146: - - 'a' tag is missing a reference - -* At _site/es/lecciones/lectura-escalable-de-datos-estructurados.html:183: - - 'a' tag is missing a reference - -* At _site/es/lecciones/lectura-escalable-de-datos-estructurados.html:220: - - 'a' tag is missing a reference - -* At 
_site/es/lecciones/limpieza-de-datos-con-OpenRefine.html:123:
-  'a' tag is missing a reference
-* At _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html:142:
-  'a' tag is missing a reference
-* At _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html:179:
-  'a' tag is missing a reference
-* At _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html:216:
-  'a' tag is missing a reference
-* At _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html:570:
-  http://openrefine.org/ is not an HTTPS link
-* At _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html:591:
-  http://vis.stanford.edu/papers/wrangler/ is not an HTTPS link
-* At _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html:591:
-  http://openrefine.org/ is not an HTTPS link
-* At _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html:593:
-  http://en.wikipedia.org/wiki/Named-entity_recognition is not an HTTPS link
-* At _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html:593:
-  http://www.loc.gov/index.html is not an HTTPS link
-* At _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html:593:
-  http://www.oclc.org/home.en.html is not an HTTPS link
-* At _site/es/lecciones/manipular-cadenas-de-caracteres-en-python.html:121:
-  'a' tag is missing a reference
-* At _site/es/lecciones/manipular-cadenas-de-caracteres-en-python.html:140:
-  'a' tag is missing a reference
-* At _site/es/lecciones/manipular-cadenas-de-caracteres-en-python.html:177:
-  'a' tag is missing a reference
-* At _site/es/lecciones/manipular-cadenas-de-caracteres-en-python.html:214:
-  'a' tag is missing a reference
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:119:
-  'a' tag is missing a reference
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:138:
-  'a' tag is missing a reference
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:175:
-  'a' tag is missing a reference
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:212:
-  'a' tag is missing a reference
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:531:
-  http://archive.org/ is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:532:
-  http://activehistory.ca/2013/09/the-internet-archive-rocks-or-two-million-plus-free-sources-to-explore/ is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:536:
-  http://archive.org/details/bplscas is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:567:
-  http://archive.org/ is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:569:
-  http://archive.org/details/lettertowilliaml00doug is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:570:
-  http://archive.org/stream/lettertowilliaml00doug/39999066767938#page/n0/mode/2up is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:570:
-  http://archive.org/download/lettertowilliaml00doug is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:570:
-  http://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_dc.xml is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:570:
-  http://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_marc.xml is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:570:
-  http://www.loc.gov/marc/bibliographic/ is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:572:
-  http://archive.org/search.php?query=collection%3Abplscas&sort=-publicdate is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:591:
-  http://blog.archive.org/2011/03/31/how-archive-org-items-are-structured/ is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:595:
-  http://internetarchive.readthedocs.io/en/latest/quickstart.html#searching is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:609:
-  http://archive.org/search.php?query=collection%3Abplscas is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:613:
-  http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:698:
-  http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:741:
-  http://docs.python.org/2/tutorial/errors.html#handling-exceptions is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:785:
-  http://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_marc.xml is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:787:
-  http://www.loc.gov/marc/bibliographic/bd260.html is not an HTTPS link
-* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:787:
-  http://www.loc.gov/marc/ is not an HTTPS link
-* At _site/es/lecciones/normalizar-datos.html:121:
-  'a' tag is missing a reference
-* At _site/es/lecciones/normalizar-datos.html:140:
-  'a' tag is missing a reference
-* At _site/es/lecciones/normalizar-datos.html:177:
-  'a' tag is missing a reference
-* At _site/es/lecciones/normalizar-datos.html:214:
-  'a' tag is missing a reference
-* At _site/es/lecciones/normalizar-datos.html:684:
-  http://unicode.org/ is not an HTTPS link
-* At _site/es/lecciones/palabras-clave-en-contexto-n-grams.html:121:
-  'a' tag is missing a reference
-* At _site/es/lecciones/palabras-clave-en-contexto-n-grams.html:140:
-  'a' tag is missing a reference
-* At _site/es/lecciones/palabras-clave-en-contexto-n-grams.html:177:
-  'a' tag is missing a reference
-* At _site/es/lecciones/palabras-clave-en-contexto-n-grams.html:214:
-  'a' tag is missing a reference
-* At _site/es/lecciones/poniendo-omeka-a-funcionar.html:119:
-  'a' tag is missing a reference
-* At _site/es/lecciones/poniendo-omeka-a-funcionar.html:138:
-  'a' tag is missing a reference
-* At _site/es/lecciones/poniendo-omeka-a-funcionar.html:175:
-  'a' tag is missing a reference
-* At _site/es/lecciones/poniendo-omeka-a-funcionar.html:212:
-  'a' tag is missing a reference
-* At _site/es/lecciones/poniendo-omeka-a-funcionar.html:548:
-  http://www.omeka.net is not an HTTPS link
-* At _site/es/lecciones/poniendo-omeka-a-funcionar.html:572:
-  http://www.omeka.net is not an HTTPS link
-* At _site/es/lecciones/poniendo-omeka-a-funcionar.html:949:
-  http://info.omeka.net is not an HTTPS link
-* At _site/es/lecciones/preservar-datos-de-investigacion.html:119:
-  'a' tag is missing a reference
-* At _site/es/lecciones/preservar-datos-de-investigacion.html:138:
-  'a' tag is missing a reference
-* At _site/es/lecciones/preservar-datos-de-investigacion.html:175:
-  'a' tag is missing a reference
-* At _site/es/lecciones/preservar-datos-de-investigacion.html:212:
-  'a' tag is missing a reference
-* At _site/es/lecciones/preservar-datos-de-investigacion.html:601:
-  http://notepad-plus-plus.org/ is not an HTTPS link
-* At _site/es/lecciones/preservar-datos-de-investigacion.html:601:
-  http://komodoide.com/komodo-edit/ is not an HTTPS link
-* At _site/es/lecciones/preservar-datos-de-investigacion.html:630:
-  http://cradledincaricature.com/2014/02/06/comic-art-beyond-the-print-shop/ is not an HTTPS link
-* At _site/es/lecciones/preservar-datos-de-investigacion.html:637:
-  http://www.theguardian.com/uk-news/2014/feb/20/rebekah-brooks-rupert-murdoch-phone-hacking-trial is not an HTTPS link
-* At _site/es/lecciones/preservar-datos-de-investigacion.html:644:
-  http://www.cartoons.ac.uk/record/SBD0931 is not an HTTPS link
-* At _site/es/lecciones/preservar-datos-de-investigacion.html:651:
-  http://www.oldbaileyonline.org/browse.jsp?ref=OA16780417 is not an HTTPS link
-* At _site/es/lecciones/preservar-datos-de-investigacion.html:758:
-  http://historyonics.blogspot.co.uk/2014/01/judging-book-by-its-url.html is not an HTTPS link
-* At _site/es/lecciones/preservar-datos-de-investigacion.html:762:
-  http://earlymodernnotes.wordpress.com/2013/05/18/unclean-unclean-what-historians-can-do-about-sharing-our-messy-research-data/ is not an HTTPS link
-* At _site/es/lecciones/preservar-datos-de-investigacion.html:775:
-  http://britishlibrary.typepad.co.uk/collectioncare/2013/09/the-twelve-principles-of-digital-preservation.html is not an HTTPS link
-* At _site/es/lecciones/preservar-datos-de-investigacion.html:784:
-  http://data-archive.ac.uk/create-manage/document is not an HTTPS link
-* At _site/es/lecciones/procesamiento-basico-de-textos-en-r.html:121:
-  'a' tag is missing a reference
-* At _site/es/lecciones/procesamiento-basico-de-textos-en-r.html:140:
-  'a' tag is missing a reference
-* At _site/es/lecciones/procesamiento-basico-de-textos-en-r.html:177:
-  'a' tag is missing a reference
-* At _site/es/lecciones/procesamiento-basico-de-textos-en-r.html:214:
-  'a' tag is missing a reference
-* At _site/es/lecciones/publicar-archivos-tei-ceteicean.html:119:
-  'a' tag is missing a reference
-* At _site/es/lecciones/publicar-archivos-tei-ceteicean.html:138:
-  'a' tag is missing a reference
-* At _site/es/lecciones/publicar-archivos-tei-ceteicean.html:175:
-  'a' tag is missing a reference
-* At _site/es/lecciones/publicar-archivos-tei-ceteicean.html:212:
-  'a' tag is missing a reference
-* At _site/es/lecciones/publicar-archivos-tei-ceteicean.html:479:
-  http://www.jedit.org/ is not an HTTPS link
-* At _site/es/lecciones/publicar-archivos-tei-ceteicean.html:503:
-  http://hdlab.space/La-Argentina-Manuscrita is not an HTTPS link
-* At _site/es/lecciones/publicar-archivos-tei-ceteicean.html:505:
-  http://hdlab.space/La-Argentina-Manuscrita/assets/Ruy_Diaz-La_argentina_manuscrita.tei.xml is not an HTTPS link
-* At _site/es/lecciones/publicar-archivos-tei-ceteicean.html:774:
-  http://teic.github.io/CETEIcean/ is not an HTTPS link
-* At _site/es/lecciones/retirada/introduccion-control-versiones-github-desktop.html:89:
-  'a' tag is missing a reference
-* At _site/es/lecciones/retirada/introduccion-control-versiones-github-desktop.html:108:
-  'a' tag is missing a reference
-* At _site/es/lecciones/retirada/introduccion-control-versiones-github-desktop.html:145:
-  'a' tag is missing a reference
-* At _site/es/lecciones/retirada/introduccion-control-versiones-github-desktop.html:182:
-  'a' tag is missing a reference
-* At _site/es/lecciones/retirada/introduccion-control-versiones-github-desktop.html:649:
-  http://flight-manual.atom.io/ is not an HTTPS link
-* At _site/es/lecciones/retirada/introduccion-control-versiones-github-desktop.html:749:
-  http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:90:
-  'a' tag is missing a reference
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:109:
-  'a' tag is missing a reference
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:146:
-  'a' tag is missing a reference
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:183:
-  'a' tag is missing a reference
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:545:
-  http://collection.britishmuseum.org/ is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:545:
-  http://labs.europeana.eu/api/linked-open-data-introduction is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:545:
-  http://americanart.si.edu/ is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:545:
-  http://britishart.yale.edu/collections/using-collections/technology/linked-open-data is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:545:
-  http://vocab.getty.edu/ is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:597:
-  http://hdlab.stanford.edu/palladio/ is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:754:
-  http://collection.britishmuseum.org/sparql is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:772:
-  http://collection.britishmuseum.org/resource?uri=http://collection.britishmuseum.org/id/object/PPA82633 is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:782:
-  http://collection.britishmuseum.org/sparql?query=SELECT+*%0D%0AWHERE+%7B%0D%0A++%3Chttp%3A%2F%2Fcollection.britishmuseum.org%2Fid%2Fobject%2FPPA82633%3E+%3Fp+%3Fo+.%0D%0A++%7D&_implicit=false&_equivalent=false&_form=%2Fsparql is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:1013:
-  http://sparql.europeana.eu/ is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:1013:
-  http://wiki.dbpedia.org/ is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:1013:
-  http://sws.geonames.org/ is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:1013:
-  http://sparql.europeana.eu is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:1064:
-  http://www.getty.edu/research/ is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:1074:
-  http://openrefine.org/ is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:1081:
-  http://hdlab.stanford.edu/palladio/ is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:1133:
-  http://labs.europeana.eu/api/linked-open-data-SPARQL-endpoint is not an HTTPS link
-* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:1134:
-  http://vocab.getty.edu/queries#Finding_Subjects is not an HTTPS link
-* At _site/es/lecciones/reutilizacion-de-codigo-y-modularidad.html:121:
-  'a' tag is missing a reference
-* At _site/es/lecciones/reutilizacion-de-codigo-y-modularidad.html:140:
-  'a' tag is missing a reference
-* At _site/es/lecciones/reutilizacion-de-codigo-y-modularidad.html:177:
-  'a' tag is missing a reference
-* At _site/es/lecciones/reutilizacion-de-codigo-y-modularidad.html:214:
-  'a' tag is missing a reference
-* At _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html:123:
-  'a' tag is missing a reference
-* At _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html:142:
-  'a' tag is missing a reference
-* At _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html:179:
-  'a' tag is missing a reference
-* At _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html:216:
-  'a' tag is missing a reference
-* At _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html:497:
-  http://labs.bl.uk is not an HTTPS link
-* At _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html:515:
-  http://rua.ua.es/dspace/handle/10045/110281 is not an HTTPS link
-* At _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html:523:
-  http://data.cervantesvirtual.com/blog/notebooks/ is not an HTTPS link
-* At _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html:542:
-  http://api.bnf.fr/ is not an HTTPS link
-* At _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html:557:
-  http://data.cervantesvirtual.com/blog/labs is not an HTTPS link
-* At _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html:607:
-  http://www.cervantesvirtual.com/ is not an HTTPS link
-* At _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html:789:
-  http://bibliontology.com/ is not an HTTPS link
-* At _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html:1105:
-  http://www.rdaregistry.info is not an HTTPS link
-* At _site/es/lecciones/salida-de-datos-como-archivo-html.html:121:
-  'a' tag is missing a reference
-* At _site/es/lecciones/salida-de-datos-como-archivo-html.html:140:
-  'a' tag is missing a reference
-* At _site/es/lecciones/salida-de-datos-como-archivo-html.html:177:
-  'a' tag is missing a reference
-* At _site/es/lecciones/salida-de-datos-como-archivo-html.html:214:
-  'a' tag is missing a reference
-* At _site/es/lecciones/salida-palabras-clave-contexto-ngrams.html:121:
-  'a' tag is missing a reference
-* At _site/es/lecciones/salida-palabras-clave-contexto-ngrams.html:140:
-  'a' tag is missing a reference
-* At _site/es/lecciones/salida-palabras-clave-contexto-ngrams.html:177:
-  'a' tag is missing a reference
-* At _site/es/lecciones/salida-palabras-clave-contexto-ngrams.html:214:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:119:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:138:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:175:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:212:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:556:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:557:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:558:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:559:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:562:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:563:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:564:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:565:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:566:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:567:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:570:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:571:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:572:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:573:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:574:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:575:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:576:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:577:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:580:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:581:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:582:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:585:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:586:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:587:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:590:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:591:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:592:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:593:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:596:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:597:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:598:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:599:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:602:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:603:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:604:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:605:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:606:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:611:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:615:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:659:
-  http://jekyllrb.com/docs/home/ is not an HTTPS link
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:661:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:667:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:712:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:726:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:732:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:750:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:768:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:770:
-  http://www.barebones.com/products/bbedit is not an HTTPS link
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:774:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:846:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:850:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:860:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:862:
-  http://brew.sh/ is not an HTTPS link
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:922:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:924:
-  http://brew.sh/ is not an HTTPS link
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:947:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:969:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:977:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:987:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1010:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1081:
-  http://amandavisconti.github.io/JekyllDemo/ is not an HTTPS link
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1111:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1167:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1184:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1188:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1310:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1343:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1349:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1353:
-  http://kramdown.gettalong.org/quickref.html is not an HTTPS link
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1355:
-  http://kramdown.gettalong.org/quickref.html is not an HTTPS link
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1357:
-  http://www.typora.io/ is not an HTTPS link
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1359:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1393:
-  http://amandavisconti.github.io/JekyllDemo/resume/ is not an HTTPS link
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1395:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1471:
-  http://raw.githubusercontent.com/amandavisconti/JekyllDemo/gh-pages/_posts/2016-02-29-a-post-about-my-research.markdown is not an HTTPS link
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1473:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1677:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1681:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1690:
-  http://jekyllthemes.org/ is not an HTTPS link
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1691:
-  http://jekyllthemes.io/ is not an HTTPS link
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1694:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1698:
-  http://jekyllrb.com/docs/plugins/ is not an HTTPS link
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1704:
-  http://jekyllrb.com/docs/plugins/ is not an HTTPS link
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1704:
-  http://jekyllrb.com/docs/plugins/ is not an HTTPS link
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1707:
-  http://literaturegeek.com/ is not an HTTPS link
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1719:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1739:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1741:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1743:
-  http://stackexchange.com/ is not an HTTPS link
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1745:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1749:
-  'a' tag is missing a reference
-* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1755:
-  http://jekyll-windows.juthilo.com/ is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:123:
-  'a' tag is missing a reference
-* At _site/es/lecciones/topic-modeling-y-mallet.html:142:
-  'a' tag is missing a reference
-* At _site/es/lecciones/topic-modeling-y-mallet.html:179:
-  'a' tag is missing a reference
-* At _site/es/lecciones/topic-modeling-y-mallet.html:216:
-  'a' tag is missing a reference
-* At _site/es/lecciones/topic-modeling-y-mallet.html:565:
-  http://mallet.cs.umass.edu/mailinglist.php is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:571:
-  http://www.cs.umbc.edu/~hillol/NGDM07/abstracts/talks/MKirschenbaum.pdf is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:571:
-  http://www.worldcat.org/title/reading-machines-toward-an-algorithmic-criticism/oclc/708761605&referer=brief_results is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:573:
-  http://voyant-tools.org/ is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:591:
-  http://arxiv.org/abs/1003.6087/ is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:598:
-  http://dsl.richmond.edu/dispatch/ is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:605:
-  http://mallet.cs.umass.edu/index.php is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:628:
-  http://mallet.cs.umass.edu/index.php is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:628:
-  http://mallet.cs.umass.edu/download.php is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:629:
-  http://www.oracle.com/technetwork/java/javase/downloads/index.html is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:768:
-  http://mallet.cs.umass.edu/download.php is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:769:
-  http://www.oracle.com/technetwork/java/javase/downloads/index.html is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:990:
-  http://dsl.richmond.edu/dispatch/ is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:997:
-  http://electricarchaeology.ca/2012/07/09/mining-a-day-of-archaeology/ is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:1001:
-  http://electricarchaeology.ca/2012/06/08/mining-the-open-web-with-looted-heritage-draft/ is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:1008:
-  http://tedunderwood.wordpress.com/2012/04/07/topic-modeling-made-just-simple-enough/ is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:1009:
-  http://web.archive.org/web/20160704150726/http://www.lisarhody.com:80/some-assembly-required/ is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:1011:
-  http://dl.acm.org/citation.cfm?id=944937 is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:1012:
-  http://mimno.infosci.cornell.edu/topics.html is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:1012:
-  http://www.perseus.tufts.edu/publications/02-jocch-mimno.pdf is not an HTTPS link
-* At _site/es/lecciones/topic-modeling-y-mallet.html:1055:
-  http://www.morethanbooks.eu/topic-modeling-introduccion/ is not an HTTPS link
-* At _site/es/lecciones/trabajar-con-archivos-de-texto.html:121:
-  'a' tag is missing a reference
-* At _site/es/lecciones/trabajar-con-archivos-de-texto.html:140:
-  'a' tag is missing a reference
-* At _site/es/lecciones/trabajar-con-archivos-de-texto.html:177:
-  'a' tag is missing a reference
-* At _site/es/lecciones/trabajar-con-archivos-de-texto.html:214:
-  'a' tag is missing a reference
-* At _site/es/lecciones/trabajar-con-archivos-de-texto.html:672:
-  http://en.wikibooks.org/wiki/Non-Programmer%27s_Tutorial_for_Python_2.6/Hello,_World is not an HTTPS link
-* At _site/es/lecciones/trabajar-con-paginas-web.html:121:
-  'a' tag is missing a reference
-* At _site/es/lecciones/trabajar-con-paginas-web.html:140:
-  'a' tag is missing a reference
-* At _site/es/lecciones/trabajar-con-paginas-web.html:177:
-  'a' tag is missing a reference
-* At _site/es/lecciones/trabajar-con-paginas-web.html:214:
-  'a' tag is missing a reference
-* At _site/es/lecciones/trabajar-con-paginas-web.html:582:
-  http://oldbaileyonline.org is not an HTTPS link
-* At _site/es/lecciones/trabajar-con-paginas-web.html:623:
-  http://en.wikipedia.org/wiki/Gordon_Riots is not an HTTPS link
-* At _site/es/lecciones/trabajar-con-paginas-web.html:646:
-  http://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes is not an HTTPS link
-* At _site/es/lecciones/trabajar-con-paginas-web.html:646:
-  http://www.oldbaileyonline.org/images.jsp?doc=178006280084 is not an HTTPS link
-* At _site/es/lecciones/transformacion-datos-xml-xsl.html:119:
-  'a' tag is missing a reference
-* At _site/es/lecciones/transformacion-datos-xml-xsl.html:138:
-  'a' tag is missing a reference
-* At _site/es/lecciones/transformacion-datos-xml-xsl.html:175:
-  'a' tag is missing a reference
-* At _site/es/lecciones/transformacion-datos-xml-xsl.html:212:
-  'a' tag is missing a reference
-* At _site/es/lecciones/transformacion-datos-xml-xsl.html:777:
-  http://scissors-and-paste.net is not an HTTPS link
-* At _site/es/lecciones/transformacion-datos-xml-xsl.html:875:
-  http://www.w3.org/ is not an HTTPS link
-* At _site/es/lecciones/uso-las-colecciones-hathitrust-mineria-textual-R.html:117:
-  'a' tag is missing a reference
-* At _site/es/lecciones/uso-las-colecciones-hathitrust-mineria-textual-R.html:136:
-  'a' tag is missing a reference
-* At _site/es/lecciones/uso-las-colecciones-hathitrust-mineria-textual-R.html:173:
-  'a' tag is missing a reference
-* At _site/es/lecciones/uso-las-colecciones-hathitrust-mineria-textual-R.html:210:
-  'a' tag is missing a reference
-* At _site/es/lecciones/uso-las-colecciones-hathitrust-mineria-textual-R.html:545:
-  http://hdl.handle.net/2027/uc1.31175010656638 is not an HTTPS link
-* At _site/es/lecciones/ver-archivos-html.html:121:
-  'a' tag is missing a reference
-* At _site/es/lecciones/ver-archivos-html.html:140:
-  'a' tag is missing a reference
-* At _site/es/lecciones/ver-archivos-html.html:177:
-  'a' tag is missing a reference
-* At _site/es/lecciones/ver-archivos-html.html:214:
-  'a' tag is missing a reference
-* At _site/es/lecciones/ver-archivos-html.html:591:
-  http://www.w3schools.com/html/default.asp is not an HTTPS link
-* At _site/es/lecciones/ver-archivos-html.html:643:
-  http://www.w3schools.com/html/default.asp is not an HTTPS link
-* At _site/es/lecciones/ver-archivos-html.html:644:
-  http://www.w3schools.com/html/html5_intro.asp is not an HTTPS link
-* At _site/es/lecciones/visualizacion-y-animacion-de-tablas-historicas-con-R.html:120:
-  'a' tag is missing a reference
-* At _site/es/lecciones/visualizacion-y-animacion-de-tablas-historicas-con-R.html:139:
-  'a' tag is missing a reference
-* At _site/es/lecciones/visualizacion-y-animacion-de-tablas-historicas-con-R.html:176:
-  'a' tag is missing a reference
-* At _site/es/lecciones/visualizacion-y-animacion-de-tablas-historicas-con-R.html:213:
-  'a' tag is missing a reference
-* At _site/es/pia.html:88:
-  'a' tag is missing a reference
-* At _site/es/pia.html:107:
-  'a' tag is missing a reference
-* At _site/es/pia.html:144:
-  'a' tag is missing a reference
-* At _site/es/pia.html:181:
-  'a' tag is missing a reference
-* At _site/es/politica-de-privacidad.html:88:
-  'a' tag is missing a reference
-* At _site/es/politica-de-privacidad.html:107:
-  'a' tag is missing a reference
-* At _site/es/politica-de-privacidad.html:144:
-  'a' tag is missing a reference
-* At _site/es/politica-de-privacidad.html:181:
-  'a' tag is missing a reference
-* At _site/es/politica-retirada-lecciones.html:88:
-  'a' tag is missing a reference
-* At _site/es/politica-retirada-lecciones.html:107:
-  'a' tag is missing a reference
-* At _site/es/politica-retirada-lecciones.html:144:
-  'a' tag is missing a reference
-* At _site/es/politica-retirada-lecciones.html:181:
-  'a' tag is missing a reference
-* At _site/es/retroalimentacion.html:88:
-  'a' tag is missing a reference
-* At _site/es/retroalimentacion.html:107:
-  'a' tag is missing a reference
-* At _site/es/retroalimentacion.html:144:
-  'a' tag is missing a reference
-* At _site/es/retroalimentacion.html:181:
-  'a' tag is missing a reference
-* At _site/es/vacantes.html:88:
-  'a' tag is missing a reference
-* At _site/es/vacantes.html:107:
-  'a' tag is missing a reference
-* At _site/es/vacantes.html:144:
-  'a' tag is missing a reference
-* At _site/es/vacantes.html:181:
-  'a' tag is missing a reference
-* At _site/fr/apropos.html:88:
-  'a' tag is missing a reference
-* At _site/fr/apropos.html:107:
-  'a' tag is missing a reference
-* At _site/fr/apropos.html:144:
-  'a' tag is missing a reference
-* At _site/fr/apropos.html:192:
-  'a' tag is missing a reference
-* At _site/fr/apropos.html:266:
-  http://dhawards.org/dhawards2016/results/ is not an HTTPS link
-* At _site/fr/apropos.html:266:
-  http://dhawards.org/dhawards2017/results/ is not an HTTPS link
-* At _site/fr/apropos.html:266:
-  http://humanidadesdigitaleshispanicas.es/ is not an HTTPS link
-* At _site/fr/apropos.html:266:
-  http://dhawards.org/dhawards2022/results/ is not an HTTPS link
-* At _site/fr/apropos.html:279:
-  http://digitalhistoryhacks.blogspot.com/2008/01/programming-historian.html is not an HTTPS link
-* At _site/fr/consignes-auteurs.html:88:
-  'a' tag is missing a reference
-* At _site/fr/consignes-auteurs.html:107:
-  'a' tag is missing a reference
-* At _site/fr/consignes-auteurs.html:144:
-  'a' tag is missing a reference
-* At _site/fr/consignes-auteurs.html:192:
-  'a' tag is missing a reference
-* At _site/fr/consignes-evaluateurs.html:88:
-  'a' tag is missing a reference
-* At _site/fr/consignes-evaluateurs.html:107:
-  'a' tag is missing a reference
-* At _site/fr/consignes-evaluateurs.html:144:
-  'a' tag is missing a reference
-* At _site/fr/consignes-evaluateurs.html:192:
-  'a' tag is missing a reference
-* At _site/fr/consignes-redacteurs.html:88:
-  'a' tag is missing a reference
-* At _site/fr/consignes-redacteurs.html:107:
-  'a' tag is missing a reference
-* At _site/fr/consignes-redacteurs.html:144:
-  'a' tag is missing a reference
-* At _site/fr/consignes-redacteurs.html:192:
-  'a' tag is missing a reference
-* At _site/fr/consignes-redacteurs.html:583:
-  http://www.loc.gov/maps/collections is not an HTTPS link
-* At _site/fr/consignes-traducteurs.html:88:
-  'a' tag is missing a reference
-* At _site/fr/consignes-traducteurs.html:107:
-  'a' tag is missing a reference
-* At _site/fr/consignes-traducteurs.html:144:
-  'a' tag is missing a reference
-* At _site/fr/consignes-traducteurs.html:192:
-  'a' tag is missing a reference
-* At _site/fr/contribuer.html:88:
-  'a' tag is missing a reference
-* At _site/fr/contribuer.html:107:
-  'a' tag is missing a reference
-* At _site/fr/contribuer.html:144:
-  'a' tag is missing a reference
-* At _site/fr/contribuer.html:192:
-  'a' tag is missing a reference
-* At _site/fr/contribuer.html:298:
-  http://www.worldcat.org/title/programming-historian/oclc/951537099 is not an HTTPS link
-* At _site/fr/contribuer.html:300:
-  http://purdue-primo-prod.hosted.exlibrisgroup.com/primo_library/libweb/action/dlDisplay.do?vid=PURDUE&search_scope=everything&docId=PURDUE_ALMA51671812890001081&fn=permalink is not an HTTPS link
-* At _site/fr/dons.html:88:
-  'a' tag is missing a reference
-* At _site/fr/dons.html:107:
+* At _site/en/lessons/geocoding-qgis.html:118:
   'a' tag is missing a reference
-* At _site/fr/dons.html:144:
+* At _site/en/lessons/geocoding-qgis.html:137:
   'a' tag is missing a reference
-* At _site/fr/dons.html:192:
+* At _site/en/lessons/geocoding-qgis.html:174:
   'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:88:
+* At _site/en/lessons/geocoding-qgis.html:200:
   'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:107:
+* At _site/en/lessons/geoparsing-text-with-edinburgh.html:117:
   'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:144:
+* At _site/en/lessons/geoparsing-text-with-edinburgh.html:136:
   'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:192:
+* At _site/en/lessons/geoparsing-text-with-edinburgh.html:173:
   'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:310:
-  http://twitter.com/maxcarlons is not an HTTPS link
-* At _site/fr/equipe-projet.html:313:
-  http://github.com/carlonim is not an HTTPS link
-* At _site/fr/equipe-projet.html:414:
-  http://github.com/lachapot is not an HTTPS link
-* At _site/fr/equipe-projet.html:512:
-  http://twitter.com/cosovschi is not an HTTPS link
-* At _site/fr/equipe-projet.html:515:
-  http://github.com/digitalkosovski is not an HTTPS link
-* At _site/fr/equipe-projet.html:618:
-  http://github.com/caiocmello is not an HTTPS link
-* At _site/fr/equipe-projet.html:1182:
-  http://github.com/semanticnoodles is not an HTTPS link
-* At _site/fr/equipe-projet.html:1278:
-  http://twitter.com/nabsiddiqui is not an HTTPS link
-* At _site/fr/equipe-projet.html:1281:
-  http://github.com/nabsiddiqui is not an HTTPS link
-* At _site/fr/equipe-projet.html:1631:
-  http://twitter.com/giulia_taurino is not an HTTPS link
-* At _site/fr/equipe-projet.html:1634:
-  http://github.com/giuliataurino is not an HTTPS link
-* At _site/fr/equipe-projet.html:1804:
-  http://www.alexwermercolan.com/ is not an HTTPS link
-* At _site/fr/equipe-projet.html:1810:
-  http://twitter.com/alexwermercolan is not an HTTPS link
-* At _site/fr/equipe-projet.html:1813:
-  http://github.com/hawc2 is not an HTTPS link
-* At _site/fr/equipe-projet.html:2059:
-  http://www.mariajoseafanador.com is not an HTTPS link
-* At _site/fr/equipe-projet.html:2065:
-  http://twitter.com/mariajoafana is not an HTTPS link
-* At _site/fr/equipe-projet.html:2068:
-  http://github.com/mariajoafana is not an HTTPS link
-* At _site/fr/equipe-projet.html:2534:
-  http://twitter.com/IsaGribomont is not an HTTPS link
-* At _site/fr/equipe-projet.html:2537:
-  http://github.com/isag91 is not an HTTPS link
-* At _site/fr/equipe-projet.html:2745:
-  http://twitter.com/espejolento is not an HTTPS link
-* At _site/fr/equipe-projet.html:2748:
-  http://github.com/silviaegt is not an HTTPS link
-* At _site/fr/equipe-projet.html:3036:
-  http://twitter.com/jenniferisve is not an HTTPS link
-* At _site/fr/equipe-projet.html:3039:
-  http://github.com/jenniferisasi is not an HTTPS link
-* At _site/fr/equipe-projet.html:3361:
-  http://twitter.com/enetreseles is not an HTTPS link
-* At _site/fr/equipe-projet.html:3364:
-  http://github.com/nllano is not an HTTPS link
-* At _site/fr/equipe-projet.html:3568:
-  http://twitter.com/jgob is not an HTTPS link
-* At _site/fr/equipe-projet.html:3571:
-  http://github.com/joshuagob is not an HTTPS link
-* At _site/fr/equipe-projet.html:3863:
-  http://twitter.com/rivaquiroga is not an HTTPS link
-* At _site/fr/equipe-projet.html:3866:
-  http://github.com/rivaquiroga is not an HTTPS link
-* At _site/fr/equipe-projet.html:4157:
-  http://github.com/nivaca is not an HTTPS link
-* At _site/fr/equipe-projet.html:4370:
-  http://github.com/marie-flesch is not an HTTPS link
-* At _site/fr/equipe-projet.html:4513:
-  http://github.com/matgille is not an HTTPS link
-* At _site/fr/equipe-projet.html:4746:
-  http://github.com/mhersent is not an HTTPS link
-* At _site/fr/equipe-projet.html:4804:
-  http://twitter.com/superHH is not an HTTPS link
-* At _site/fr/equipe-projet.html:5056:
-  http://github.com/DMathelier is not an HTTPS link
-* At _site/fr/equipe-projet.html:5190:
-  http://twitter.com/emilienschultz is not an HTTPS link
-* At _site/fr/equipe-projet.html:5193:
-  http://github.com/emilienschultz is not an HTTPS link
-* At _site/fr/equipe-projet.html:5317:
-  http://twitter.com/davvalent is not an HTTPS link
-* At _site/fr/equipe-projet.html:5320:
-  http://github.com/davvalent is not an HTTPS link
-* At _site/fr/equipe-projet.html:5449:
-  http://github.com/AlexandreWa is not an HTTPS link
-* At _site/fr/equipe-projet.html:5584:
-  http://github.com/josircg is not an HTTPS link
-* At _site/fr/equipe-projet.html:5842:
-  http://twitter.com/danielalvesfcsh is not an HTTPS link
-* At _site/fr/equipe-projet.html:5845:
-  http://github.com/DanielAlvesLABDH is not an HTTPS link
-* At _site/fr/equipe-projet.html:6107:
-  http://twitter.com/ericbrasiln is not an HTTPS link
-* At _site/fr/equipe-projet.html:6110:
-  http://github.com/ericbrasiln is not an HTTPS link
-* At _site/fr/equipe-projet.html:6301:
+* At _site/en/lessons/geoparsing-text-with-edinburgh.html:199:
-  http://github.com/luisferla is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:6543:
+* At _site/en/lessons/georeferencing-qgis.html:121:
-  http://twitter.com/jimmy_medeiros is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:6546:
+* At _site/en/lessons/georeferencing-qgis.html:140:
-  http://github.com/JimmyMedeiros82 is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:6781:
+* At _site/en/lessons/georeferencing-qgis.html:177:
-  http://github.com/joanacvp is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:7027:
+* At _site/en/lessons/georeferencing-qgis.html:203:
-  http://twitter.com/araceletorres is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:7030:
+* At _site/en/lessons/geospatial-data-analysis.html:120:
-  http://github.com/aracele is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:7286:
+* At _site/en/lessons/geospatial-data-analysis.html:139:
-  http://twitter.com/j_w_baker is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:7289:
+* At _site/en/lessons/geospatial-data-analysis.html:176:
-  http://github.com/drjwbaker is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:7721:
+* At _site/en/lessons/geospatial-data-analysis.html:202:
-  http://adamcrymble.org is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:7727:
+* At _site/en/lessons/getting-started-with-markdown.html:117:
-  http://twitter.com/Adam_Crymble is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:7730:
+* At _site/en/lessons/getting-started-with-markdown.html:136:
-  http://github.com/acrymble is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:8198:
+* At _site/en/lessons/getting-started-with-markdown.html:173:
-  http://github.com/adamfarquhar is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:8258:
+* At _site/en/lessons/getting-started-with-markdown.html:199:
-  http://twitter.com/jenniferisve is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:8261:
+* At _site/en/lessons/getting-started-with-markdown.html:727:
-  http://github.com/jenniferisasi is not an HTTPS link
+  http://programminghistorian.org/ is not an HTTPS link
-* At _site/fr/equipe-projet.html:8589:
+* At _site/en/lessons/getting-started-with-mysql-using-r.html:117:
-  http://twitter.com/rivaquiroga is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:8592:
+* At _site/en/lessons/getting-started-with-mysql-using-r.html:136:
-  http://github.com/rivaquiroga is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:8878:
+* At _site/en/lessons/getting-started-with-mysql-using-r.html:173:
-  http://twitter.com/amsichani is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:8881:
+* At _site/en/lessons/getting-started-with-mysql-using-r.html:199:
-  http://github.com/amsichani is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:9221:
+* At _site/en/lessons/googlemaps-googleearth.html:121:
-  http://twitter.com/AnisaHawes is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:9224:
+* At _site/en/lessons/googlemaps-googleearth.html:140:
-  http://github.com/anisa-hawes is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/equipe-projet.html:9433:
+* At _site/en/lessons/googlemaps-googleearth.html:177:
-  http://github.com/charlottejmc is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/evenements.html:88:
+* At _site/en/lessons/googlemaps-googleearth.html:203:
   'a' tag is missing a reference
-* At _site/fr/evenements.html:107:
+* At _site/en/lessons/gravity-model.html:133:
   'a' tag is missing a reference
-* At _site/fr/evenements.html:144:
+* At _site/en/lessons/gravity-model.html:152:
   'a' tag is missing a reference
-* At _site/fr/evenements.html:192:
+* At _site/en/lessons/gravity-model.html:189:
   'a' tag is missing a reference
-* At _site/fr/index.html:87:
+* At _site/en/lessons/gravity-model.html:215:
   'a' tag is missing a reference
-* At _site/fr/index.html:106:
+* At _site/en/lessons/image-classification-neural-networks.html:117:
   'a' tag is missing a reference
-* At _site/fr/index.html:143:
+* At _site/en/lessons/image-classification-neural-networks.html:136:
   'a' tag is missing a reference
-* At _site/fr/index.html:191:
+* At _site/en/lessons/image-classification-neural-networks.html:173:
   'a' tag is missing a reference
-* At _site/fr/jisc-tna-partenariat.html:88:
+* At _site/en/lessons/image-classification-neural-networks.html:199:
   'a' tag is missing a reference
-* At _site/fr/jisc-tna-partenariat.html:107:
+* At _site/en/lessons/index.html:86:
   'a' tag is missing a reference
-* At _site/fr/jisc-tna-partenariat.html:144:
+* At _site/en/lessons/index.html:105:
   'a' tag is missing a reference
-* At _site/fr/jisc-tna-partenariat.html:192:
+* At _site/en/lessons/index.html:142:
   'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:120:
+* At _site/en/lessons/index.html:168:
   'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:139:
+* At _site/en/lessons/installing-omeka.html:117:
   'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:176:
+* At _site/en/lessons/installing-omeka.html:136:
   'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:224:
+* At _site/en/lessons/installing-omeka.html:173:
   'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:578:
+* At _site/en/lessons/installing-omeka.html:199:
-  http://voyant-tools.org/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:582:
+* At _site/en/lessons/installing-python-modules-pip.html:117:
-  http://voyant-tools.org/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:582:
+* At _site/en/lessons/installing-python-modules-pip.html:136:
-  http://www.laurenceanthony.net/software/antconc/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:582:
+* At _site/en/lessons/installing-python-modules-pip.html:173:
-  http://www.laurenceanthony.net/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:582:
+* At _site/en/lessons/installing-python-modules-pip.html:199:
-  http://hfroehli.ch/2014/05/11/intro-bibliography-corpus-linguistics/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:589:
+* At _site/en/lessons/interactive-data-visualization-dashboard.html:117:
-  http://www.laurenceanthony.net/software/antconc/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:590:
+* At _site/en/lessons/interactive-data-visualization-dashboard.html:136:
-  http://www.laurenceanthony.net/software/antconc/releases/AntConc324/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:620:
+* At _site/en/lessons/interactive-data-visualization-dashboard.html:173:
-  http://notepad-plus-plus.org/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:620:
+* At _site/en/lessons/interactive-data-visualization-dashboard.html:199:
-  http://www.barebones.com/products/textwrangler/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:629:
+* At _site/en/lessons/interactive-text-games-using-twine.html:117:
-  http://www.nltk.org/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:631:
+* At _site/en/lessons/interactive-text-games-using-twine.html:136:
-  http://www.amazon.com/Developing-Linguistic-Corpora-Practice-Guides/dp/1842172050/ref=sr_1_1_1 is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:968:
+* At _site/en/lessons/interactive-text-games-using-twine.html:173:
-  http://www.wordfrequency.info/free.asp is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:1022:
+* At _site/en/lessons/interactive-text-games-using-twine.html:199:
-  http://www.lexically.net/downloads/version6/HTML/index.html?keyness_definition.htm is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:1022:
+* At _site/en/lessons/interactive-visualization-with-plotly.html:117:
-  http://www.laurenceanthony.net/software/antconc/releases/AntConc335/help.pdf is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:1087:
+* At _site/en/lessons/interactive-visualization-with-plotly.html:136:
-  http://hfroehlich.wordpress.com/2014/05/11/intro-bibliography-corpus-linguistics/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:1088:
+* At _site/en/lessons/interactive-visualization-with-plotly.html:173:
-  http://hfroehli.ch/workshops/getting-started-with-antconc/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:1091:
+* At _site/en/lessons/interactive-visualization-with-plotly.html:199:
-  http://edutechwiki.unige.ch/fr/AntConc# is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:1092:
+* At _site/en/lessons/interrogating-national-narrative-gpt.html:117:
-  http://explorationdecorpus.corpusecrits.huma-num.fr/antconc/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:1093:
+* At _site/en/lessons/interrogating-national-narrative-gpt.html:136:
-  http://cid.ens-lyon.fr/ac_article.asp?fic=antconc.asp is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:1095:
+* At _site/en/lessons/interrogating-national-narrative-gpt.html:173:
-  http://ancilla.unice.fr/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:1095:
+* At _site/en/lessons/interrogating-national-narrative-gpt.html:199:
-  http://iramuteq.org/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:1095:
+* At _site/en/lessons/intro-to-bash.html:119:
-  http://www.lexi-co.com/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:1095:
+* At _site/en/lessons/intro-to-bash.html:138:
-  http://textometrie.ens-lyon.fr/?lang=fr is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-corpus-antconc.html:1099:
+* At _site/en/lessons/intro-to-bash.html:175:
-  http://lexicometrica.univ-paris3.fr/livre/st94/st94-tdm.html is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-de-documents-avec-tfidf.html:135:
+* At _site/en/lessons/intro-to-bash.html:201:
   'a' tag is missing a reference
-* At _site/fr/lecons/analyse-de-documents-avec-tfidf.html:154:
+* At _site/en/lessons/intro-to-linked-data.html:118:
   'a' tag is missing a reference
-* At _site/fr/lecons/analyse-de-documents-avec-tfidf.html:191:
+* At _site/en/lessons/intro-to-linked-data.html:137:
   'a' tag is missing a reference
-* At _site/fr/lecons/analyse-de-documents-avec-tfidf.html:239:
+* At _site/en/lessons/intro-to-linked-data.html:174:
   'a' tag is missing a reference
-* At _site/fr/lecons/analyse-de-documents-avec-tfidf.html:591:
+* At _site/en/lessons/intro-to-linked-data.html:200:
-  http://www.worldcat.org/oclc/1232233436 is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-de-documents-avec-tfidf.html:1678:
+* At _site/en/lessons/intro-to-linked-data.html:1024:
-  http://scikit-learn.org/stable/install.html is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-donnees-tabulaires-R.html:119:
+* At _site/en/lessons/intro-to-linked-data.html:1024:
   'a' tag is missing a reference
-* At _site/fr/lecons/analyse-donnees-tabulaires-R.html:138:
+* At _site/en/lessons/intro-to-powershell.html:117:
   'a' tag is missing a reference
-* At _site/fr/lecons/analyse-donnees-tabulaires-R.html:175:
+* At _site/en/lessons/intro-to-powershell.html:136:
   'a' tag is missing a reference
-* At _site/fr/lecons/analyse-donnees-tabulaires-R.html:223:
+* At _site/en/lessons/intro-to-powershell.html:173:
   'a' tag is missing a reference
-* At _site/fr/lecons/analyse-donnees-tabulaires-R.html:1099:
+* At _site/en/lessons/intro-to-powershell.html:199:
-  http://web.archive.org/web/20191015004305/https://www.nceas.ucsb.edu/files/scicomp/Dloads/RProgramming/BestFirstRTutorial.pdf is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-reseau-python.html:125:
+* At _site/en/lessons/intro-to-twitterbots.html:121:
   'a' tag is missing a reference
-* At _site/fr/lecons/analyse-reseau-python.html:144:
+* At _site/en/lessons/intro-to-twitterbots.html:140:
   'a' tag is missing a reference
-* At _site/fr/lecons/analyse-reseau-python.html:181:
+* At _site/en/lessons/intro-to-twitterbots.html:177:
   'a' tag is missing a reference
-* At _site/fr/lecons/analyse-reseau-python.html:229:
+* At _site/en/lessons/intro-to-twitterbots.html:203:
   'a' tag is missing a reference
-* At _site/fr/lecons/analyse-reseau-python.html:606:
+* At _site/en/lessons/introduction-and-installation.html:119:
-  http://www.oxforddnb.com is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/analyse-reseau-python.html:1063:
+* At _site/en/lessons/introduction-and-installation.html:138:
-  http://pandas.pydata.org/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/calibration-radiocarbone-avec-r.html:135:
+* At _site/en/lessons/introduction-and-installation.html:175:
   'a' tag is missing a reference
-* At _site/fr/lecons/calibration-radiocarbone-avec-r.html:154:
+* At _site/en/lessons/introduction-and-installation.html:201:
   'a' tag is missing a reference
-* At _site/fr/lecons/calibration-radiocarbone-avec-r.html:191:
+* At _site/en/lessons/introduction-map-warper.html:121:
   'a' tag is missing a reference
-* At _site/fr/lecons/calibration-radiocarbone-avec-r.html:239:
+* At _site/en/lessons/introduction-map-warper.html:140:
   'a' tag is missing a reference
-* At _site/fr/lecons/calibration-radiocarbone-avec-r.html:709:
+* At _site/en/lessons/introduction-map-warper.html:177:
-  http://calib.org is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/comprendre-les-expressions-regulieres.html:119:
+* At _site/en/lessons/introduction-map-warper.html:203:
   'a' tag is missing a reference
-* At _site/fr/lecons/comprendre-les-expressions-regulieres.html:138:
+* At _site/en/lessons/introduction-to-ffmpeg.html:117:
   'a' tag is missing a reference
-* At _site/fr/lecons/comprendre-les-expressions-regulieres.html:175:
+* At _site/en/lessons/introduction-to-ffmpeg.html:136:
   'a' tag is missing a reference
-* At _site/fr/lecons/comprendre-les-expressions-regulieres.html:223:
+* At _site/en/lessons/introduction-to-ffmpeg.html:173:
   'a' tag is missing a reference
-* At _site/fr/lecons/comprendre-les-expressions-regulieres.html:619:
+* At _site/en/lessons/introduction-to-ffmpeg.html:199:
-  http://www.libreoffice.org/download is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/comprendre-les-expressions-regulieres.html:665:
+* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:120:
-  http://archive.org/details/jstor-4560629/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/comprendre-les-expressions-regulieres.html:668:
+* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:139:
-  http://archive.org/stream/jstor-4560629/4560629#page/n0/mode/2up is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/comprendre-les-expressions-regulieres.html:688:
+* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:176:
-  http://archive.org/stream/jstor-4560629/4560629_djvu.txt is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/comprendre-les-expressions-regulieres.html:1461:
+* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:202:
-  http://rubular.com/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/comprendre-les-expressions-regulieres.html:1466:
+* At _site/en/lessons/introduction-to-stylometry-with-python.html:120:
-  http://dh.obdurodon.org/regex.html is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/comprendre-les-pages-web.html:121:
+* At _site/en/lessons/introduction-to-stylometry-with-python.html:139:
   'a' tag is missing a reference
-* At _site/fr/lecons/comprendre-les-pages-web.html:140:
+* At _site/en/lessons/introduction-to-stylometry-with-python.html:176:
   'a' tag is missing a reference
-* At _site/fr/lecons/comprendre-les-pages-web.html:177:
+* At _site/en/lessons/introduction-to-stylometry-with-python.html:202:
   'a' tag is missing a reference
-* At _site/fr/lecons/comprendre-les-pages-web.html:225:
+* At _site/en/lessons/json-and-jq.html:117:
   'a' tag is missing a reference
-* At _site/fr/lecons/comprendre-les-pages-web.html:581:
+* At _site/en/lessons/json-and-jq.html:136:
-  http://www.w3schools.com/html/default.asp is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/comprendre-les-pages-web.html:656:
+* At _site/en/lessons/json-and-jq.html:173:
-  http://www.w3schools.com/html/default.asp is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/comprendre-les-pages-web.html:658:
+* At _site/en/lessons/json-and-jq.html:199:
-  http://www.w3schools.com/html/html5_intro.asp is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/concevoir-base-donnees-nodegoat.html:117:
+* At _site/en/lessons/jupyter-notebooks.html:121:
   'a' tag is missing a reference
-* At _site/fr/lecons/concevoir-base-donnees-nodegoat.html:136:
+* At _site/en/lessons/jupyter-notebooks.html:140:
   'a' tag is missing a reference
-* At _site/fr/lecons/concevoir-base-donnees-nodegoat.html:173:
+* At _site/en/lessons/jupyter-notebooks.html:177:
   'a' tag is missing a reference
-* At _site/fr/lecons/concevoir-base-donnees-nodegoat.html:221:
+* At _site/en/lessons/jupyter-notebooks.html:203:
   'a' tag is missing a reference
-* At _site/fr/lecons/debuter-avec-markdown.html:119:
+* At _site/en/lessons/jupyter-notebooks.html:651:
   'a' tag is missing a reference
-* At _site/fr/lecons/debuter-avec-markdown.html:138:
+* At _site/en/lessons/jupyter-notebooks.html:879:
   'a' tag is missing a reference
-* At _site/fr/lecons/debuter-avec-markdown.html:175:
+* At _site/en/lessons/keywords-in-context-using-n-grams.html:120:
   'a' tag is missing a reference
-* At _site/fr/lecons/debuter-avec-markdown.html:223:
+* At _site/en/lessons/keywords-in-context-using-n-grams.html:139:
   'a' tag is missing a reference
-* At _site/fr/lecons/debuter-avec-markdown.html:571:
+* At _site/en/lessons/keywords-in-context-using-n-grams.html:176:
-  http://daringfireball.net/projects/markdown/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/debuter-avec-markdown.html:575:
+* At _site/en/lessons/keywords-in-context-using-n-grams.html:202:
-  http://github.com is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/debuter-avec-markdown.html:772:
+* At _site/en/lessons/linear-regression.html:133:
-  http://programminghistorian.org/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/decomptes-de-frequences-de-mots-en-python.html:121:
+* At _site/en/lessons/linear-regression.html:152:
   'a' tag is missing a reference
-* At _site/fr/lecons/decomptes-de-frequences-de-mots-en-python.html:140:
+* At _site/en/lessons/linear-regression.html:189:
   'a' tag is missing a reference
-* At _site/fr/lecons/decomptes-de-frequences-de-mots-en-python.html:177:
+* At _site/en/lessons/linear-regression.html:215:
   'a' tag is missing a reference
-* At _site/fr/lecons/decomptes-de-frequences-de-mots-en-python.html:225:
+* At _site/en/lessons/linux-installation.html:119:
   'a' tag is missing a reference
-* At _site/fr/lecons/demarrer-avec-omeka-classic.html:119:
+* At _site/en/lessons/linux-installation.html:138:
   'a' tag is missing a reference
-* At _site/fr/lecons/demarrer-avec-omeka-classic.html:138:
+* At _site/en/lessons/linux-installation.html:175:
   'a' tag is missing a reference
-* At _site/fr/lecons/demarrer-avec-omeka-classic.html:175:
+* At _site/en/lessons/linux-installation.html:201:
   'a' tag is missing a reference
-* At _site/fr/lecons/demarrer-avec-omeka-classic.html:223:
+* At _site/en/lessons/logistic-regression.html:133:
   'a' tag is missing a reference
-* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:121:
+* At _site/en/lessons/logistic-regression.html:152:
   'a' tag is missing a reference
-* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:140:
+* At _site/en/lessons/logistic-regression.html:189:
   'a' tag is missing a reference
-* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:177:
+* At _site/en/lessons/logistic-regression.html:215:
   'a' tag is missing a reference
-* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:225:
+* At _site/en/lessons/mac-installation.html:119:
   'a' tag is missing a reference
-* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:582:
+* At _site/en/lessons/mac-installation.html:138:
-  http://www.ccs.neu.edu/home/dasmith/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:745:
+* At _site/en/lessons/mac-installation.html:175:
-  http://spark.apache.org/downloads is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:953:
+* At _site/en/lessons/mac-installation.html:201:
-  http://jsonlines.org/ is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:1529:
+* At _site/en/lessons/manipulating-strings-in-python.html:119:
-  http://ceur-ws.org/Vol-2253/paper22.pdf is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:1530:
+* At _site/en/lessons/manipulating-strings-in-python.html:138:
-  http://dx.doi.org/10.1093/alh/ajv029 is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:1531:
+* At _site/en/lessons/manipulating-strings-in-python.html:175:
-  http://dx.doi.org/10.1093/alh/ajv028 is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:1532:
+* At _site/en/lessons/manipulating-strings-in-python.html:201:
-  http://dx.doi.org/10.1080/1461670x.2020.1761865 is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:1534:
+* At _site/en/lessons/mapping-with-python-leaflet.html:117:
-  http://dx.doi.org/10.1007/978-3-319-12655-5_11 is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:1535:
+* At _site/en/lessons/mapping-with-python-leaflet.html:136:
-  http://dx.doi.org/10.22148/16.034 is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:1538:
+* At _site/en/lessons/mapping-with-python-leaflet.html:173:
-  http://dx.doi.org/10.1145/2682571.2797068 is not an HTTPS link
+  'a' tag is missing a reference
-* At _site/fr/lecons/du-html-a-une-liste-de-mots-1.html:121:
+* At _site/en/lessons/mapping-with-python-leaflet.html:199:
   'a' tag is missing a reference
-* At _site/fr/lecons/du-html-a-une-liste-de-mots-1.html:140:
+* At _site/en/lessons/naive-bayesian.html:117:
   'a' tag is missing a reference
-* At
_site/fr/lecons/du-html-a-une-liste-de-mots-1.html:177: +* At _site/en/lessons/naive-bayesian.html:136: 'a' tag is missing a reference -* At _site/fr/lecons/du-html-a-une-liste-de-mots-1.html:225: +* At _site/en/lessons/naive-bayesian.html:173: 'a' tag is missing a reference -* At _site/fr/lecons/du-html-a-une-liste-de-mots-2.html:121: +* At _site/en/lessons/naive-bayesian.html:199: 'a' tag is missing a reference -* At _site/fr/lecons/du-html-a-une-liste-de-mots-2.html:140: +* At _site/en/lessons/normalizing-data.html:119: 'a' tag is missing a reference -* At _site/fr/lecons/du-html-a-une-liste-de-mots-2.html:177: +* At _site/en/lessons/normalizing-data.html:138: 'a' tag is missing a reference -* At _site/fr/lecons/du-html-a-une-liste-de-mots-2.html:225: +* At _site/en/lessons/normalizing-data.html:175: 'a' tag is missing a reference -* At _site/fr/lecons/generer-jeu-donnees-texte-ocr.html:119: +* At _site/en/lessons/normalizing-data.html:201: 'a' tag is missing a reference -* At _site/fr/lecons/generer-jeu-donnees-texte-ocr.html:138: +* At _site/en/lessons/ocr-with-google-vision-and-tesseract.html:117: 'a' tag is missing a reference -* At _site/fr/lecons/generer-jeu-donnees-texte-ocr.html:175: +* At _site/en/lessons/ocr-with-google-vision-and-tesseract.html:136: 'a' tag is missing a reference -* At _site/fr/lecons/generer-jeu-donnees-texte-ocr.html:223: +* At _site/en/lessons/ocr-with-google-vision-and-tesseract.html:173: 'a' tag is missing a reference -* At _site/fr/lecons/generer-jeu-donnees-texte-ocr.html:567: +* At _site/en/lessons/ocr-with-google-vision-and-tesseract.html:199: - http://www.worldcat.org/oclc/17591390 is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/generer-jeu-donnees-texte-ocr.html:1631: +* At _site/en/lessons/output-data-as-html-file.html:119: - http://brat.nlplab.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/gestion-manipulation-donnees-r.html:119: +* At _site/en/lessons/output-data-as-html-file.html:138: 'a' tag is missing a reference -* At _site/fr/lecons/gestion-manipulation-donnees-r.html:138: +* At _site/en/lessons/output-data-as-html-file.html:175: 'a' tag is missing a reference -* At _site/fr/lecons/gestion-manipulation-donnees-r.html:175: +* At _site/en/lessons/output-data-as-html-file.html:201: 'a' tag is missing a reference -* At _site/fr/lecons/gestion-manipulation-donnees-r.html:223: +* At _site/en/lessons/output-keywords-in-context-in-html-file.html:119: 'a' tag is missing a reference -* At _site/fr/lecons/gestion-manipulation-donnees-r.html:584: +* At _site/en/lessons/output-keywords-in-context-in-html-file.html:138: - http://tidyverse.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/index.html:88: +* At _site/en/lessons/output-keywords-in-context-in-html-file.html:175: 'a' tag is missing a reference -* At _site/fr/lecons/index.html:107: +* At _site/en/lessons/output-keywords-in-context-in-html-file.html:201: 'a' tag is missing a reference -* At _site/fr/lecons/index.html:144: +* At _site/en/lessons/preserving-your-research-data.html:117: 'a' tag is missing a reference -* At _site/fr/lecons/index.html:192: +* At _site/en/lessons/preserving-your-research-data.html:136: 'a' tag is missing a reference -* At _site/fr/lecons/installation-modules-python-pip.html:119: +* At _site/en/lessons/preserving-your-research-data.html:173: 'a' tag is missing a reference -* At _site/fr/lecons/installation-modules-python-pip.html:138: +* At 
_site/en/lessons/preserving-your-research-data.html:199: 'a' tag is missing a reference -* At _site/fr/lecons/installation-modules-python-pip.html:175: +* At _site/en/lessons/qgis-layers.html:121: 'a' tag is missing a reference -* At _site/fr/lecons/installation-modules-python-pip.html:223: +* At _site/en/lessons/qgis-layers.html:140: 'a' tag is missing a reference -* At _site/fr/lecons/installation-windows-py.html:121: +* At _site/en/lessons/qgis-layers.html:177: 'a' tag is missing a reference -* At _site/fr/lecons/installation-windows-py.html:140: +* At _site/en/lessons/qgis-layers.html:203: 'a' tag is missing a reference -* At _site/fr/lecons/installation-windows-py.html:177: +* At _site/en/lessons/r-basics-with-tabular-data.html:117: 'a' tag is missing a reference -* At _site/fr/lecons/installation-windows-py.html:225: +* At _site/en/lessons/r-basics-with-tabular-data.html:136: 'a' tag is missing a reference -* At _site/fr/lecons/installation-windows-py.html:548: +* At _site/en/lessons/r-basics-with-tabular-data.html:173: - http://www.python.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/installation-windows-py.html:556: +* At _site/en/lessons/r-basics-with-tabular-data.html:199: - http://wiki.python.org/moin/PythonEditors/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/installation-windows-py.html:608: +* At _site/en/lessons/research-data-with-unix.html:119: - http://en.wikipedia.org/wiki/UTF-8 is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/installer-ide-python-linux.html:121: +* At _site/en/lessons/research-data-with-unix.html:138: 'a' tag is missing a reference -* At _site/fr/lecons/installer-ide-python-linux.html:140: +* At _site/en/lessons/research-data-with-unix.html:175: 'a' tag is missing a reference -* At _site/fr/lecons/installer-ide-python-linux.html:177: +* At _site/en/lessons/research-data-with-unix.html:201: 'a' tag is missing a reference -* At _site/fr/lecons/installer-ide-python-linux.html:225: +* At _site/en/lessons/retired/OCR-and-Machine-Translation.html:87: 'a' tag is missing a reference -* At _site/fr/lecons/intro-a-bash-et-zsh.html:121: +* At _site/en/lessons/retired/OCR-and-Machine-Translation.html:106: 'a' tag is missing a reference -* At _site/fr/lecons/intro-a-bash-et-zsh.html:140: +* At _site/en/lessons/retired/OCR-and-Machine-Translation.html:143: 'a' tag is missing a reference -* At _site/fr/lecons/intro-a-bash-et-zsh.html:177: +* At _site/en/lessons/retired/OCR-and-Machine-Translation.html:169: 'a' tag is missing a reference -* At _site/fr/lecons/intro-a-bash-et-zsh.html:225: +* At _site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html:87: 'a' tag is missing a reference -* At _site/fr/lecons/intro-a-bash-et-zsh.html:817: +* At _site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html:106: - http://www.gutenberg.org/ebooks/2600 is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:119: +* At _site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html:143: 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:138: +* At _site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html:169: 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:175: +* At _site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html:444: 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:223: +* At 
_site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html:488: 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:561: +* At _site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html:662: - http://tracery.io is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:561: +* At _site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html:788: - http://cheapbotsdonequick.com/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:564: +* At _site/en/lessons/retired/counting-frequencies-from-zotero-items.html:87: - http://www.sciencedirect.com/science/article/pii/S0747563213003129 is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:564: +* At _site/en/lessons/retired/counting-frequencies-from-zotero-items.html:106: - http://www.docnow.io/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:651: +* At _site/en/lessons/retired/counting-frequencies-from-zotero-items.html:143: - http://json.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:653: +* At _site/en/lessons/retired/counting-frequencies-from-zotero-items.html:169: - http://tracery.io/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:655: +* At _site/en/lessons/retired/creating-new-items-in-zotero.html:87: - http://twitter.com/tinyarchae is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:655: +* At _site/en/lessons/retired/creating-new-items-in-zotero.html:106: - http://web.archive.org/web/20180131161516/https://publicarchaeologyconference.wordpress.com/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:665: +* At _site/en/lessons/retired/creating-new-items-in-zotero.html:143: - http://tracery.io is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:838: +* At _site/en/lessons/retired/creating-new-items-in-zotero.html:169: - http://cheapbotsdonequick.com/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:888: +* At _site/en/lessons/retired/getting-started-with-github-desktop.html:87: - http://tinysubversions.com/2013/03/basic-twitter-bot-etiquette/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:907: +* At _site/en/lessons/retired/getting-started-with-github-desktop.html:106: - http://unicode.org/emoji/charts/full-emoji-list.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:930: +* At _site/en/lessons/retired/getting-started-with-github-desktop.html:143: - http://www.crystalcodepalace.com/traceryTut.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:946: +* At _site/en/lessons/retired/getting-started-with-github-desktop.html:169: - http://cheapbotsdonequick.com/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:986: +* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:87: - http://cheapbotsdonequick.com/source/softlandscapes is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:1006: +* At 
_site/en/lessons/retired/graph-databases-and-SPARQL.html:106: - http://www.codingblocks.net/videos/generating-music-in-javascript/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/intro-aux-bots-twitter.html:1012: +* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:143: - http://www.zachwhalen.net/posts/how-to-make-a-twitter-bot-with-google-spreadsheets-version-04/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/introduction-a-heurist.html:117: +* At _site/en/lessons/retired/graph-databases-and-SPARQL.html:169: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-a-heurist.html:136: +* At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:87: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-a-heurist.html:173: +* At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:106: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-a-heurist.html:221: +* At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:143: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-a-heurist.html:667: +* At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:169: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-a-heurist.html:832: +* At _site/en/lessons/retired/intro-to-beautiful-soup.html:87: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html:119: +* At _site/en/lessons/retired/intro-to-beautiful-soup.html:106: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html:138: +* At _site/en/lessons/retired/intro-to-beautiful-soup.html:143: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html:175: +* At _site/en/lessons/retired/intro-to-beautiful-soup.html:169: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html:223: +* At _site/en/lessons/retired/intro-to-the-zotero-api.html:87: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html:609: +* At _site/en/lessons/retired/intro-to-the-zotero-api.html:106: - http://www.gutenberg.org/cache/epub/1404/pg1404.txt is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html:625: +* At _site/en/lessons/retired/intro-to-the-zotero-api.html:143: - http://www.nltk.org/book/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html:629: +* At _site/en/lessons/retired/intro-to-the-zotero-api.html:169: - http://www.nltk.org/book/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html:982: +* At _site/en/lessons/scalable-reading-of-structured-data.html:125: - http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/introduction-aux-carnets-jupyter-notebooks.html:123: +* At _site/en/lessons/scalable-reading-of-structured-data.html:144: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-aux-carnets-jupyter-notebooks.html:142: +* At _site/en/lessons/scalable-reading-of-structured-data.html:181: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-aux-carnets-jupyter-notebooks.html:179: +* At _site/en/lessons/scalable-reading-of-structured-data.html:207: 'a' tag is 
missing a reference -* At _site/fr/lecons/introduction-aux-carnets-jupyter-notebooks.html:227: +* At _site/en/lessons/sentiment-analysis-syuzhet.html:119: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-aux-carnets-jupyter-notebooks.html:695: +* At _site/en/lessons/sentiment-analysis-syuzhet.html:138: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-aux-carnets-jupyter-notebooks.html:942: +* At _site/en/lessons/sentiment-analysis-syuzhet.html:175: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-et-installation.html:121: +* At _site/en/lessons/sentiment-analysis-syuzhet.html:201: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-et-installation.html:140: +* At _site/en/lessons/sentiment-analysis.html:120: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-et-installation.html:177: +* At _site/en/lessons/sentiment-analysis.html:139: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-et-installation.html:225: +* At _site/en/lessons/sentiment-analysis.html:176: 'a' tag is missing a reference -* At _site/fr/lecons/introduction-et-installation.html:544: +* At _site/en/lessons/sentiment-analysis.html:202: - http://www.python.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/introduction-et-installation.html:544: +* At _site/en/lessons/shiny-leaflet-newspaper-map-tutorial.html:117: - http://www.crummy.com/software/BeautifulSoup/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/introduction-et-installation.html:544: +* At _site/en/lessons/shiny-leaflet-newspaper-map-tutorial.html:136: - http://www.activestate.com/komodo-edit is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/introduction-et-installation.html:544: +* At _site/en/lessons/shiny-leaflet-newspaper-map-tutorial.html:173: - http://wiki.python.org/moin/PythonEditors/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/introduction-et-installation.html:547: +* At _site/en/lessons/shiny-leaflet-newspaper-map-tutorial.html:199: - http://sebastianraschka.com/Articles/2014_python_2_3_key_diff.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/manipuler-chaines-caracteres-python.html:121: +* At _site/en/lessons/simulating-historical-communication-networks-python.html:123: 'a' tag is missing a reference -* At _site/fr/lecons/manipuler-chaines-caracteres-python.html:140: +* At _site/en/lessons/simulating-historical-communication-networks-python.html:142: 'a' tag is missing a reference -* At _site/fr/lecons/manipuler-chaines-caracteres-python.html:177: +* At _site/en/lessons/simulating-historical-communication-networks-python.html:179: 'a' tag is missing a reference -* At _site/fr/lecons/manipuler-chaines-caracteres-python.html:225: +* At _site/en/lessons/simulating-historical-communication-networks-python.html:205: 'a' tag is missing a reference -* At _site/fr/lecons/manipuler-chaines-caracteres-python.html:713: +* At _site/en/lessons/sonification.html:117: - http://www.worldcat.org/oclc/1061273329 is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html:123: +* At _site/en/lessons/sonification.html:136: 'a' tag is missing a reference -* At _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html:142: +* At _site/en/lessons/sonification.html:173: 'a' tag is missing a reference -* At _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html:179: +* At 
_site/en/lessons/sonification.html:199: 'a' tag is missing a reference -* At _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html:227: +* At _site/en/lessons/sonification.html:1069: 'a' tag is missing a reference -* At _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html:568: +* At _site/en/lessons/sonification.html:1070: - http://openrefine.org is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html:587: +* At _site/en/lessons/sonification.html:1071: - http://vis.stanford.edu/papers/wrangler/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html:587: +* At _site/en/lessons/sonification.html:1072: - http://openrefine.org is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html:589: +* At _site/en/lessons/sonification.html:1073: - http://www.loc.gov/index.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/normaliser-donnees-textuelles-python.html:121: +* At _site/en/lessons/sonification.html:1074: 'a' tag is missing a reference -* At _site/fr/lecons/normaliser-donnees-textuelles-python.html:140: +* At _site/en/lessons/sonification.html:1075: 'a' tag is missing a reference -* At _site/fr/lecons/normaliser-donnees-textuelles-python.html:177: +* At _site/en/lessons/sonification.html:1079: 'a' tag is missing a reference -* At _site/fr/lecons/normaliser-donnees-textuelles-python.html:225: +* At _site/en/lessons/sonification.html:1081: 'a' tag is missing a reference -* At _site/fr/lecons/preserver-logiciels-recherche.html:121: +* At _site/en/lessons/sonification.html:1083: 'a' tag is missing a reference -* At _site/fr/lecons/preserver-logiciels-recherche.html:140: +* At _site/en/lessons/sonification.html:1085: 'a' tag is missing a reference -* At _site/fr/lecons/preserver-logiciels-recherche.html:177: +* At _site/en/lessons/sonification.html:1087: 'a' tag is missing a reference -* At _site/fr/lecons/preserver-logiciels-recherche.html:225: +* At _site/en/lessons/sonification.html:1089: 'a' tag is missing a reference -* At _site/fr/lecons/preserver-ses-donnees-de-recherche.html:119: +* At _site/en/lessons/space-place-gazetteers.html:119: 'a' tag is missing a reference -* At _site/fr/lecons/preserver-ses-donnees-de-recherche.html:138: +* At _site/en/lessons/space-place-gazetteers.html:138: 'a' tag is missing a reference -* At _site/fr/lecons/preserver-ses-donnees-de-recherche.html:175: +* At _site/en/lessons/space-place-gazetteers.html:175: 'a' tag is missing a reference -* At _site/fr/lecons/preserver-ses-donnees-de-recherche.html:223: +* At _site/en/lessons/space-place-gazetteers.html:201: 'a' tag is missing a reference -* At _site/fr/lecons/preserver-ses-donnees-de-recherche.html:608: +* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:119: - http://notepad-plus-plus.org is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/preserver-ses-donnees-de-recherche.html:639: +* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:138: - http://cradledincaricature.com/2014/02/06/comic-art-beyond-the-print-shop/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/preserver-ses-donnees-de-recherche.html:646: +* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:175: - 
http://www.theguardian.com/uk-news/2014/feb/20/rebekah-brooks-rupert-murdoch-phone-hacking-trial is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/preserver-ses-donnees-de-recherche.html:653: +* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:201: - http://www.cartoons.ac.uk/record/SBD0931 is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/preserver-ses-donnees-de-recherche.html:660: +* At _site/en/lessons/temporal-network-analysis-with-r.html:121: - http://www.oldbaileyonline.org/browse.jsp?ref=OA16780417 is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/preserver-ses-donnees-de-recherche.html:766: +* At _site/en/lessons/temporal-network-analysis-with-r.html:140: - http://historyonics.blogspot.co.uk/2014/01/judging-book-by-its-url.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/preserver-ses-donnees-de-recherche.html:770: +* At _site/en/lessons/temporal-network-analysis-with-r.html:177: - http://earlymodernnotes.wordpress.com/2013/05/18/unclean-unclean-what-historians-can-do-about-sharing-our-messy-research-data/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/preserver-ses-donnees-de-recherche.html:783: +* At _site/en/lessons/temporal-network-analysis-with-r.html:203: - http://britishlibrary.typepad.co.uk/collectioncare/2013/09/the-twelve-principles-of-digital-preservation.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/preserver-ses-donnees-de-recherche.html:792: +* At _site/en/lessons/text-mining-with-extracted-features.html:136: - http://data-archive.ac.uk/create-manage/document is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:121: +* At _site/en/lessons/text-mining-with-extracted-features.html:155: 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:140: +* At _site/en/lessons/text-mining-with-extracted-features.html:192: 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:177: +* At _site/en/lessons/text-mining-with-extracted-features.html:218: 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:225: +* At _site/en/lessons/text-mining-youtube-comments.html:121: 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:578: +* At _site/en/lessons/text-mining-youtube-comments.html:140: - http://notepad-plus-plus.org is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:858: +* At _site/en/lessons/text-mining-youtube-comments.html:177: - http://editor.citationstyles.org/about/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:894: +* At _site/en/lessons/text-mining-youtube-comments.html:203: - http://stackoverflow.com/questions/tagged/pandoc is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:896: +* At _site/en/lessons/topic-modeling-and-mallet.html:121: - http://web.archive.org/web/20140120195538/http://mashable.com/2013/06/24/markdown-tools/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:896: +* At 
_site/en/lessons/topic-modeling-and-mallet.html:140: - http://mouapp.com/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:896: +* At _site/en/lessons/topic-modeling-and-mallet.html:177: - http://writemonkey.com is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:896: +* At _site/en/lessons/topic-modeling-and-mallet.html:203: - http://www.sublimetext.com/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:896: +* At _site/en/lessons/transcribing-handwritten-text-with-python-and-azure.html:117: - http://prose.io is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:896: +* At _site/en/lessons/transcribing-handwritten-text-with-python-and-azure.html:136: - http://www.authorea.com is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:896: +* At _site/en/lessons/transcribing-handwritten-text-with-python-and-azure.html:173: - http://www.draftin.com is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:898: +* At _site/en/lessons/transcribing-handwritten-text-with-python-and-azure.html:199: - http://github.com/fauno/jekyll-pandoc-multiple-formats is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:898: +* At _site/en/lessons/transforming-xml-with-xsl.html:117: - http://jaspervdj.be/hakyll/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:900: +* At _site/en/lessons/transforming-xml-with-xsl.html:136: - http://readthedocs.org is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:921: +* At _site/en/lessons/transforming-xml-with-xsl.html:173: - http://www.antipope.org/charlie/blog-static/2013/10/why-microsoft-word-must-die.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/reutilisation-de-code-et-modularite.html:121: +* At _site/en/lessons/transforming-xml-with-xsl.html:199: 'a' tag is missing a reference -* At _site/fr/lecons/reutilisation-de-code-et-modularite.html:140: +* At _site/en/lessons/transliterating.html:117: 'a' tag is missing a reference -* At _site/fr/lecons/reutilisation-de-code-et-modularite.html:177: +* At _site/en/lessons/transliterating.html:136: 'a' tag is missing a reference -* At _site/fr/lecons/reutilisation-de-code-et-modularite.html:225: +* At _site/en/lessons/transliterating.html:173: 'a' tag is missing a reference -* At _site/fr/lecons/telecharger-des-pages-web-avec-python.html:121: +* At _site/en/lessons/transliterating.html:199: 'a' tag is missing a reference -* At _site/fr/lecons/telecharger-des-pages-web-avec-python.html:140: +* At _site/en/lessons/understanding-creating-word-embeddings.html:121: 'a' tag is missing a reference -* At _site/fr/lecons/telecharger-des-pages-web-avec-python.html:177: +* At _site/en/lessons/understanding-creating-word-embeddings.html:140: 'a' tag is missing a reference -* At _site/fr/lecons/telecharger-des-pages-web-avec-python.html:225: +* At _site/en/lessons/understanding-creating-word-embeddings.html:177: 'a' tag is missing a reference -* At 
_site/fr/lecons/telecharger-des-pages-web-avec-python.html:566: +* At _site/en/lessons/understanding-creating-word-embeddings.html:203: - http://www.oldbaileyonline.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/telecharger-des-pages-web-avec-python.html:630: +* At _site/en/lessons/understanding-regular-expressions.html:117: - http://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/telecharger-des-pages-web-avec-python.html:630: +* At _site/en/lessons/understanding-regular-expressions.html:136: - http://www.oldbaileyonline.org/images.jsp?doc=178006280084 is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/transcription-automatisee-graphies-non-latines.html:133: +* At _site/en/lessons/understanding-regular-expressions.html:173: 'a' tag is missing a reference -* At _site/fr/lecons/transcription-automatisee-graphies-non-latines.html:152: +* At _site/en/lessons/understanding-regular-expressions.html:199: 'a' tag is missing a reference -* At _site/fr/lecons/transcription-automatisee-graphies-non-latines.html:189: +* At _site/en/lessons/up-and-running-with-omeka.html:117: 'a' tag is missing a reference -* At _site/fr/lecons/transcription-automatisee-graphies-non-latines.html:237: +* At _site/en/lessons/up-and-running-with-omeka.html:136: 'a' tag is missing a reference -* At _site/fr/lecons/transcription-automatisee-graphies-non-latines.html:2250: +* At _site/en/lessons/up-and-running-with-omeka.html:173: - http://doi.org/10.30687/arm/9372-8175/2022/01/005 is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/lecons/travailler-avec-des-fichiers-texte.html:121: +* At _site/en/lessons/up-and-running-with-omeka.html:199: 'a' tag is missing a reference -* At _site/fr/lecons/travailler-avec-des-fichiers-texte.html:140: +* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:119: 'a' tag is missing a reference -* At _site/fr/lecons/travailler-avec-des-fichiers-texte.html:177: +* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:138: 'a' tag is missing a reference -* At _site/fr/lecons/travailler-avec-des-fichiers-texte.html:225: +* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:175: 'a' tag is missing a reference -* At _site/fr/nos-soutiens.html:88: +* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:201: 'a' tag is missing a reference -* At _site/fr/nos-soutiens.html:107: +* At _site/en/lessons/using-javascript-to-create-maps.html:123: 'a' tag is missing a reference -* At _site/fr/nos-soutiens.html:144: +* At _site/en/lessons/using-javascript-to-create-maps.html:142: 'a' tag is missing a reference -* At _site/fr/nos-soutiens.html:192: +* At _site/en/lessons/using-javascript-to-create-maps.html:179: 'a' tag is missing a reference -* At _site/fr/nos-soutiens.html:275: +* At _site/en/lessons/using-javascript-to-create-maps.html:205: - http://cdrh.unl.edu/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/pi.html:88: +* At _site/en/lessons/vector-layers-qgis.html:121: 'a' tag is missing a reference -* At _site/fr/pi.html:107: +* At _site/en/lessons/vector-layers-qgis.html:140: 'a' tag is missing a reference -* At _site/fr/pi.html:144: +* At _site/en/lessons/vector-layers-qgis.html:177: 'a' tag is missing a reference -* At _site/fr/pi.html:192: +* At _site/en/lessons/vector-layers-qgis.html:203: 'a' tag is missing a reference -* At 
_site/fr/politique-retrait-lecons.html:88: +* At _site/en/lessons/viewing-html-files.html:119: 'a' tag is missing a reference -* At _site/fr/politique-retrait-lecons.html:107: +* At _site/en/lessons/viewing-html-files.html:138: 'a' tag is missing a reference -* At _site/fr/politique-retrait-lecons.html:144: +* At _site/en/lessons/viewing-html-files.html:175: 'a' tag is missing a reference -* At _site/fr/politique-retrait-lecons.html:192: +* At _site/en/lessons/viewing-html-files.html:201: 'a' tag is missing a reference -* At _site/fr/politique-vie-privee.html:88: +* At _site/en/lessons/visualizing-with-bokeh.html:117: 'a' tag is missing a reference -* At _site/fr/politique-vie-privee.html:107: +* At _site/en/lessons/visualizing-with-bokeh.html:136: 'a' tag is missing a reference -* At _site/fr/politique-vie-privee.html:144: +* At _site/en/lessons/visualizing-with-bokeh.html:173: 'a' tag is missing a reference -* At _site/fr/politique-vie-privee.html:192: +* At _site/en/lessons/visualizing-with-bokeh.html:199: 'a' tag is missing a reference -* At _site/fr/postes-vacants.html:88: +* At _site/en/lessons/windows-installation.html:119: 'a' tag is missing a reference -* At _site/fr/postes-vacants.html:107: +* At _site/en/lessons/windows-installation.html:138: 'a' tag is missing a reference -* At _site/fr/postes-vacants.html:144: +* At _site/en/lessons/windows-installation.html:175: 'a' tag is missing a reference -* At _site/fr/postes-vacants.html:192: +* At _site/en/lessons/windows-installation.html:201: 'a' tag is missing a reference -* At _site/fr/reaction.html:88: +* At _site/en/lessons/working-with-batches-of-pdf-files.html:117: 'a' tag is missing a reference -* At _site/fr/reaction.html:107: +* At _site/en/lessons/working-with-batches-of-pdf-files.html:136: 'a' tag is missing a reference -* At _site/fr/reaction.html:144: +* At _site/en/lessons/working-with-batches-of-pdf-files.html:173: 'a' tag is missing a reference -* At _site/fr/reaction.html:192: +* At _site/en/lessons/working-with-batches-of-pdf-files.html:199: 'a' tag is missing a reference -* At _site/fr/recherche.html:88: +* At _site/en/lessons/working-with-text-files.html:119: 'a' tag is missing a reference -* At _site/fr/recherche.html:107: +* At _site/en/lessons/working-with-text-files.html:138: 'a' tag is missing a reference -* At _site/fr/recherche.html:144: +* At _site/en/lessons/working-with-text-files.html:175: 'a' tag is missing a reference -* At _site/fr/recherche.html:192: +* At _site/en/lessons/working-with-text-files.html:201: 'a' tag is missing a reference -* At _site/fr/recherche.html:255: +* At _site/en/lessons/working-with-web-pages.html:119: - http://niche-canada.org/wp-content/uploads/2013/09/programming-historian-1.pdf is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/recherche.html:262: +* At _site/en/lessons/working-with-web-pages.html:138: - http://www.diva-portal.org/smash/record.jsf?pid=diva2%3A1508542&dswid=7551 is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/recherche.html:267: +* At _site/en/lessons/working-with-web-pages.html:175: - http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/recherche.html:279: +* At _site/en/lessons/working-with-web-pages.html:201: - http://www.digitalhumanities.org/dhq/vol/16/2/000585/000585.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/recherche.html:285: +* At _site/en/privacy-policy.html:86: - 
http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/recherche.html:287: +* At _site/en/privacy-policy.html:105: - http://web.archive.org/web/20180713014622/http://dhcommons.org/journal/issue-1/editorial-sustainability-and-open-peer-review-programming-historian is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/recherche.html:288: +* At _site/en/privacy-policy.html:142: - http://www.themacroscope.org/2.0/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/recherche.html:296: +* At _site/en/privacy-policy.html:168: - http://doi.org/10.5281/zenodo.3813763 is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/recherche.html:309: +* At _site/en/project-team.html:86: - http://www.iea.usp.br/eventos/historia-digital-educacao-caminhos-cruzados is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/recherche.html:396: +* At _site/en/project-team.html:105: - http://ixa2.si.ehu.eus/intele/?q=webinars is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/recherche.html:399: +* At _site/en/project-team.html:142: - http://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/recherche.html:412: +* At _site/en/project-team.html:168: - http://fredgibbs.net/assets/images/ph-poster/final-board.png is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/recherche.html:425: +* At _site/en/research.html:86: - http://niche-canada.org/2018/03/23/a-decade-of-programming-historians/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/recherche.html:426: +* At _site/en/research.html:105: - http://fredgibbs.net/posts/reflections-former-PH-editor is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/recherche.html:427: +* At _site/en/research.html:142: - http://clionauta.hypotheses.org/16979 is not an HTTPS link + 'a' tag is missing a reference -* At _site/fr/recherche.html:429: +* At _site/en/research.html:168: - http://humanidadesdigitales.net/blog/2017/03/17/the-programming-historian-en-espanol/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/index.html:87: +* At _site/en/reviewer-guidelines.html:86: 'a' tag is missing a reference -* At _site/index.html:106: +* At _site/en/reviewer-guidelines.html:105: 'a' tag is missing a reference -* At _site/index.html:143: +* At _site/en/reviewer-guidelines.html:142: 'a' tag is missing a reference -* At _site/index.html:169: +* At _site/en/reviewer-guidelines.html:168: 'a' tag is missing a reference -* At _site/posts/2022-in-review.html:105: +* At _site/en/supporters.html:86: 'a' tag is missing a reference -* At _site/posts/2022-in-review.html:124: +* At _site/en/supporters.html:105: 'a' tag is missing a reference -* At _site/posts/2022-in-review.html:161: +* At _site/en/supporters.html:142: 'a' tag is missing a reference -* At _site/posts/2022-in-review.html:187: +* At _site/en/supporters.html:168: 'a' tag is missing a reference -* At _site/posts/2022-in-review.html:287: +* At _site/en/translator-guidelines.html:86: - [https://programminghistorian.org/en/about] is an invalid URL + 'a' tag is missing a reference -* At _site/posts/DH-Award-2017.html:106: +* At _site/en/translator-guidelines.html:105: 'a' tag is missing a reference -* At _site/posts/DH-Award-2017.html:125: +* At _site/en/translator-guidelines.html:142: 'a' tag is missing a reference -* At 
_site/posts/DH-Award-2017.html:162: +* At _site/en/translator-guidelines.html:168: 'a' tag is missing a reference -* At _site/posts/DH-Award-2017.html:188: +* At _site/en/vacancies.html:86: 'a' tag is missing a reference -* At _site/posts/DH-Award-2017.html:281: +* At _site/en/vacancies.html:105: - http://dhawards.org/dhawards2017/results/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/FR-team.html:107: +* At _site/en/vacancies.html:142: 'a' tag is missing a reference -* At _site/posts/FR-team.html:126: +* At _site/en/vacancies.html:168: 'a' tag is missing a reference -* At _site/posts/FR-team.html:163: +* At _site/es/acerca-de.html:89: 'a' tag is missing a reference -* At _site/posts/FR-team.html:189: +* At _site/es/acerca-de.html:108: 'a' tag is missing a reference -* At _site/posts/FR-team.html:285: +* At _site/es/acerca-de.html:145: - http://www.parthenos-project.eu/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/FR-team.html:285: +* At _site/es/acerca-de.html:182: - http://www.iperionch.eu/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/FR-team.html:289: +* At _site/es/colaboradores.html:88: - http://www.humanisti.ca/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Open-Education-Week.html:107: +* At _site/es/colaboradores.html:107: 'a' tag is missing a reference -* At _site/posts/Open-Education-Week.html:126: +* At _site/es/colaboradores.html:144: 'a' tag is missing a reference -* At _site/posts/Open-Education-Week.html:163: +* At _site/es/colaboradores.html:181: 'a' tag is missing a reference -* At _site/posts/Open-Education-Week.html:189: +* At _site/es/contribuciones.html:88: 'a' tag is missing a reference -* At _site/posts/PH-TNA-JISC-event-2-annoucement.html:107: +* At _site/es/contribuciones.html:107: 'a' tag is missing a reference -* At _site/posts/PH-TNA-JISC-event-2-annoucement.html:126: +* At _site/es/contribuciones.html:144: 'a' tag is missing a reference -* At _site/posts/PH-TNA-JISC-event-2-annoucement.html:163: +* At _site/es/contribuciones.html:181: 'a' tag is missing a reference -* At _site/posts/PH-TNA-JISC-event-2-annoucement.html:189: +* At _site/es/donaciones.html:88: 'a' tag is missing a reference -* At _site/posts/PH-commitment-to-diversity.html:107: +* At _site/es/donaciones.html:107: 'a' tag is missing a reference -* At _site/posts/PH-commitment-to-diversity.html:126: +* At _site/es/donaciones.html:144: 'a' tag is missing a reference -* At _site/posts/PH-commitment-to-diversity.html:163: +* At _site/es/donaciones.html:181: 'a' tag is missing a reference -* At _site/posts/PH-commitment-to-diversity.html:189: +* At _site/es/equipo-de-proyecto.html:88: 'a' tag is missing a reference -* At _site/posts/PH-commitment-to-diversity.html:283: +* At _site/es/equipo-de-proyecto.html:107: - http://www.aauw.org/research/why-so-few/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/PH-commitment-to-diversity.html:371: +* At _site/es/equipo-de-proyecto.html:144: - http://web.archive.org/web/20160507170231/http://www.usnews.com/news/blogs/data-mine/2016/02/18/study-shows-women-are-better-coders-but-only-when-gender-is-hidden is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/PH-commitment-to-diversity.html:377: +* At _site/es/equipo-de-proyecto.html:181: - http://www.pyladies.com/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/PH-commitment-to-diversity.html:377: +* At _site/es/eventos.html:88: - 
http://femtechnet.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/PH-contributors.html:107: +* At _site/es/eventos.html:107: 'a' tag is missing a reference -* At _site/posts/PH-contributors.html:126: +* At _site/es/eventos.html:144: 'a' tag is missing a reference -* At _site/posts/PH-contributors.html:163: +* At _site/es/eventos.html:181: 'a' tag is missing a reference -* At _site/posts/PH-contributors.html:189: +* At _site/es/guia-editor.html:88: 'a' tag is missing a reference -* At _site/posts/PH-espanol-in-DH2018.html:107: +* At _site/es/guia-editor.html:107: 'a' tag is missing a reference -* At _site/posts/PH-espanol-in-DH2018.html:126: +* At _site/es/guia-editor.html:144: 'a' tag is missing a reference -* At _site/posts/PH-espanol-in-DH2018.html:163: +* At _site/es/guia-editor.html:181: 'a' tag is missing a reference -* At _site/posts/PH-espanol-in-DH2018.html:189: +* At _site/es/guia-para-autores.html:88: 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:105: +* At _site/es/guia-para-autores.html:107: 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:124: +* At _site/es/guia-para-autores.html:144: 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:161: +* At _site/es/guia-para-autores.html:181: 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:187: +* At _site/es/guia-para-revisores.html:88: 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:291: +* At _site/es/guia-para-revisores.html:107: - http://discontents.com.au/unremembering-the-forgotten is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:293: +* At _site/es/guia-para-revisores.html:144: - http://lj.libraryjournal.com/2014/09/opinion/not-dead-yet/connecting-researchers-to-new-digital-tools-not-dead-yet/#_ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:294: +* At _site/es/guia-para-revisores.html:181: - http://muse.jhu.edu/login?auth=0&type=summary&url=/journals/ieee_annals_of_the_history_of_computing/v036/36.2.turkel.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:295: +* At _site/es/guia-para-traductores.html:88: - http://journalofdigitalhumanities.org/2-1/dh-contribution-to-topic-modeling/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:296: +* At _site/es/guia-para-traductores.html:107: - http://tedunderwood.com/2012/12/14/what-can-topic-models-of-pmla-teach-us-about-the-history-of-literary-scholarship/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:297: +* At _site/es/guia-para-traductores.html:144: - http://aisel.aisnet.org/cgi/viewcontent.cgi?article=1072&context=amcis2009 is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:307: +* At _site/es/guia-para-traductores.html:181: - http://grad.craftingdigitalhistory.ca/weekly.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:317: +* At _site/es/index.html:90: - http://adamcrymble.org/intro-to-digital-history-2015/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:318: 
+* At _site/es/index.html:109: - http://web.archive.org/web/20150905233647/https://library.uoregon.edu/node/4570 is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:325: +* At _site/es/index.html:146: - http://lincolnmullen.com/files/clio3.syllabus.hist698.2014f.pdf is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:326: +* At _site/es/index.html:183: - http://www.wilkohardenberg.net/content/Hardenberg_DigitalHistory_Hist795.pdf is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:327: +* At _site/es/investigacion.html:88: - http://www.christophermchurch.com/draft-for-new-course-digital-toolbox-for-historians-unr/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:330: +* At _site/es/investigacion.html:107: - http://intro-dh-2014.andyschocket.net/syllabus/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:331: +* At _site/es/investigacion.html:144: - http://dh2014.umwblogs.org/syllabus/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:332: +* At _site/es/investigacion.html:181: - http://devdh.org/files/downloads/Guiliano_Digital_History_Syllabus_Fall2014_IUPUI.pdf is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:341: +* At _site/es/jisc-tna-colaboracion.html:88: - http://web.archive.org/web/20180127231436/http://www.english.upenn.edu/~jenglish/Courses/Fall2014/505Syllabus.pdf is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:342: +* At _site/es/jisc-tna-colaboracion.html:107: - http://digitalhumanities.rice.edu/fall-2013-syllabus/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:344: +* At _site/es/jisc-tna-colaboracion.html:144: - http://dh.chadblack.net/info/syllabus/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:351: +* At _site/es/jisc-tna-colaboracion.html:181: - http://www.scottbot.net/HIAL/wp-content/uploads/2012/09/Wilkens_DH_Syllabus_Init.pdf is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/Uses-Of-The-Programming-Historian.html:375: +* At _site/es/lecciones/administracion-de-datos-en-r.html:119: - http://prosop.org is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/ad-hoc-translation.html:105: +* At _site/es/lecciones/administracion-de-datos-en-r.html:138: 'a' tag is missing a reference -* At _site/posts/ad-hoc-translation.html:124: +* At _site/es/lecciones/administracion-de-datos-en-r.html:175: 'a' tag is missing a reference -* At _site/posts/ad-hoc-translation.html:161: +* At _site/es/lecciones/administracion-de-datos-en-r.html:212: 'a' tag is missing a reference -* At _site/posts/ad-hoc-translation.html:187: +* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:119: 'a' tag is missing a reference -* At _site/posts/adding-to-library-catalogue.html:105: +* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:138: 'a' tag is missing a reference -* At _site/posts/adding-to-library-catalogue.html:124: +* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:175: 'a' tag is missing a reference -* At 
_site/posts/adding-to-library-catalogue.html:161:
+* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:212:
   'a' tag is missing a reference
-* At _site/posts/adding-to-library-catalogue.html:187:
+* At _site/es/lecciones/analisis-de-correspondencia-en-r.html:139:
   'a' tag is missing a reference
-* At _site/posts/adding-to-library-catalogue.html:279:
+* At _site/es/lecciones/analisis-de-correspondencia-en-r.html:158:
-  http://purdue-primo-prod.hosted.exlibrisgroup.com/PURDUE:everything:PURDUE_ALMA51671812890001081 is not an HTTPS link
+  'a' tag is missing a reference

[htmlproofer report diff, continued: several hundred further entries follow this same pattern. Each removed `-* At _site/posts/….html:…` report from the pre-upgrade check is paired with an added `+* At _site/es/lecciones/….html:…`, `+* At _site/es/….html:…`, or `+* At _site/fr/….html:…` report from the post-upgrade check, and every message is one of two kinds: "'a' tag is missing a reference" or "http://… is not an HTTPS link". In two cases the direction reverses and the added report is itself an HTTPS complaint, for `http://programminghistorian.org/` at _site/es/lecciones/introduccion-a-markdown.html:748 and at _site/fr/lecons/debuter-avec-markdown.html:772.]
_site/posts/two-new-PH-editors.html:188: +* At _site/fr/lecons/generer-jeu-donnees-texte-ocr.html:119: 'a' tag is missing a reference -* At _site/posts/ucl-placement-2021.html:107: +* At _site/fr/lecons/generer-jeu-donnees-texte-ocr.html:138: 'a' tag is missing a reference -* At _site/posts/ucl-placement-2021.html:126: +* At _site/fr/lecons/generer-jeu-donnees-texte-ocr.html:175: 'a' tag is missing a reference -* At _site/posts/ucl-placement-2021.html:163: +* At _site/fr/lecons/generer-jeu-donnees-texte-ocr.html:223: 'a' tag is missing a reference -* At _site/posts/ucl-placement-2021.html:189: +* At _site/fr/lecons/gestion-manipulation-donnees-r.html:119: 'a' tag is missing a reference -* At _site/posts/vote-dh-award.html:107: +* At _site/fr/lecons/gestion-manipulation-donnees-r.html:138: 'a' tag is missing a reference -* At _site/posts/vote-dh-award.html:126: +* At _site/fr/lecons/gestion-manipulation-donnees-r.html:175: 'a' tag is missing a reference -* At _site/posts/vote-dh-award.html:163: +* At _site/fr/lecons/gestion-manipulation-donnees-r.html:223: 'a' tag is missing a reference -* At _site/posts/vote-dh-award.html:189: +* At _site/fr/lecons/index.html:88: 'a' tag is missing a reference -* At _site/posts/welcome-martin-grandjean.html:107: +* At _site/fr/lecons/index.html:107: 'a' tag is missing a reference -* At _site/posts/welcome-martin-grandjean.html:126: +* At _site/fr/lecons/index.html:144: 'a' tag is missing a reference -* At _site/posts/welcome-martin-grandjean.html:163: +* At _site/fr/lecons/index.html:192: 'a' tag is missing a reference -* At _site/posts/welcome-martin-grandjean.html:189: +* At _site/fr/lecons/installation-modules-python-pip.html:119: 'a' tag is missing a reference -* At _site/posts/welcome-martin-grandjean.html:281: +* At _site/fr/lecons/installation-modules-python-pip.html:138: - http://www.martingrandjean.ch/complex-structures-and-international-organizations/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/welcome-martin-grandjean.html:281: +* At _site/fr/lecons/installation-modules-python-pip.html:175: - http://www.martingrandjean.ch is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/welcome-mc-boucher.html:105: +* At _site/fr/lecons/installation-modules-python-pip.html:223: 'a' tag is missing a reference -* At _site/posts/welcome-mc-boucher.html:124: +* At _site/fr/lecons/installation-windows-py.html:121: 'a' tag is missing a reference -* At _site/posts/welcome-mc-boucher.html:161: +* At _site/fr/lecons/installation-windows-py.html:140: 'a' tag is missing a reference -* At _site/posts/welcome-mc-boucher.html:187: +* At _site/fr/lecons/installation-windows-py.html:177: 'a' tag is missing a reference -* At _site/posts/welcome-ph-fr.html:105: +* At _site/fr/lecons/installation-windows-py.html:225: 'a' tag is missing a reference -* At _site/posts/welcome-ph-fr.html:124: +* At _site/fr/lecons/installer-ide-python-linux.html:121: 'a' tag is missing a reference -* At _site/posts/welcome-ph-fr.html:161: +* At _site/fr/lecons/installer-ide-python-linux.html:140: 'a' tag is missing a reference -* At _site/posts/welcome-ph-fr.html:187: +* At _site/fr/lecons/installer-ide-python-linux.html:177: 'a' tag is missing a reference -* At _site/posts/welcome-ph-fr.html:279: +* At _site/fr/lecons/installer-ide-python-linux.html:225: - http://programminghistorian.org/fr is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/welcome-ph-fr.html:285: +* At _site/fr/lecons/intro-a-bash-et-zsh.html:121: - 
http://dsharp.library.cmu.edu/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/welcome-ph-fr.html:289: +* At _site/fr/lecons/intro-a-bash-et-zsh.html:140: - http://archives.mundaneum.org/fr/versions-digitalisees/schema-de-paul-otlet-documentation-et-telecommunication is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/welcome-to-ph2.html:107: +* At _site/fr/lecons/intro-a-bash-et-zsh.html:177: 'a' tag is missing a reference -* At _site/posts/welcome-to-ph2.html:126: +* At _site/fr/lecons/intro-a-bash-et-zsh.html:225: 'a' tag is missing a reference -* At _site/posts/welcome-to-ph2.html:163: +* At _site/fr/lecons/intro-aux-bots-twitter.html:119: 'a' tag is missing a reference -* At _site/posts/welcome-to-ph2.html:189: +* At _site/fr/lecons/intro-aux-bots-twitter.html:138: 'a' tag is missing a reference -* At _site/posts/welcome-to-ph2.html:289: +* At _site/fr/lecons/intro-aux-bots-twitter.html:175: - http://niche-canada.org/programming-historian is not an HTTPS link + 'a' tag is missing a reference -* At _site/posts/welcome-zoe-leblanc.html:108: +* At _site/fr/lecons/intro-aux-bots-twitter.html:223: 'a' tag is missing a reference -* At _site/posts/welcome-zoe-leblanc.html:127: +* At _site/fr/lecons/introduction-a-heurist.html:117: 'a' tag is missing a reference -* At _site/posts/welcome-zoe-leblanc.html:164: +* At _site/fr/lecons/introduction-a-heurist.html:136: 'a' tag is missing a reference -* At _site/posts/welcome-zoe-leblanc.html:190: +* At _site/fr/lecons/introduction-a-heurist.html:173: 'a' tag is missing a reference -* At _site/posts/welcome-zoe-leblanc.html:285: +* At _site/fr/lecons/introduction-a-heurist.html:221: - http://scholarslab.org is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/apoiadores.html:88: +* At _site/fr/lecons/introduction-a-heurist.html:667: 'a' tag is missing a reference -* At _site/pt/apoiadores.html:107: +* At _site/fr/lecons/introduction-a-heurist.html:832: 'a' tag is missing a reference -* At _site/pt/apoiadores.html:144: +* At _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html:119: 'a' tag is missing a reference -* At _site/pt/apoiadores.html:203: +* At _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html:138: 'a' tag is missing a reference -* At _site/pt/apoiadores.html:275: +* At _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html:175: - http://cdrh.unl.edu/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/contribua.html:88: +* At _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html:223: 'a' tag is missing a reference -* At _site/pt/contribua.html:107: +* At _site/fr/lecons/introduction-aux-carnets-jupyter-notebooks.html:123: 'a' tag is missing a reference -* At _site/pt/contribua.html:144: +* At _site/fr/lecons/introduction-aux-carnets-jupyter-notebooks.html:142: 'a' tag is missing a reference -* At _site/pt/contribua.html:203: +* At _site/fr/lecons/introduction-aux-carnets-jupyter-notebooks.html:179: 'a' tag is missing a reference -* At _site/pt/contribua.html:292: +* At _site/fr/lecons/introduction-aux-carnets-jupyter-notebooks.html:227: - http://www.worldcat.org/title/programming-historian/oclc/951537099 is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/contribua.html:294: +* At _site/fr/lecons/introduction-aux-carnets-jupyter-notebooks.html:695: - 
http://purdue-primo-prod.hosted.exlibrisgroup.com/primo_library/libweb/action/dlDisplay.do?vid=PURDUE&search_scope=everything&docId=PURDUE_ALMA51671812890001081&fn=permalink is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/directrizes-autor.html:88: +* At _site/fr/lecons/introduction-aux-carnets-jupyter-notebooks.html:942: 'a' tag is missing a reference -* At _site/pt/directrizes-autor.html:107: +* At _site/fr/lecons/introduction-et-installation.html:121: 'a' tag is missing a reference -* At _site/pt/directrizes-autor.html:144: +* At _site/fr/lecons/introduction-et-installation.html:140: 'a' tag is missing a reference -* At _site/pt/directrizes-autor.html:203: +* At _site/fr/lecons/introduction-et-installation.html:177: 'a' tag is missing a reference -* At _site/pt/directrizes-editor.html:88: +* At _site/fr/lecons/introduction-et-installation.html:225: 'a' tag is missing a reference -* At _site/pt/directrizes-editor.html:107: +* At _site/fr/lecons/manipuler-chaines-caracteres-python.html:121: 'a' tag is missing a reference -* At _site/pt/directrizes-editor.html:144: +* At _site/fr/lecons/manipuler-chaines-caracteres-python.html:140: 'a' tag is missing a reference -* At _site/pt/directrizes-editor.html:203: +* At _site/fr/lecons/manipuler-chaines-caracteres-python.html:177: 'a' tag is missing a reference -* At _site/pt/directrizes-editor.html:591: +* At _site/fr/lecons/manipuler-chaines-caracteres-python.html:225: - http://www.loc.gov/maps/collections is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/directrizes-revisor.html:88: +* At _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html:123: 'a' tag is missing a reference -* At _site/pt/directrizes-revisor.html:107: +* At _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html:142: 'a' tag is missing a reference -* At _site/pt/directrizes-revisor.html:144: +* At _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html:179: 'a' tag is missing a reference -* At _site/pt/directrizes-revisor.html:203: +* At _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html:227: 'a' tag is missing a reference -* At _site/pt/directrizes-tradutor.html:88: +* At _site/fr/lecons/normaliser-donnees-textuelles-python.html:121: 'a' tag is missing a reference -* At _site/pt/directrizes-tradutor.html:107: +* At _site/fr/lecons/normaliser-donnees-textuelles-python.html:140: 'a' tag is missing a reference -* At _site/pt/directrizes-tradutor.html:144: +* At _site/fr/lecons/normaliser-donnees-textuelles-python.html:177: 'a' tag is missing a reference -* At _site/pt/directrizes-tradutor.html:203: +* At _site/fr/lecons/normaliser-donnees-textuelles-python.html:225: 'a' tag is missing a reference -* At _site/pt/doacoes.html:88: +* At _site/fr/lecons/preserver-logiciels-recherche.html:121: 'a' tag is missing a reference -* At _site/pt/doacoes.html:107: +* At _site/fr/lecons/preserver-logiciels-recherche.html:140: 'a' tag is missing a reference -* At _site/pt/doacoes.html:144: +* At _site/fr/lecons/preserver-logiciels-recherche.html:177: 'a' tag is missing a reference -* At _site/pt/doacoes.html:203: +* At _site/fr/lecons/preserver-logiciels-recherche.html:225: 'a' tag is missing a reference -* At _site/pt/equipe.html:88: +* At _site/fr/lecons/preserver-ses-donnees-de-recherche.html:119: 'a' tag is missing a reference -* At _site/pt/equipe.html:107: +* At _site/fr/lecons/preserver-ses-donnees-de-recherche.html:138: 'a' tag is missing a reference -* At _site/pt/equipe.html:144: +* At 
_site/fr/lecons/preserver-ses-donnees-de-recherche.html:175: 'a' tag is missing a reference -* At _site/pt/equipe.html:203: +* At _site/fr/lecons/preserver-ses-donnees-de-recherche.html:223: 'a' tag is missing a reference -* At _site/pt/equipe.html:310: +* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:121: - http://twitter.com/maxcarlons is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:313: +* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:140: - http://github.com/carlonim is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:414: +* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:177: - http://github.com/lachapot is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:512: +* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:225: - http://twitter.com/cosovschi is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:515: +* At _site/fr/lecons/reutilisation-de-code-et-modularite.html:121: - http://github.com/digitalkosovski is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:618: +* At _site/fr/lecons/reutilisation-de-code-et-modularite.html:140: - http://github.com/caiocmello is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:1182: +* At _site/fr/lecons/reutilisation-de-code-et-modularite.html:177: - http://github.com/semanticnoodles is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:1278: +* At _site/fr/lecons/reutilisation-de-code-et-modularite.html:225: - http://twitter.com/nabsiddiqui is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:1281: +* At _site/fr/lecons/telecharger-des-pages-web-avec-python.html:121: - http://github.com/nabsiddiqui is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:1631: +* At _site/fr/lecons/telecharger-des-pages-web-avec-python.html:140: - http://twitter.com/giulia_taurino is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:1634: +* At _site/fr/lecons/telecharger-des-pages-web-avec-python.html:177: - http://github.com/giuliataurino is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:1804: +* At _site/fr/lecons/telecharger-des-pages-web-avec-python.html:225: - http://www.alexwermercolan.com/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:1810: +* At _site/fr/lecons/transcription-automatisee-graphies-non-latines.html:133: - http://twitter.com/alexwermercolan is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:1813: +* At _site/fr/lecons/transcription-automatisee-graphies-non-latines.html:152: - http://github.com/hawc2 is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:2059: +* At _site/fr/lecons/transcription-automatisee-graphies-non-latines.html:189: - http://www.mariajoseafanador.com is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:2065: +* At _site/fr/lecons/transcription-automatisee-graphies-non-latines.html:237: - http://twitter.com/mariajoafana is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:2068: +* At _site/fr/lecons/travailler-avec-des-fichiers-texte.html:121: - http://github.com/mariajoafana is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:2534: +* At 
_site/fr/lecons/travailler-avec-des-fichiers-texte.html:140: - http://twitter.com/IsaGribomont is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:2537: +* At _site/fr/lecons/travailler-avec-des-fichiers-texte.html:177: - http://github.com/isag91 is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:2745: +* At _site/fr/lecons/travailler-avec-des-fichiers-texte.html:225: - http://twitter.com/espejolento is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:2748: +* At _site/fr/nos-soutiens.html:88: - http://github.com/silviaegt is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:3036: +* At _site/fr/nos-soutiens.html:107: - http://twitter.com/jenniferisve is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:3039: +* At _site/fr/nos-soutiens.html:144: - http://github.com/jenniferisasi is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:3361: +* At _site/fr/nos-soutiens.html:192: - http://twitter.com/enetreseles is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:3364: +* At _site/fr/pi.html:88: - http://github.com/nllano is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:3568: +* At _site/fr/pi.html:107: - http://twitter.com/jgob is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:3571: +* At _site/fr/pi.html:144: - http://github.com/joshuagob is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:3863: +* At _site/fr/pi.html:192: - http://twitter.com/rivaquiroga is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:3866: +* At _site/fr/politique-retrait-lecons.html:88: - http://github.com/rivaquiroga is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:4157: +* At _site/fr/politique-retrait-lecons.html:107: - http://github.com/nivaca is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:4370: +* At _site/fr/politique-retrait-lecons.html:144: - http://github.com/marie-flesch is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:4513: +* At _site/fr/politique-retrait-lecons.html:192: - http://github.com/matgille is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:4746: +* At _site/fr/politique-vie-privee.html:88: - http://github.com/mhersent is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:4804: +* At _site/fr/politique-vie-privee.html:107: - http://twitter.com/superHH is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:5056: +* At _site/fr/politique-vie-privee.html:144: - http://github.com/DMathelier is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:5190: +* At _site/fr/politique-vie-privee.html:192: - http://twitter.com/emilienschultz is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:5193: +* At _site/fr/postes-vacants.html:88: - http://github.com/emilienschultz is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:5317: +* At _site/fr/postes-vacants.html:107: - http://twitter.com/davvalent is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:5320: +* At _site/fr/postes-vacants.html:144: - http://github.com/davvalent is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:5449: 
+* At _site/fr/postes-vacants.html:192: - http://github.com/AlexandreWa is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:5584: +* At _site/fr/reaction.html:88: - http://github.com/josircg is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:5842: +* At _site/fr/reaction.html:107: - http://twitter.com/danielalvesfcsh is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:5845: +* At _site/fr/reaction.html:144: - http://github.com/DanielAlvesLABDH is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:6107: +* At _site/fr/reaction.html:192: - http://twitter.com/ericbrasiln is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:6110: +* At _site/fr/recherche.html:88: - http://github.com/ericbrasiln is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:6301: +* At _site/fr/recherche.html:107: - http://github.com/luisferla is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:6543: +* At _site/fr/recherche.html:144: - http://twitter.com/jimmy_medeiros is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:6546: +* At _site/fr/recherche.html:192: - http://github.com/JimmyMedeiros82 is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:6781: +* At _site/index.html:87: - http://github.com/joanacvp is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:7027: +* At _site/index.html:106: - http://twitter.com/araceletorres is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:7030: +* At _site/index.html:143: - http://github.com/aracele is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:7286: +* At _site/index.html:169: - http://twitter.com/j_w_baker is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:7289: +* At _site/posts/2022-in-review.html:105: - http://github.com/drjwbaker is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:7721: +* At _site/posts/2022-in-review.html:124: - http://adamcrymble.org is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:7727: +* At _site/posts/2022-in-review.html:161: - http://twitter.com/Adam_Crymble is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:7730: +* At _site/posts/2022-in-review.html:187: - http://github.com/acrymble is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:8198: +* At _site/posts/2022-in-review.html:287: - http://github.com/adamfarquhar is not an HTTPS link + [https://programminghistorian.org/en/about] is an invalid URL -* At _site/pt/equipe.html:8258: +* At _site/posts/DH-Award-2017.html:106: - http://twitter.com/jenniferisve is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:8261: +* At _site/posts/DH-Award-2017.html:125: - http://github.com/jenniferisasi is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:8589: +* At _site/posts/DH-Award-2017.html:162: - http://twitter.com/rivaquiroga is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:8592: +* At _site/posts/DH-Award-2017.html:188: - http://github.com/rivaquiroga is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:8878: +* At _site/posts/FR-team.html:107: - http://twitter.com/amsichani is not an HTTPS link + 'a' 
tag is missing a reference -* At _site/pt/equipe.html:8881: +* At _site/posts/FR-team.html:126: - http://github.com/amsichani is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:9221: +* At _site/posts/FR-team.html:163: - http://twitter.com/AnisaHawes is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:9224: +* At _site/posts/FR-team.html:189: - http://github.com/anisa-hawes is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/equipe.html:9433: +* At _site/posts/Open-Education-Week.html:107: - http://github.com/charlottejmc is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/eventos.html:88: +* At _site/posts/Open-Education-Week.html:126: 'a' tag is missing a reference -* At _site/pt/eventos.html:107: +* At _site/posts/Open-Education-Week.html:163: 'a' tag is missing a reference -* At _site/pt/eventos.html:144: +* At _site/posts/Open-Education-Week.html:189: 'a' tag is missing a reference -* At _site/pt/eventos.html:203: +* At _site/posts/PH-TNA-JISC-event-2-annoucement.html:107: 'a' tag is missing a reference -* At _site/pt/index.html:89: +* At _site/posts/PH-TNA-JISC-event-2-annoucement.html:126: 'a' tag is missing a reference -* At _site/pt/index.html:108: +* At _site/posts/PH-TNA-JISC-event-2-annoucement.html:163: 'a' tag is missing a reference -* At _site/pt/index.html:145: +* At _site/posts/PH-TNA-JISC-event-2-annoucement.html:189: 'a' tag is missing a reference -* At _site/pt/index.html:204: +* At _site/posts/PH-commitment-to-diversity.html:107: 'a' tag is missing a reference -* At _site/pt/jisc-tna-parceria.html:88: +* At _site/posts/PH-commitment-to-diversity.html:126: 'a' tag is missing a reference -* At _site/pt/jisc-tna-parceria.html:107: +* At _site/posts/PH-commitment-to-diversity.html:163: 'a' tag is missing a reference -* At _site/pt/jisc-tna-parceria.html:144: +* At _site/posts/PH-commitment-to-diversity.html:189: 'a' tag is missing a reference -* At _site/pt/jisc-tna-parceria.html:203: +* At _site/posts/PH-contributors.html:107: 'a' tag is missing a reference -* At _site/pt/licoes-politica-remocao.html:88: +* At _site/posts/PH-contributors.html:126: 'a' tag is missing a reference -* At _site/pt/licoes-politica-remocao.html:107: +* At _site/posts/PH-contributors.html:163: 'a' tag is missing a reference -* At _site/pt/licoes-politica-remocao.html:144: +* At _site/posts/PH-contributors.html:189: 'a' tag is missing a reference -* At _site/pt/licoes-politica-remocao.html:203: +* At _site/posts/PH-espanol-in-DH2018.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/HTML-lista-palavras-1.html:121: +* At _site/posts/PH-espanol-in-DH2018.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/HTML-lista-palavras-1.html:140: +* At _site/posts/PH-espanol-in-DH2018.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/HTML-lista-palavras-1.html:177: +* At _site/posts/PH-espanol-in-DH2018.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/HTML-lista-palavras-1.html:236: +* At _site/posts/Uses-Of-The-Programming-Historian.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/HTML-lista-palavras-1.html:558: +* At _site/posts/Uses-Of-The-Programming-Historian.html:124: - http://www.w3schools.com/html/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/HTML-lista-palavras-2.html:121: +* At _site/posts/Uses-Of-The-Programming-Historian.html:161: 'a' tag is missing a reference -* At 
_site/pt/licoes/HTML-lista-palavras-2.html:140: +* At _site/posts/Uses-Of-The-Programming-Historian.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/HTML-lista-palavras-2.html:177: +* At _site/posts/ad-hoc-translation.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/HTML-lista-palavras-2.html:236: +* At _site/posts/ad-hoc-translation.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/algoritmos-agrupamento-scikit-learn-python.html:135: +* At _site/posts/ad-hoc-translation.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/algoritmos-agrupamento-scikit-learn-python.html:154: +* At _site/posts/ad-hoc-translation.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/algoritmos-agrupamento-scikit-learn-python.html:191: +* At _site/posts/adding-to-library-catalogue.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/algoritmos-agrupamento-scikit-learn-python.html:250: +* At _site/posts/adding-to-library-catalogue.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html:135: +* At _site/posts/adding-to-library-catalogue.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html:154: +* At _site/posts/adding-to-library-catalogue.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html:191: +* At _site/posts/anisa-welcome.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html:250: +* At _site/posts/anisa-welcome.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html:703: +* At _site/posts/anisa-welcome.html:161: - http://factominer.free.fr/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/analise-sentimento-R-syuzhet.html:119: +* At _site/posts/anisa-welcome.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/analise-sentimento-R-syuzhet.html:138: +* At _site/posts/anna-maria-sichani.html:108: 'a' tag is missing a reference -* At _site/pt/licoes/analise-sentimento-R-syuzhet.html:175: +* At _site/posts/anna-maria-sichani.html:127: 'a' tag is missing a reference -* At _site/pt/licoes/analise-sentimento-R-syuzhet.html:234: +* At _site/posts/anna-maria-sichani.html:164: 'a' tag is missing a reference -* At _site/pt/licoes/analise-sentimento-R-syuzhet.html:584: +* At _site/posts/anna-maria-sichani.html:190: - http://saifmohammad.com/WebPages/NRC-Emotion-Lexicon.htm is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/analise-sentimento-R-syuzhet.html:588: +* At _site/posts/announcing-new-team-spanish-language-editors.html:107: - http://www.matthewjockers.net/page/2/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/analise-sentimento-exploracao-dados.html:119: +* At _site/posts/announcing-new-team-spanish-language-editors.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/analise-sentimento-exploracao-dados.html:138: +* At _site/posts/announcing-new-team-spanish-language-editors.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/analise-sentimento-exploracao-dados.html:175: +* At _site/posts/announcing-new-team-spanish-language-editors.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/analise-sentimento-exploracao-dados.html:234: +* At _site/posts/appel-a-propositions.html:107: 'a' tag is missing a reference -* At 
_site/pt/licoes/analise-sentimento-exploracao-dados.html:605: +* At _site/posts/appel-a-propositions.html:126: - http://www.nltk.org/install.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/analise-sentimento-exploracao-dados.html:617: +* At _site/posts/appel-a-propositions.html:163: - http://www.nltk.org/_modules/nltk/sentiment/vader.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/analise-sentimento-exploracao-dados.html:639: +* At _site/posts/appel-a-propositions.html:189: - http://www.nltk.org/_modules/nltk/sentiment/vader.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/aplicacao-web-interativa-r-shiny-leaflet.html:119: +* At _site/posts/appel-a-traductions.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/aplicacao-web-interativa-r-shiny-leaflet.html:138: +* At _site/posts/appel-a-traductions.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/aplicacao-web-interativa-r-shiny-leaflet.html:175: +* At _site/posts/appel-a-traductions.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/aplicacao-web-interativa-r-shiny-leaflet.html:234: +* At _site/posts/appel-a-traductions.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:121: +* At _site/posts/articles-selected-ph-jisc-tna.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:140: +* At _site/posts/articles-selected-ph-jisc-tna.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:177: +* At _site/posts/articles-selected-ph-jisc-tna.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:236: +* At _site/posts/articles-selected-ph-jisc-tna.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:868: +* At _site/posts/bienvenue-ph-fr.html:105: - http://writemonkey.com/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:868: +* At _site/posts/bienvenue-ph-fr.html:124: - http://prose.io/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:868: +* At _site/posts/bienvenue-ph-fr.html:161: - http://www.draftin.com/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:870: +* At _site/posts/bienvenue-ph-fr.html:187: - http://gitit.net/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:870: +* At _site/posts/bogota-workshop-report.html:107: - http://jaspervdj.be/hakyll/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:884: +* At _site/posts/bogota-workshop-report.html:126: - http://www.antipope.org/charlie/blog-static/2013/10/why-microsoft-word-must-die.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/camadas-vetoriais-qgis.html:123: +* At _site/posts/bogota-workshop-report.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/camadas-vetoriais-qgis.html:142: +* At _site/posts/bogota-workshop-report.html:189: 'a' tag is missing a reference -* At 
_site/pt/licoes/camadas-vetoriais-qgis.html:179: +* At _site/posts/bolentin-informativo.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/camadas-vetoriais-qgis.html:238: +* At _site/posts/bolentin-informativo.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/camadas-vetoriais-qgis.html:1090: +* At _site/posts/bolentin-informativo.html:161: - http://geospatialhistorian.wordpress.com/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html:121: +* At _site/posts/bolentin-informativo.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html:140: +* At _site/posts/boletin-informativo-junio20.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html:177: +* At _site/posts/boletin-informativo-junio20.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html:236: +* At _site/posts/boletin-informativo-junio20.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html:562: +* At _site/posts/boletin-informativo-junio20.html:189: - http://www.7-zip.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html:576: +* At _site/posts/boletin-informativo-march20.html:107: - http://en.wikipedia.org/wiki/Tab-separated_values is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html:641: +* At _site/posts/boletin-informativo-march20.html:126: - http://williamjturkel.net/2013/06/15/basic-text-analysis-with-command-line-tools-in-linux/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html:642: +* At _site/posts/boletin-informativo-march20.html:163: - http://williamjturkel.net/2013/06/20/pattern-matching-and-permuted-term-indexing-with-command-line-tools-in-linux/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/contar-frequencias-palavras-python.html:121: +* At _site/posts/boletin-informativo-march20.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/contar-frequencias-palavras-python.html:140: +* At _site/posts/boletin-informativo-oct20.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/contar-frequencias-palavras-python.html:177: +* At _site/posts/boletin-informativo-oct20.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/contar-frequencias-palavras-python.html:236: +* At _site/posts/boletin-informativo-oct20.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/contar-frequencias-palavras-python.html:596: +* At _site/posts/boletin-informativo-oct20.html:187: - http://docs.python.org/tutorial/datastructures.html#list-comprehensions is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/contar-frequencias-palavras-python.html:721: +* At _site/posts/boletin-informativo.html:107: - http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/criacao-visualizacao-ficheiros-html-python.html:121: +* At _site/posts/boletin-informativo.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/criacao-visualizacao-ficheiros-html-python.html:140: +* At _site/posts/boletin-informativo.html:163: 'a' tag is missing a reference -* At 
_site/pt/licoes/criacao-visualizacao-ficheiros-html-python.html:177: +* At _site/posts/boletin-informativo.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/criacao-visualizacao-ficheiros-html-python.html:236: +* At _site/posts/buletin-de-information.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/criacao-visualizacao-ficheiros-html-python.html:553: +* At _site/posts/buletin-de-information.html:124: - http://www.w3schools.com/html/default.asp is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/criacao-visualizacao-ficheiros-html-python.html:559: +* At _site/posts/buletin-de-information.html:161: - http://www.w3schools.com/tags/tag_doctype.asp is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/criar-exposicao-omeka.html:121: +* At _site/posts/buletin-de-information.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/criar-exposicao-omeka.html:140: +* At _site/posts/bulletin-de-information-juin20.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/criar-exposicao-omeka.html:177: +* At _site/posts/bulletin-de-information-juin20.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/criar-exposicao-omeka.html:236: +* At _site/posts/bulletin-de-information-juin20.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/download-automatico-wget.html:119: +* At _site/posts/bulletin-de-information-juin20.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/download-automatico-wget.html:138: +* At _site/posts/bulletin-de-information-juin20.html:393: 'a' tag is missing a reference -* At _site/pt/licoes/download-automatico-wget.html:175: +* At _site/posts/bulletin-de-information-march20.html:108: 'a' tag is missing a reference -* At _site/pt/licoes/download-automatico-wget.html:234: +* At _site/posts/bulletin-de-information-march20.html:127: 'a' tag is missing a reference -* At _site/pt/licoes/download-automatico-wget.html:662: +* At _site/posts/bulletin-de-information-march20.html:164: - http://www.gnu.org/software/wget/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/download-automatico-wget.html:662: +* At _site/posts/bulletin-de-information-march20.html:190: - http://ftp.gnu.org/gnu/wget/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/download-multiplos-registros-query-strings.html:119: +* At _site/posts/bulletin-de-information-march20.html:345: 'a' tag is missing a reference -* At _site/pt/licoes/download-multiplos-registros-query-strings.html:138: +* At _site/posts/bulletin-de-information-oct20.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/download-multiplos-registros-query-strings.html:175: +* At _site/posts/bulletin-de-information-oct20.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/download-multiplos-registros-query-strings.html:234: +* At _site/posts/bulletin-de-information-oct20.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/download-multiplos-registros-query-strings.html:549: +* At _site/posts/bulletin-de-information-oct20.html:187: - http://www.oldbaileyonline.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/download-multiplos-registros-query-strings.html:565: +* At _site/posts/bulletin-de-information-oct20.html:337: - http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/download-multiplos-registros-query-strings.html:603: +* At 
_site/posts/bulletin-de-information.html:107: - http://www.oldbaileyonline.org/forms/formMain.jsp is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/download-multiplos-registros-query-strings.html:1148: +* At _site/posts/bulletin-de-information.html:126: - http://docs.python.org/tutorial/errors.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/download-multiplos-registros-query-strings.html:1293: +* At _site/posts/bulletin-de-information.html:163: - http://www.oldbaileyonline.org/static/DocAPI.jsp is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/download-multiplos-registros-query-strings.html:1294: +* At _site/posts/bulletin-de-information.html:189: - http://stackoverflow.com/questions/273192/python-best-way-to-create-directory-if-it-doesnt-exist-for-file-write is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/download-paginas-web-python.html:121: +* At _site/posts/bulletin-de-information.html:331: 'a' tag is missing a reference -* At _site/pt/licoes/download-paginas-web-python.html:140: +* At _site/posts/bulletin-issue-01.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/download-paginas-web-python.html:177: +* At _site/posts/bulletin-issue-01.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/download-paginas-web-python.html:236: +* At _site/posts/bulletin-issue-01.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/download-paginas-web-python.html:624: +* At _site/posts/bulletin-issue-01.html:187: - http://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/download-paginas-web-python.html:624: +* At _site/posts/bulletin-issue-02.html:105: - http://www.oldbaileyonline.org/images.jsp?doc=178006280084 is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/explorar-analisar-dados-rede-python.html:125: +* At _site/posts/bulletin-issue-02.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/explorar-analisar-dados-rede-python.html:144: +* At _site/posts/bulletin-issue-02.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/explorar-analisar-dados-rede-python.html:181: +* At _site/posts/bulletin-issue-02.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/explorar-analisar-dados-rede-python.html:240: +* At _site/posts/bulletin-issue-03.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/explorar-analisar-dados-rede-python.html:621: +* At _site/posts/bulletin-issue-03.html:124: - http://www.oxforddnb.com is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/explorar-analisar-dados-rede-python.html:621: +* At _site/posts/bulletin-issue-03.html:161: - http://www.sixdegreesoffrancisbacon.com is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/explorar-analisar-dados-rede-python.html:897: +* At _site/posts/bulletin-issue-03.html:187: - http://sixdegreesoffrancisbacon.com/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/extrair-paginas-ilustradas-com-python.html:119: +* At _site/posts/bulletin-issue-04.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/extrair-paginas-ilustradas-com-python.html:138: +* At _site/posts/bulletin-issue-04.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/extrair-paginas-ilustradas-com-python.html:175: +* At 
_site/posts/bulletin-issue-04.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/extrair-paginas-ilustradas-com-python.html:234: +* At _site/posts/bulletin-issue-04.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/extrair-paginas-ilustradas-com-python.html:685: +* At _site/posts/bulletin-issue-05.html:105: - http://web.archive.org/web/20190115051900/https://conda.io/docs/_downloads/conda-cheatsheet.pdf is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/extrair-palavras-chave.html:119: +* At _site/posts/bulletin-issue-05.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/extrair-palavras-chave.html:138: +* At _site/posts/bulletin-issue-05.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/extrair-palavras-chave.html:175: +* At _site/posts/bulletin-issue-05.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/extrair-palavras-chave.html:234: +* At _site/posts/bulletin-issue-06.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/extrair-palavras-chave.html:554: +* At _site/posts/bulletin-issue-06.html:124: - http://www.british-history.ac.uk/alumni-oxon/1500-1714 is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/geocodificando-qgis.html:119: +* At _site/posts/bulletin-issue-06.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/geocodificando-qgis.html:138: +* At _site/posts/bulletin-issue-06.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/geocodificando-qgis.html:175: +* At _site/posts/call-for-editors-en.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/geocodificando-qgis.html:234: +* At _site/posts/call-for-editors-en.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/geocodificando-qgis.html:599: +* At _site/posts/call-for-editors-en.html:161: - [url](https://www.oracle.com/java/technologies/downloads/#java8) is an invalid URL + 'a' tag is missing a reference -* At _site/pt/licoes/geocodificando-qgis.html:602: +* At _site/posts/call-for-editors-en.html:187: - http://www.british-history.ac.uk/alumni-oxon/1500-1714 is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/geocodificando-qgis.html:684: +* At _site/posts/call-for-editors.html:105: - http://www.county-borders.co.uk/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/geocodificando-qgis.html:769: +* At _site/posts/call-for-editors.html:124: - http://www.gazetteer.org.uk/index.php is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/geocodificando-qgis.html:781: +* At _site/posts/call-for-editors.html:161: - http://www.county-borders.co.uk/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/georreferenciamento-qgis.html:123: +* At _site/posts/call-for-editors.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/georreferenciamento-qgis.html:142: +* At _site/posts/call-for-fr-members.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/georreferenciamento-qgis.html:179: +* At _site/posts/call-for-fr-members.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/georreferenciamento-qgis.html:238: +* At _site/posts/call-for-fr-members.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/georreferenciamento-qgis.html:612: +* At _site/posts/call-for-fr-members.html:187: - http://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP is not an HTTPS link + 'a' tag is missing a reference -* At 
_site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html:117: +* At _site/posts/call-to-action.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html:136: +* At _site/posts/call-to-action.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html:173: +* At _site/posts/call-to-action.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html:232: +* At _site/posts/call-to-action.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/index.html:88: +* At _site/posts/call-to-action.html:285: 'a' tag is missing a reference -* At _site/pt/licoes/index.html:107: +* At _site/posts/cfp-jisc-ph.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/index.html:144: +* At _site/posts/cfp-jisc-ph.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/index.html:203: +* At _site/posts/cfp-jisc-ph.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/instalacao-linux.html:121: +* At _site/posts/cfp-jisc-ph.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/instalacao-linux.html:140: +* At _site/posts/charlotte-welcome.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/instalacao-linux.html:177: +* At _site/posts/charlotte-welcome.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/instalacao-linux.html:236: +* At _site/posts/charlotte-welcome.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/instalacao-mac.html:121: +* At _site/posts/charlotte-welcome.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/instalacao-mac.html:140: +* At _site/posts/convocatoria-de-tutoriales.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/instalacao-mac.html:177: +* At _site/posts/convocatoria-de-tutoriales.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/instalacao-mac.html:236: +* At _site/posts/convocatoria-de-tutoriales.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/instalacao-mac.html:538: +* At _site/posts/convocatoria-de-tutoriales.html:187: - http://support.apple.com/kb/ht1427 is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/instalacao-modulos-python-pip.html:119: +* At _site/posts/convocatoria-editor.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/instalacao-modulos-python-pip.html:138: +* At _site/posts/convocatoria-editor.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/instalacao-modulos-python-pip.html:175: +* At _site/posts/convocatoria-editor.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/instalacao-modulos-python-pip.html:234: +* At _site/posts/convocatoria-editor.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/instalacao-windows.html:121: +* At _site/posts/convocatoria-editores-2021.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/instalacao-windows.html:140: +* At _site/posts/convocatoria-editores-2021.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/instalacao-windows.html:177: +* At _site/posts/convocatoria-editores-2021.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/instalacao-windows.html:236: +* At _site/posts/convocatoria-editores-2021.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-ao-markdown.html:119: +* At _site/posts/convocatoria-para-editores.html:105: 'a' tag is missing a reference -* At 
_site/pt/licoes/introducao-ao-markdown.html:138: +* At _site/posts/convocatoria-para-editores.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-ao-markdown.html:175: +* At _site/posts/convocatoria-para-editores.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-ao-markdown.html:234: +* At _site/posts/convocatoria-para-editores.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-ao-markdown.html:572: +* At _site/posts/convocatoria-taller-PH-espanol.html:106: - http://daringfireball.net/projects/markdown/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-ao-markdown.html:576: +* At _site/posts/convocatoria-taller-PH-espanol.html:125: - http://github.com is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-codificacao-textos-tei-1.html:119: +* At _site/posts/convocatoria-taller-PH-espanol.html:162: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-codificacao-textos-tei-1.html:138: +* At _site/posts/convocatoria-taller-PH-espanol.html:188: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-codificacao-textos-tei-1.html:175: +* At _site/posts/corpus-linguistics-in-action.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-codificacao-textos-tei-1.html:234: +* At _site/posts/corpus-linguistics-in-action.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:119: +* At _site/posts/corpus-linguistics-in-action.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:138: +* At _site/posts/corpus-linguistics-in-action.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:175: +* At _site/posts/december-newsletter.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:234: +* At _site/posts/december-newsletter.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:575: +* At _site/posts/december-newsletter.html:163: - http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:604: +* At _site/posts/december-newsletter.html:189: - http://www.oxforddnb.com is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:616: +* At _site/posts/dh-award-2016.html:106: - http://www.geonames.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:750: +* At _site/posts/dh-award-2016.html:125: - http://semanticweb.org/wiki/Main_Page.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:752: +* At _site/posts/dh-award-2016.html:162: - http://web.archive.org/web/20170715094229/http://www.musicontology.com/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:752: +* At _site/posts/dh-award-2016.html:188: - http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:870: +* At _site/posts/dh-publishing-assistant.html:105: - 
http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:926: +* At _site/posts/dh-publishing-assistant.html:124: - http://www.easyrdf.org/converter is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:938: +* At _site/posts/dh-publishing-assistant.html:161: - http://dbpedia.org/snorql/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:1038: +* At _site/posts/dh-publishing-assistant.html:187: - http://dbpedia.org/class/yago/WikicatBritishHistorians is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:1038: +* At _site/posts/digital-storytelling-immigrant-stories.html:105: - http://dbpedia.org/class/yago/WikicatWomenHistorians is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:1077: +* At _site/posts/digital-storytelling-immigrant-stories.html:124: - http://www.snee.com/bobdc.blog/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:1081: +* At _site/posts/digital-storytelling-immigrant-stories.html:161: - http://linkeddata.org/guides-and-tutorials is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:1083: +* At _site/posts/digital-storytelling-immigrant-stories.html:187: - http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-dados-abertos-conectados.html:1089: +* At _site/posts/distant-reading-in-the-undergraduate-classroom.html:105: - http://www.ahrc.ac.uk/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-estilometria-python.html:119: +* At _site/posts/distant-reading-in-the-undergraduate-classroom.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-estilometria-python.html:138: +* At _site/posts/distant-reading-in-the-undergraduate-classroom.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-estilometria-python.html:175: +* At _site/posts/distant-reading-in-the-undergraduate-classroom.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-estilometria-python.html:234: +* At _site/posts/dois-for-ph.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-estilometria-python.html:805: +* At _site/posts/dois-for-ph.html:126: - http://jupyter.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-estilometria-python.html:805: +* At _site/posts/dois-for-ph.html:163: - http://jupyterlab.readthedocs.io/en/stable/getting_started/installation.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-instalacao-python.html:121: +* At _site/posts/dois-for-ph.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-instalacao-python.html:140: +* At _site/posts/edinburgh-workshop-2015.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-instalacao-python.html:177: +* At _site/posts/edinburgh-workshop-2015.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-instalacao-python.html:236: +* At 
_site/posts/edinburgh-workshop-2015.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-instalacao-python.html:544: +* At _site/posts/edinburgh-workshop-2015.html:187: - http://www.python.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-instalacao-python.html:544: +* At _site/posts/education-and-community-lead.html:105: - http://www.crummy.com/software/BeautifulSoup/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-instalacao-python.html:546: +* At _site/posts/education-and-community-lead.html:124: - http://www.activestate.com/komodo-edit is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-instalacao-python.html:546: +* At _site/posts/education-and-community-lead.html:161: - http://wiki.python.org/moin/PythonEditors/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-instalacao-python.html:554: +* At _site/posts/education-and-community-lead.html:187: - http://sebastianraschka.com/Articles/2014_python_2_3_key_diff.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-jupyter-notebooks.html:123: +* At _site/posts/en-call-for-lessons.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-jupyter-notebooks.html:142: +* At _site/posts/en-call-for-lessons.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-jupyter-notebooks.html:179: +* At _site/posts/en-call-for-lessons.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-jupyter-notebooks.html:238: +* At _site/posts/en-call-for-lessons.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-linha-comando-bash.html:121: +* At _site/posts/en-call-for-proposals.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-linha-comando-bash.html:140: +* At _site/posts/en-call-for-proposals.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-linha-comando-bash.html:177: +* At _site/posts/en-call-for-proposals.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-linha-comando-bash.html:236: +* At _site/posts/en-call-for-proposals.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-linha-comando-bash.html:564: +* At _site/posts/es-buscamos-revisores.html:107: - http://www.tldp.org/LDP/Bash-Beginners-Guide/html/chap_01.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-linha-comando-bash.html:786: +* At _site/posts/es-buscamos-revisores.html:126: - http://www.gutenberg.org/ebooks/2600 is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-mysql-r.html:119: +* At _site/posts/es-buscamos-revisores.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-mysql-r.html:138: +* At _site/posts/es-buscamos-revisores.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-mysql-r.html:175: +* At _site/posts/fd-laramee.html:108: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-mysql-r.html:234: +* At _site/posts/fd-laramee.html:127: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-mysql-r.html:910: +* At _site/posts/fd-laramee.html:164: - http://dev.mysql.com/downloads/workbench/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-mysql-r.html:1818: +* At _site/posts/fd-laramee.html:190: - 
http://web.archive.org/web/20171228130133/https://www.ntu.edu.sg/home/ehchua/programming/sql/MySQL_Beginner.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-omeka-net.html:119: +* At _site/posts/first-teaching-workshop.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-omeka-net.html:138: +* At _site/posts/first-teaching-workshop.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-omeka-net.html:175: +* At _site/posts/first-teaching-workshop.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-omeka-net.html:234: +* At _site/posts/first-teaching-workshop.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/introducao-omeka-net.html:544: +* At _site/posts/full-text-search.html:107: - http://www.omeka.net is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/introducao-omeka-net.html:978: +* At _site/posts/full-text-search.html:126: - http://info.omeka.net/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/investigar-literatura-lusofona-literateca.html:117: +* At _site/posts/full-text-search.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/investigar-literatura-lusofona-literateca.html:136: +* At _site/posts/full-text-search.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/investigar-literatura-lusofona-literateca.html:173: +* At _site/posts/gisele-welcome.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/investigar-literatura-lusofona-literateca.html:232: +* At _site/posts/gisele-welcome.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/limpar-dados-openrefine.html:123: +* At _site/posts/gisele-welcome.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/limpar-dados-openrefine.html:142: +* At _site/posts/gisele-welcome.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/limpar-dados-openrefine.html:179: +* At _site/posts/history-of-protest.html:108: 'a' tag is missing a reference -* At _site/pt/licoes/limpar-dados-openrefine.html:238: +* At _site/posts/history-of-protest.html:127: 'a' tag is missing a reference -* At _site/pt/licoes/limpar-dados-openrefine.html:574: +* At _site/posts/history-of-protest.html:164: - http://openrefine.org is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/manipulacao-transformacao-dados-r.html:119: +* At _site/posts/history-of-protest.html:190: 'a' tag is missing a reference -* At _site/pt/licoes/manipulacao-transformacao-dados-r.html:138: +* At _site/posts/how-we-moved-to-github.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/manipulacao-transformacao-dados-r.html:175: +* At _site/posts/how-we-moved-to-github.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/manipulacao-transformacao-dados-r.html:234: +* At _site/posts/how-we-moved-to-github.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/manipulacao-transformacao-dados-r.html:567: +* At _site/posts/how-we-moved-to-github.html:187: - http://hadley.nz/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/manipulacao-transformacao-dados-r.html:585: +* At _site/posts/how-we-moved-to-github.html:281: - http://tidyverse.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/manipulacao-transformacao-dados-r.html:590: +* At _site/posts/infrastructure-at-ph.html:106: - http://magrittr.tidyverse.org is not an HTTPS link + 'a' tag is missing a reference -* At 
_site/pt/licoes/manipulacao-transformacao-dados-r.html:591: +* At _site/posts/infrastructure-at-ph.html:125: - http://ggplot2.tidyverse.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/manipulacao-transformacao-dados-r.html:591: +* At _site/posts/infrastructure-at-ph.html:162: - http://www.springer.com/us/book/9780387245447 is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/manipulacao-transformacao-dados-r.html:592: +* At _site/posts/infrastructure-at-ph.html:188: - http://tibble.tidyverse.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/manipulacao-transformacao-dados-r.html:694: +* At _site/posts/jennifer-isasi-jose-a-motilla.html:106: - http://stefanbache.dk/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/manipulacao-transformacao-dados-r.html:694: +* At _site/posts/jennifer-isasi-jose-a-motilla.html:125: - http://hadley.nz/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/manipulacao-transformacao-dados-r.html:1018: +* At _site/posts/jennifer-isasi-jose-a-motilla.html:162: - http://www.ggplot2.org is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/manipular-strings-python.html:121: +* At _site/posts/jennifer-isasi-jose-a-motilla.html:188: 'a' tag is missing a reference -* At _site/pt/licoes/manipular-strings-python.html:140: +* At _site/posts/join-IPP.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/manipular-strings-python.html:177: +* At _site/posts/join-IPP.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/manipular-strings-python.html:236: +* At _site/posts/join-IPP.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/nocoes-basicas-R-dados-tabulares.html:119: +* At _site/posts/join-IPP.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/nocoes-basicas-R-dados-tabulares.html:138: +* At _site/posts/lanzamiento-PH-espanol.html:106: 'a' tag is missing a reference -* At _site/pt/licoes/nocoes-basicas-R-dados-tabulares.html:175: +* At _site/posts/lanzamiento-PH-espanol.html:125: 'a' tag is missing a reference -* At _site/pt/licoes/nocoes-basicas-R-dados-tabulares.html:234: +* At _site/posts/lanzamiento-PH-espanol.html:162: 'a' tag is missing a reference -* At _site/pt/licoes/nocoes-basicas-R-dados-tabulares.html:1054: +* At _site/posts/lanzamiento-PH-espanol.html:188: - http://dh-r.lincolnmullen.com/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/nocoes-basicas-paginas-web-html.html:121: +* At _site/posts/launch-PH-Spanish.html:106: 'a' tag is missing a reference -* At _site/pt/licoes/nocoes-basicas-paginas-web-html.html:140: +* At _site/posts/launch-PH-Spanish.html:125: 'a' tag is missing a reference -* At _site/pt/licoes/nocoes-basicas-paginas-web-html.html:177: +* At _site/posts/launch-PH-Spanish.html:162: 'a' tag is missing a reference -* At _site/pt/licoes/nocoes-basicas-paginas-web-html.html:236: +* At _site/posts/launch-PH-Spanish.html:188: 'a' tag is missing a reference -* At _site/pt/licoes/nocoes-basicas-paginas-web-html.html:583: +* At _site/posts/launch-portuguese.html:107: - http://www.w3schools.com/html/default.asp is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/nocoes-basicas-paginas-web-html.html:636: +* At _site/posts/launch-portuguese.html:126: - http://www.w3schools.com/html/default.asp is not an HTTPS link + 'a' tag is missing a reference -* At 
_site/pt/licoes/nocoes-basicas-paginas-web-html.html:637: +* At _site/posts/launch-portuguese.html:163: - http://www.w3schools.com/html/html5_intro.asp is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/normalizacao-dados-textuais-python.html:121: +* At _site/posts/launch-portuguese.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/normalizacao-dados-textuais-python.html:140: +* At _site/posts/lessons-we-would-like-to-see.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/normalizacao-dados-textuais-python.html:177: +* At _site/posts/lessons-we-would-like-to-see.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/normalizacao-dados-textuais-python.html:236: +* At _site/posts/lessons-we-would-like-to-see.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/palavras-chave-contexto-usando-n-grams-python.html:121: +* At _site/posts/lessons-we-would-like-to-see.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/palavras-chave-contexto-usando-n-grams-python.html:140: +* At _site/posts/llano-gribomont-vaughan.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/palavras-chave-contexto-usando-n-grams-python.html:177: +* At _site/posts/llano-gribomont-vaughan.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/palavras-chave-contexto-usando-n-grams-python.html:236: +* At _site/posts/llano-gribomont-vaughan.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html:119: +* At _site/posts/llano-gribomont-vaughan.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html:138: +* At _site/posts/matthew-lincoln.html:108: 'a' tag is missing a reference -* At _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html:175: +* At _site/posts/matthew-lincoln.html:127: 'a' tag is missing a reference -* At _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html:234: +* At _site/posts/matthew-lincoln.html:164: 'a' tag is missing a reference -* At _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html:691: +* At _site/posts/matthew-lincoln.html:190: - http://notepad-plus-plus.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html:693: +* At _site/posts/merci-les-amis.html:107: - http://komodoide.com/komodo-edit/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html:762: +* At _site/posts/merci-les-amis.html:126: - http://homensenaviosdobacalhau.cm-ilhavo.pt/header/diretorio/showppl/17606 is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html:940: +* At _site/posts/merci-les-amis.html:163: - http://historyonics.blogspot.co.uk/2014/01/judging-book-by-its-url.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html:944: +* At _site/posts/merci-les-amis.html:189: - http://earlymodernnotes.wordpress.com/2013/05/18/unclean-unclean-what-historians-can-do-about-sharing-our-messy-research-data/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html:957: +* At _site/posts/mid-year-21-newsletter.html:107: - http://britishlibrary.typepad.co.uk/collectioncare/2013/09/the-twelve-principles-of-digital-preservation.html is not an HTTPS link + 'a' tag is missing a reference -* At 
_site/pt/licoes/preservar-os-seus-dados-de-investigacao.html:966: +* At _site/posts/mid-year-21-newsletter.html:126: - http://data-archive.ac.uk/create-manage/document is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/processamento-basico-texto-r.html:121: +* At _site/posts/mid-year-21-newsletter.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/processamento-basico-texto-r.html:140: +* At _site/posts/mid-year-21-newsletter.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/processamento-basico-texto-r.html:177: +* At _site/posts/mid-year-newsletter.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/processamento-basico-texto-r.html:236: +* At _site/posts/mid-year-newsletter.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/processamento-basico-texto-r.html:1386: +* At _site/posts/mid-year-newsletter.html:163: - http://www.presidency.ucsb.edu/sou.php is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/qgis-camadas.html:123: +* At _site/posts/mid-year-newsletter.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/qgis-camadas.html:142: +* At _site/posts/model-workshop.html:106: 'a' tag is missing a reference -* At _site/pt/licoes/qgis-camadas.html:179: +* At _site/posts/model-workshop.html:125: 'a' tag is missing a reference -* At _site/pt/licoes/qgis-camadas.html:238: +* At _site/posts/model-workshop.html:162: 'a' tag is missing a reference -* At _site/pt/licoes/qgis-camadas.html:607: +* At _site/posts/model-workshop.html:188: - http://www.gov.pe.ca/gis/download.php3?name=coastline&file_format=SHP is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/qgis-camadas.html:608: +* At _site/posts/new-english-managing-editor.html:105: - http://www.gov.pe.ca/gis/download.php3?name=lot_town&file_format=SHP is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/qgis-camadas.html:609: +* At _site/posts/new-english-managing-editor.html:124: - http://www.gov.pe.ca/gis/download.php3?name=hydronetwork&file_format=SHP is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/qgis-camadas.html:610: +* At _site/posts/new-english-managing-editor.html:161: - http://www.gov.pe.ca/gis/download.php3?name=forest_35&file_format=SHP is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/qgis-camadas.html:611: +* At _site/posts/new-english-managing-editor.html:187: - http://www.gov.pe.ca/gis/download.php3?name=nat_parks&file_format=SHP is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/qgis-camadas.html:693: +* At _site/posts/new-lessons-page.html:105: - http://web.archive.org/web/20180715071501/http://www.qgistutorials.com/pt_BR/docs/working_with_projections.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/qgis-camadas.html:1308: +* At _site/posts/new-lessons-page.html:124: - http://geospatialhistorian.wordpress.com/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/reutilizacao-codigo-modularidade-python.html:121: +* At _site/posts/new-lessons-page.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/reutilizacao-codigo-modularidade-python.html:140: +* At _site/posts/new-lessons-page.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/reutilizacao-codigo-modularidade-python.html:177: +* At _site/posts/new-navigation.html:107: 'a' tag is missing a reference -* At 
_site/pt/licoes/reutilizacao-codigo-modularidade-python.html:236: +* At _site/posts/new-navigation.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/saida-dados-ficheiro-html-python.html:121: +* At _site/posts/new-navigation.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/saida-dados-ficheiro-html-python.html:140: +* At _site/posts/new-navigation.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/saida-dados-ficheiro-html-python.html:177: +* At _site/posts/newsletter-april-21.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/saida-dados-ficheiro-html-python.html:236: +* At _site/posts/newsletter-april-21.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:119: +* At _site/posts/newsletter-april-21.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:138: +* At _site/posts/newsletter-april-21.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:175: +* At _site/posts/newsletter-june20.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:234: +* At _site/posts/newsletter-june20.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:560: +* At _site/posts/newsletter-june20.html:163: - http://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:564: +* At _site/posts/newsletter-june20.html:189: - http://blog.taracopplestone.co.uk/making-things-photobashing-as-archaeological-remediation/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:564: +* At _site/posts/newsletter-march20.html:108: - http://www.samplereality.com/2012/05/02/notes-towards-a-deformed-humanities/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:564: +* At _site/posts/newsletter-march20.html:127: - http://nowviskie.org/2013/resistance-in-the-materials/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:564: +* At _site/posts/newsletter-march20.html:164: - http://nooart.org/post/73353953758/temkin-glitchhumancomputerinteraction is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:576: +* At _site/posts/newsletter-march20.html:190: - http://musicalgorithms.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:578: +* At _site/posts/newsletter-oct20.html:105: - http://sonic-pi.net/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:591: +* At _site/posts/newsletter-oct20.html:124: - http://www.icad.org/Proceedings/2008/Hermann2008.pdf is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:602: +* At _site/posts/newsletter-oct20.html:161: - http://waxy.org/2015/12/if_drake_was_born_a_piano/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:606: +* At _site/posts/newsletter-oct20.html:187: - http://www.icad.org/Proceedings/2008/Hermann2008.pdf is not an HTTPS link + 'a' tag is missing a 
reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:614: +* At _site/posts/newsletter-year20.html:107: - http://musicalgorithms.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:614: +* At _site/posts/newsletter-year20.html:126: - http://musicalgorithms.org/3.0/index.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:672: +* At _site/posts/newsletter-year20.html:163: - http://www.lynda.com/GarageBand-tutorials/Importing-audio-tracks/156620/164050-4.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:710: +* At _site/posts/newsletter-year20.html:189: - http://musicalgorithms.org/3.0/index.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:755: +* At _site/posts/newsletter.html:107: - http://www.ethanhein.com/wp/2010/scales-and-emotions/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:820: +* At _site/posts/newsletter.html:126: - http://docs.python-guide.org/en/latest/starting/install/win/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:824: +* At _site/posts/newsletter.html:163: - http://www.howtogeek.com/235101/10-ways-to-open-the-command-prompt-in-windows-10/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:882: +* At _site/posts/newsletter.html:189: - http://www.electronics.dit.ie/staff/tscarff/Music_technology/midi/midi_note_numbers_for_octaves.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:884: +* At _site/posts/noticias-PH-espanol.html:106: - http://trillian.mit.edu/~jc/music/abc/ABCcontrib.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:887: +* At _site/posts/noticias-PH-espanol.html:125: - http://themacroscope.org is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1039: +* At _site/posts/noticias-PH-espanol.html:162: - http://sonic-pi.net is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1045: +* At _site/posts/noticias-PH-espanol.html:188: - http://puffin.creighton.edu/jesuit/relations/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1105: +* At _site/posts/ph-is-people.html:107: - http://library.gwu.edu/scholarly-technology-group/posts/sound-library-work is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1109: +* At _site/posts/ph-is-people.html:126: - http://www.lilypond.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1113: +* At _site/posts/ph-is-people.html:163: - http://www.trevorowens.org/2012/11/discovery-and-justification-are-different-notes-on-sciencing-the-humanities/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1118: +* At _site/posts/ph-is-people.html:189: 'a' tag is missing a reference -* At 
_site/pt/licoes/som-dados-sonificacao-historiadores.html:1119: +* At _site/posts/plan-s.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1120: +* At _site/posts/plan-s.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1121: +* At _site/posts/plan-s.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1122: +* At _site/posts/plan-s.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1123: +* At _site/posts/planning-a-lesson.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1124: +* At _site/posts/planning-a-lesson.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1128: +* At _site/posts/planning-a-lesson.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1130: +* At _site/posts/planning-a-lesson.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1130: +* At _site/posts/premio-hdh-2018.html:106: - http://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1132: +* At _site/posts/premio-hdh-2018.html:125: 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1132: +* At _site/posts/premio-hdh-2018.html:162: - http://www.jstor.org/stable/734136 is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1134: +* At _site/posts/premio-hdh-2018.html:188: 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1134: +* At _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html:107: - http://www.icad.org/Proceedings/2008/Hermann2008.pdf is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1136: +* At _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1136: +* At _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html:163: - http://motherboard.vice.com/read/the-strange-acoustic-phenomenon-behind-these-wacked-out-versions-of-pop-songs is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1138: +* At _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/sumarizacao-narrativas-web-python.html:119: +* At _site/posts/proghist-trustee-advert.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/sumarizacao-narrativas-web-python.html:138: +* At _site/posts/proghist-trustee-advert.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/sumarizacao-narrativas-web-python.html:175: +* At _site/posts/proghist-trustee-advert.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/sumarizacao-narrativas-web-python.html:234: +* At _site/posts/proghist-trustee-advert.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/sumarizacao-narrativas-web-python.html:529: +* At 
_site/posts/programming-historian-community-survey.html:105: - http://arquivo.pt is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/sumarizacao-narrativas-web-python.html:822: +* At _site/posts/programming-historian-community-survey.html:124: - http://yake.inesctec.pt is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/sumarizacao-narrativas-web-python.html:978: +* At _site/posts/programming-historian-community-survey.html:161: - http://ecir2019.org/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/trabalhando-ficheiros-texto-python.html:121: +* At _site/posts/programming-historian-community-survey.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/trabalhando-ficheiros-texto-python.html:140: +* At _site/posts/programming-historian-india.html:106: 'a' tag is missing a reference -* At _site/pt/licoes/trabalhando-ficheiros-texto-python.html:177: +* At _site/posts/programming-historian-india.html:125: 'a' tag is missing a reference -* At _site/pt/licoes/trabalhando-ficheiros-texto-python.html:236: +* At _site/posts/programming-historian-india.html:162: 'a' tag is missing a reference -* At _site/pt/licoes/trabalhando-ficheiros-texto-python.html:603: +* At _site/posts/programming-historian-india.html:188: - http://docs.python.org/release/2.5.4/ref/keywords.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html:135: +* At _site/posts/programming-historian-live-london.html:107: 'a' tag is missing a reference -* At _site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html:154: +* At _site/posts/programming-historian-live-london.html:126: 'a' tag is missing a reference -* At _site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html:191: +* At _site/posts/programming-historian-live-london.html:163: 'a' tag is missing a reference -* At _site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html:250: +* At _site/posts/programming-historian-live-london.html:189: 'a' tag is missing a reference -* At _site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html:2193: +* At _site/posts/promoting-digital-archives.html:105: - http://patristica.net/graeca is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html:2193: +* At _site/posts/promoting-digital-archives.html:124: - http://stephanus.tlg.uci.edu is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html:2304: +* At _site/posts/promoting-digital-archives.html:161: - http://doi.org/10.30687/arm/9372-8175/2022/01/005 is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/licoes/visualizacao-animacao-tabelas-historicas-R.html:119: +* At _site/posts/promoting-digital-archives.html:187: 'a' tag is missing a reference -* At _site/pt/licoes/visualizacao-animacao-tabelas-historicas-R.html:138: +* At _site/posts/reintroducing-the-ph-blog.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/visualizacao-animacao-tabelas-historicas-R.html:175: +* At _site/posts/reintroducing-the-ph-blog.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/visualizacao-animacao-tabelas-historicas-R.html:234: +* At _site/posts/reintroducing-the-ph-blog.html:161: 'a' tag is missing a reference -* At _site/pt/licoes/visualizacao-basica-dados-tabulares-r.html:117: +* At _site/posts/reintroducing-the-ph-blog.html:187: 'a' tag is missing a 
reference -* At _site/pt/licoes/visualizacao-basica-dados-tabulares-r.html:136: +* At _site/posts/retirement-and-sustainability-policies.html:105: 'a' tag is missing a reference -* At _site/pt/licoes/visualizacao-basica-dados-tabulares-r.html:173: +* At _site/posts/retirement-and-sustainability-policies.html:124: 'a' tag is missing a reference -* At _site/pt/licoes/visualizacao-basica-dados-tabulares-r.html:232: +* At _site/posts/retirement-and-sustainability-policies.html:161: 'a' tag is missing a reference -* At _site/pt/pesquisa.html:88: +* At _site/posts/retirement-and-sustainability-policies.html:187: 'a' tag is missing a reference -* At _site/pt/pesquisa.html:107: +* At _site/posts/riva-quiroga-joshua-ortiz.html:106: 'a' tag is missing a reference -* At _site/pt/pesquisa.html:144: +* At _site/posts/riva-quiroga-joshua-ortiz.html:125: 'a' tag is missing a reference -* At _site/pt/pesquisa.html:203: +* At _site/posts/riva-quiroga-joshua-ortiz.html:162: 'a' tag is missing a reference -* At _site/pt/pesquisa.html:253: +* At _site/posts/riva-quiroga-joshua-ortiz.html:188: - http://niche-canada.org/wp-content/uploads/2013/09/programming-historian-1.pdf is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/pesquisa.html:259: +* At _site/posts/roundup2017a.html:108: - http://www.diva-portal.org/smash/record.jsf?pid=diva2%3A1508542&dswid=7551 is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/pesquisa.html:263: +* At _site/posts/roundup2017a.html:127: - http://jah.oxfordjournals.org/content/103/1/299.2.full is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/pesquisa.html:264: +* At _site/posts/roundup2017a.html:164: - http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/pesquisa.html:276: +* At _site/posts/roundup2017a.html:190: - http://www.digitalhumanities.org/dhq/vol/16/2/000585/000585.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/pesquisa.html:282: +* At _site/posts/september-newsletter.html:107: - http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/pesquisa.html:284: +* At _site/posts/september-newsletter.html:126: - http://web.archive.org/web/20180713014622/http://dhcommons.org/journal/issue-1/editorial-sustainability-and-open-peer-review-programming-historian is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/pesquisa.html:285: +* At _site/posts/september-newsletter.html:163: - http://www.themacroscope.org/2.0/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/pesquisa.html:306: +* At _site/posts/september-newsletter.html:189: - http://www.iea.usp.br/eventos/historia-digital-educacao-caminhos-cruzados is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/pesquisa.html:390: +* At _site/posts/sonic-word-clouds.html:105: - http://ixa2.si.ehu.eus/intele/?q=webinars is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/pesquisa.html:393: +* At _site/posts/sonic-word-clouds.html:124: - http://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/pesquisa.html:406: +* At _site/posts/sonic-word-clouds.html:161: - http://fredgibbs.net/assets/images/ph-poster/final-board.png is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/pesquisa.html:419: +* At 
_site/posts/sonic-word-clouds.html:187: - http://niche-canada.org/2018/03/23/a-decade-of-programming-historians/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/pesquisa.html:420: +* At _site/posts/spanish-editor.html:105: - http://fredgibbs.net/posts/reflections-former-PH-editor is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/pesquisa.html:421: +* At _site/posts/spanish-editor.html:124: - http://clionauta.hypotheses.org/16979 is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/pesquisa.html:423: +* At _site/posts/spanish-editor.html:161: - http://humanidadesdigitales.net/blog/2017/03/17/the-programming-historian-en-espanol/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/politica-de-privacidade.html:88: +* At _site/posts/spanish-editor.html:187: 'a' tag is missing a reference -* At _site/pt/politica-de-privacidade.html:107: +* At _site/posts/spanish-editor.html:277: 'a' tag is missing a reference -* At _site/pt/politica-de-privacidade.html:144: +* At _site/posts/subject-specialist-editor.html:106: 'a' tag is missing a reference -* At _site/pt/politica-de-privacidade.html:203: +* At _site/posts/subject-specialist-editor.html:125: 'a' tag is missing a reference -* At _site/pt/ppi.html:88: +* At _site/posts/subject-specialist-editor.html:162: 'a' tag is missing a reference -* At _site/pt/ppi.html:107: +* At _site/posts/subject-specialist-editor.html:188: 'a' tag is missing a reference -* At _site/pt/ppi.html:144: +* At _site/posts/twenty-sixteen-review.html:107: 'a' tag is missing a reference -* At _site/pt/ppi.html:203: +* At _site/posts/twenty-sixteen-review.html:126: 'a' tag is missing a reference -* At _site/pt/reportar-um-erro.html:88: +* At _site/posts/twenty-sixteen-review.html:163: 'a' tag is missing a reference -* At _site/pt/reportar-um-erro.html:107: +* At _site/posts/twenty-sixteen-review.html:189: 'a' tag is missing a reference -* At _site/pt/reportar-um-erro.html:144: +* At _site/posts/two-new-PH-editors.html:106: 'a' tag is missing a reference -* At _site/pt/reportar-um-erro.html:203: +* At _site/posts/two-new-PH-editors.html:125: 'a' tag is missing a reference -* At _site/pt/sobre.html:88: +* At _site/posts/two-new-PH-editors.html:162: 'a' tag is missing a reference -* At _site/pt/sobre.html:107: +* At _site/posts/two-new-PH-editors.html:188: 'a' tag is missing a reference -* At _site/pt/sobre.html:144: +* At _site/posts/ucl-placement-2021.html:107: 'a' tag is missing a reference -* At _site/pt/sobre.html:203: +* At _site/posts/ucl-placement-2021.html:126: 'a' tag is missing a reference -* At _site/pt/sobre.html:266: +* At _site/posts/ucl-placement-2021.html:163: - http://dhawards.org/dhawards2016/results/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/sobre.html:266: +* At _site/posts/ucl-placement-2021.html:189: - http://humanidadesdigitaleshispanicas.es/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/sobre.html:266: +* At _site/posts/vote-dh-award.html:107: - http://dhawards.org/dhawards2022/results/ is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/sobre.html:280: +* At _site/posts/vote-dh-award.html:126: - http://digitalhistoryhacks.blogspot.com/2008/01/programming-historian.html is not an HTTPS link + 'a' tag is missing a reference -* At _site/pt/vagas.html:88: +* At _site/posts/vote-dh-award.html:163: 'a' tag is missing a reference -* At _site/pt/vagas.html:107: +* At _site/posts/vote-dh-award.html:189: 'a' tag is missing a 
reference -* At _site/pt/vagas.html:144: +* At _site/posts/welcome-martin-grandjean.html:107: 'a' tag is missing a reference -* At _site/pt/vagas.html:203: +* At _site/posts/welcome-martin-grandjean.html:126: 'a' tag is missing a reference -* At _site/translation-concordance.html:86: +* At _site/posts/welcome-martin-grandjean.html:163: 'a' tag is missing a reference -* At _site/translation-concordance.html:105: +* At _site/posts/welcome-martin-grandjean.html:189: 'a' tag is missing a reference -* At _site/translation-concordance.html:142: +* At _site/posts/welcome-mc-boucher.html:105: 'a' tag is missing a reference -* At _site/translation-concordance.html:168: +* At _site/posts/welcome-mc-boucher.html:124: 'a' tag is missing a reference -* At _site/translation-concordance.html:267: +* At _site/posts/welcome-mc-boucher.html:161: 'a' tag is missing a reference -* At _site/translation-concordance.html:270: +* At _site/posts/welcome-mc-boucher.html:187: 'a' tag is missing a reference -* At _site/translation-concordance.html:273: +* At _site/posts/welcome-ph-fr.html:105: 'a' tag is missing a reference -* At _site/translation-concordance.html:286: +* At _site/posts/welcome-ph-fr.html:124: 'a' tag is missing a reference -* At _site/translation-concordance.html:289: +* At _site/posts/welcome-ph-fr.html:161: 'a' tag is missing a reference -* At _site/translation-concordance.html:292: +* At _site/posts/welcome-ph-fr.html:187: 'a' tag is missing a reference -* At _site/translation-concordance.html:308: +* At _site/posts/welcome-to-ph2.html:107: 'a' tag is missing a reference -* At _site/translation-concordance.html:321: +* At _site/posts/welcome-to-ph2.html:126: 'a' tag is missing a reference -* At _site/translation-concordance.html:327: +* At _site/posts/welcome-to-ph2.html:163: 'a' tag is missing a reference -* At _site/translation-concordance.html:330: +* At _site/posts/welcome-to-ph2.html:189: 'a' tag is missing a reference -* At _site/translation-concordance.html:340: +* At _site/posts/welcome-zoe-leblanc.html:108: 'a' tag is missing a reference -* At _site/translation-concordance.html:346: +* At _site/posts/welcome-zoe-leblanc.html:127: 'a' tag is missing a reference -* At _site/translation-concordance.html:349: +* At _site/posts/welcome-zoe-leblanc.html:164: 'a' tag is missing a reference -* At _site/translation-concordance.html:359: +* At _site/posts/welcome-zoe-leblanc.html:190: 'a' tag is missing a reference -* At _site/translation-concordance.html:366: +* At _site/pt/apoiadores.html:88: 'a' tag is missing a reference -* At _site/translation-concordance.html:369: +* At _site/pt/apoiadores.html:107: 'a' tag is missing a reference -* At _site/translation-concordance.html:382: +* At _site/pt/apoiadores.html:144: 'a' tag is missing a reference -* At _site/translation-concordance.html:388: +* At _site/pt/apoiadores.html:203: 'a' tag is missing a reference -* At _site/translation-concordance.html:401: +* At _site/pt/contribua.html:88: 'a' tag is missing a reference -* At _site/translation-concordance.html:404: +* At _site/pt/contribua.html:107: 'a' tag is missing a reference -* At _site/translation-concordance.html:407: +* At _site/pt/contribua.html:144: 'a' tag is missing a reference -* At _site/translation-concordance.html:420: +* At _site/pt/contribua.html:203: 'a' tag is missing a reference -* At _site/translation-concordance.html:423: +* At _site/pt/directrizes-autor.html:88: 'a' tag is missing a reference -* At _site/translation-concordance.html:426: +* At 
_site/pt/directrizes-autor.html:107: 'a' tag is missing a reference -* At _site/translation-concordance.html:442: +* At _site/pt/directrizes-autor.html:144: 'a' tag is missing a reference -* At _site/translation-concordance.html:462: +* At _site/pt/directrizes-autor.html:203: 'a' tag is missing a reference -* At _site/translation-concordance.html:479: +* At _site/pt/directrizes-editor.html:88: 'a' tag is missing a reference -* At _site/translation-concordance.html:482: +* At _site/pt/directrizes-editor.html:107: 'a' tag is missing a reference -* At _site/translation-concordance.html:485: +* At _site/pt/directrizes-editor.html:144: 'a' tag is missing a reference -* At _site/translation-concordance.html:501: +* At _site/pt/directrizes-editor.html:203: 'a' tag is missing a reference -* At _site/translation-concordance.html:504: +* At _site/pt/directrizes-revisor.html:88: 'a' tag is missing a reference -* At _site/translation-concordance.html:517: +* At _site/pt/directrizes-revisor.html:107: 'a' tag is missing a reference -* At _site/translation-concordance.html:523: +* At _site/pt/directrizes-revisor.html:144: 'a' tag is missing a reference -* At _site/translation-concordance.html:555: +* At _site/pt/directrizes-revisor.html:203: 'a' tag is missing a reference -* At _site/translation-concordance.html:558: +* At _site/pt/directrizes-tradutor.html:88: 'a' tag is missing a reference -* At _site/translation-concordance.html:561: +* At _site/pt/directrizes-tradutor.html:107: 'a' tag is missing a reference -* At _site/translation-concordance.html:574: +* At _site/pt/directrizes-tradutor.html:144: 'a' tag is missing a reference -* At _site/translation-concordance.html:577: +* At _site/pt/directrizes-tradutor.html:203: 'a' tag is missing a reference -* At _site/translation-concordance.html:580: +* At _site/pt/doacoes.html:88: 'a' tag is missing a reference -* At _site/translation-concordance.html:593: +* At _site/pt/doacoes.html:107: 'a' tag is missing a reference -* At _site/translation-concordance.html:596: +* At _site/pt/doacoes.html:144: 'a' tag is missing a reference -* At _site/translation-concordance.html:631: +* At _site/pt/doacoes.html:203: 'a' tag is missing a reference -* At _site/translation-concordance.html:634: +* At _site/pt/equipe.html:88: 'a' tag is missing a reference -* At _site/translation-concordance.html:637: +* At _site/pt/equipe.html:107: 'a' tag is missing a reference -* At _site/translation-concordance.html:650: +* At _site/pt/equipe.html:144: 'a' tag is missing a reference -* At _site/translation-concordance.html:653: +* At _site/pt/equipe.html:203: 'a' tag is missing a reference -* At _site/translation-concordance.html:656: +* At _site/pt/eventos.html:88: 'a' tag is missing a reference -* At _site/translation-concordance.html:669: +* At _site/pt/eventos.html:107: 'a' tag is missing a reference -* At _site/translation-concordance.html:672: +* At _site/pt/eventos.html:144: 'a' tag is missing a reference -* At _site/translation-concordance.html:675: +* At _site/pt/eventos.html:203: 'a' tag is missing a reference -* At _site/translation-concordance.html:688: +* At _site/pt/index.html:89: 'a' tag is missing a reference -* At _site/translation-concordance.html:691: +* At _site/pt/index.html:108: 'a' tag is missing a reference -* At _site/translation-concordance.html:694: +* At _site/pt/index.html:145: 'a' tag is missing a reference -* At _site/translation-concordance.html:704: +* At _site/pt/index.html:204: 'a' tag is missing a reference -* At 
_site/translation-concordance.html:707: +* At _site/pt/jisc-tna-parceria.html:88: 'a' tag is missing a reference -* At _site/translation-concordance.html:713: +* At _site/pt/jisc-tna-parceria.html:107: 'a' tag is missing a reference -* At _site/translation-concordance.html:723: +* At _site/pt/jisc-tna-parceria.html:144: 'a' tag is missing a reference -* At _site/translation-concordance.html:730: +* At _site/pt/jisc-tna-parceria.html:203: 'a' tag is missing a reference -* At _site/translation-concordance.html:733: +* At _site/pt/licoes-politica-remocao.html:88: 'a' tag is missing a reference -* At _site/translation-concordance.html:752: +* At _site/pt/licoes-politica-remocao.html:107: 'a' tag is missing a reference -* At _site/translation-concordance.html:765: +* At _site/pt/licoes-politica-remocao.html:144: 'a' tag is missing a reference -* At _site/translation-concordance.html:768: +* At _site/pt/licoes-politica-remocao.html:203: 'a' tag is missing a reference -* At _site/translation-concordance.html:771: +* At _site/pt/licoes/HTML-lista-palavras-1.html:121: 'a' tag is missing a reference -* At _site/translation-concordance.html:781: +* At _site/pt/licoes/HTML-lista-palavras-1.html:140: 'a' tag is missing a reference -* At _site/translation-concordance.html:788: +* At _site/pt/licoes/HTML-lista-palavras-1.html:177: 'a' tag is missing a reference -* At _site/translation-concordance.html:791: +* At _site/pt/licoes/HTML-lista-palavras-1.html:236: 'a' tag is missing a reference -* At _site/translation-concordance.html:809: +* At _site/pt/licoes/HTML-lista-palavras-2.html:121: 'a' tag is missing a reference -* At _site/translation-concordance.html:825: +* At _site/pt/licoes/HTML-lista-palavras-2.html:140: 'a' tag is missing a reference -* At _site/translation-concordance.html:828: +* At _site/pt/licoes/HTML-lista-palavras-2.html:177: 'a' tag is missing a reference -* At _site/translation-concordance.html:831: +* At _site/pt/licoes/HTML-lista-palavras-2.html:236: 'a' tag is missing a reference -* At _site/translation-concordance.html:866: +* At _site/pt/licoes/algoritmos-agrupamento-scikit-learn-python.html:135: 'a' tag is missing a reference -* At _site/translation-concordance.html:885: +* At _site/pt/licoes/algoritmos-agrupamento-scikit-learn-python.html:154: 'a' tag is missing a reference -* At _site/translation-concordance.html:902: +* At _site/pt/licoes/algoritmos-agrupamento-scikit-learn-python.html:191: 'a' tag is missing a reference -* At _site/translation-concordance.html:905: +* At _site/pt/licoes/algoritmos-agrupamento-scikit-learn-python.html:250: 'a' tag is missing a reference -* At _site/translation-concordance.html:908: +* At _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html:135: 'a' tag is missing a reference -* At _site/translation-concordance.html:921: +* At _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html:154: 'a' tag is missing a reference -* At _site/translation-concordance.html:924: +* At _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html:191: 'a' tag is missing a reference -* At _site/translation-concordance.html:927: +* At _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html:250: 'a' tag is missing a reference -* At _site/translation-concordance.html:940: +* At _site/pt/licoes/analise-sentimento-R-syuzhet.html:119: 'a' tag is missing a reference -* At _site/translation-concordance.html:943: +* At _site/pt/licoes/analise-sentimento-R-syuzhet.html:138: 'a' tag is missing a reference -* At 
_site/translation-concordance.html:946: +* At _site/pt/licoes/analise-sentimento-R-syuzhet.html:175: 'a' tag is missing a reference -* At _site/translation-concordance.html:962: +* At _site/pt/licoes/analise-sentimento-R-syuzhet.html:234: 'a' tag is missing a reference -* At _site/translation-concordance.html:965: +* At _site/pt/licoes/analise-sentimento-exploracao-dados.html:119: 'a' tag is missing a reference -* At _site/translation-concordance.html:978: +* At _site/pt/licoes/analise-sentimento-exploracao-dados.html:138: 'a' tag is missing a reference -* At _site/translation-concordance.html:981: +* At _site/pt/licoes/analise-sentimento-exploracao-dados.html:175: 'a' tag is missing a reference -* At _site/translation-concordance.html:984: +* At _site/pt/licoes/analise-sentimento-exploracao-dados.html:234: 'a' tag is missing a reference -* At _site/translation-concordance.html:997: +* At _site/pt/licoes/aplicacao-web-interativa-r-shiny-leaflet.html:119: 'a' tag is missing a reference -* At _site/translation-concordance.html:1000: +* At _site/pt/licoes/aplicacao-web-interativa-r-shiny-leaflet.html:138: 'a' tag is missing a reference -* At _site/translation-concordance.html:1003: +* At _site/pt/licoes/aplicacao-web-interativa-r-shiny-leaflet.html:175: 'a' tag is missing a reference -* At _site/translation-concordance.html:1019: +* At _site/pt/licoes/aplicacao-web-interativa-r-shiny-leaflet.html:234: 'a' tag is missing a reference -* At _site/translation-concordance.html:1022: +* At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:121: 'a' tag is missing a reference -* At _site/translation-concordance.html:1051: +* At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:140: 'a' tag is missing a reference -* At _site/translation-concordance.html:1057: +* At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:177: 'a' tag is missing a reference -* At _site/translation-concordance.html:1060: +* At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:236: 'a' tag is missing a reference -* At _site/translation-concordance.html:1073: +* At _site/pt/licoes/camadas-vetoriais-qgis.html:123: 'a' tag is missing a reference -* At _site/translation-concordance.html:1076: +* At _site/pt/licoes/camadas-vetoriais-qgis.html:142: 'a' tag is missing a reference -* At _site/translation-concordance.html:1079: +* At _site/pt/licoes/camadas-vetoriais-qgis.html:179: 'a' tag is missing a reference -* At _site/translation-concordance.html:1092: +* At _site/pt/licoes/camadas-vetoriais-qgis.html:238: 'a' tag is missing a reference -* At _site/translation-concordance.html:1095: +* At _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html:121: 'a' tag is missing a reference -* At _site/translation-concordance.html:1098: +* At _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html:140: 'a' tag is missing a reference -* At _site/translation-concordance.html:1111: +* At _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html:177: 'a' tag is missing a reference -* At _site/translation-concordance.html:1117: +* At _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html:236: 'a' tag is missing a reference -* At _site/translation-concordance.html:1130: +* At _site/pt/licoes/contar-frequencias-palavras-python.html:121: 'a' tag is missing a reference -* At _site/translation-concordance.html:1133: +* At _site/pt/licoes/contar-frequencias-palavras-python.html:140: 'a' tag is missing a reference -* At 
_site/translation-concordance.html:1136: +* At _site/pt/licoes/contar-frequencias-palavras-python.html:177: 'a' tag is missing a reference -* At _site/translation-concordance.html:1152: +* At _site/pt/licoes/contar-frequencias-palavras-python.html:236: 'a' tag is missing a reference -* At _site/translation-concordance.html:1171: +* At _site/pt/licoes/criacao-visualizacao-ficheiros-html-python.html:121: 'a' tag is missing a reference -* At _site/translation-concordance.html:1174: +* At _site/pt/licoes/criacao-visualizacao-ficheiros-html-python.html:140: 'a' tag is missing a reference -* At _site/translation-concordance.html:1184: +* At _site/pt/licoes/criacao-visualizacao-ficheiros-html-python.html:177: 'a' tag is missing a reference -* At _site/translation-concordance.html:1190: +* At _site/pt/licoes/criacao-visualizacao-ficheiros-html-python.html:236: 'a' tag is missing a reference -* At _site/translation-concordance.html:1193: +* At _site/pt/licoes/criar-exposicao-omeka.html:121: 'a' tag is missing a reference -* At _site/translation-concordance.html:1207: +* At _site/pt/licoes/criar-exposicao-omeka.html:140: 'a' tag is missing a reference -* At _site/translation-concordance.html:1226: +* At _site/pt/licoes/criar-exposicao-omeka.html:177: 'a' tag is missing a reference -* At _site/translation-concordance.html:1229: +* At _site/pt/licoes/criar-exposicao-omeka.html:236: 'a' tag is missing a reference -* At _site/translation-concordance.html:1245: +* At _site/pt/licoes/download-automatico-wget.html:119: 'a' tag is missing a reference -* At _site/translation-concordance.html:1248: +* At _site/pt/licoes/download-automatico-wget.html:138: 'a' tag is missing a reference -* At _site/translation-concordance.html:1264: +* At _site/pt/licoes/download-automatico-wget.html:175: 'a' tag is missing a reference -* At _site/translation-concordance.html:1267: +* At _site/pt/licoes/download-automatico-wget.html:234: 'a' tag is missing a reference -* At _site/translation-concordance.html:1270: +* At _site/pt/licoes/download-multiplos-registros-query-strings.html:119: 'a' tag is missing a reference -* At _site/translation-concordance.html:1283: +* At _site/pt/licoes/download-multiplos-registros-query-strings.html:138: 'a' tag is missing a reference -* At _site/translation-concordance.html:1286: +* At _site/pt/licoes/download-multiplos-registros-query-strings.html:175: 'a' tag is missing a reference -* At _site/translation-concordance.html:1289: +* At _site/pt/licoes/download-multiplos-registros-query-strings.html:234: 'a' tag is missing a reference -* At _site/translation-concordance.html:1302: +* At _site/pt/licoes/download-paginas-web-python.html:121: 'a' tag is missing a reference -* At _site/translation-concordance.html:1305: +* At _site/pt/licoes/download-paginas-web-python.html:140: 'a' tag is missing a reference -* At _site/translation-concordance.html:1308: +* At _site/pt/licoes/download-paginas-web-python.html:177: 'a' tag is missing a reference -* At _site/translation-concordance.html:1356: +* At _site/pt/licoes/download-paginas-web-python.html:236: 'a' tag is missing a reference -* At _site/translation-concordance.html:1362: +* At _site/pt/licoes/explorar-analisar-dados-rede-python.html:125: 'a' tag is missing a reference -* At _site/translation-concordance.html:1365: +* At _site/pt/licoes/explorar-analisar-dados-rede-python.html:144: 'a' tag is missing a reference -* At _site/translation-concordance.html:1378: +* At _site/pt/licoes/explorar-analisar-dados-rede-python.html:181: 'a' tag is missing 
-* At _site/translation-concordance.html:1384:
+* At _site/pt/licoes/explorar-analisar-dados-rede-python.html:240:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1397:
+* At _site/pt/licoes/extrair-paginas-ilustradas-com-python.html:119:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1400:
+* At _site/pt/licoes/extrair-paginas-ilustradas-com-python.html:138:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1416:
+* At _site/pt/licoes/extrair-paginas-ilustradas-com-python.html:175:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1419:
+* At _site/pt/licoes/extrair-paginas-ilustradas-com-python.html:234:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1422:
+* At _site/pt/licoes/extrair-palavras-chave.html:119:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1439:
+* At _site/pt/licoes/extrair-palavras-chave.html:138:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1452:
+* At _site/pt/licoes/extrair-palavras-chave.html:175:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1458:
+* At _site/pt/licoes/extrair-palavras-chave.html:234:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1461:
+* At _site/pt/licoes/geocodificando-qgis.html:119:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1475:
+* At _site/pt/licoes/geocodificando-qgis.html:138:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1478:
+* At _site/pt/licoes/geocodificando-qgis.html:175:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1481:
+* At _site/pt/licoes/geocodificando-qgis.html:234:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1497:
+* At _site/pt/licoes/geocodificando-qgis.html:599:

-  'a' tag is missing a reference
+  [url](https://www.oracle.com/java/technologies/downloads/#java8) is an invalid URL

-* At _site/translation-concordance.html:1500:
+* At _site/pt/licoes/georreferenciamento-qgis.html:123:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1533:
+* At _site/pt/licoes/georreferenciamento-qgis.html:142:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1536:
+* At _site/pt/licoes/georreferenciamento-qgis.html:179:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1549:
+* At _site/pt/licoes/georreferenciamento-qgis.html:238:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1552:
+* At _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html:117:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1555:
+* At _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html:136:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1575:
+* At _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html:173:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1578:
+* At _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html:232:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1595:
+* At _site/pt/licoes/index.html:88:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1598:
+* At _site/pt/licoes/index.html:107:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1611:
+* At _site/pt/licoes/index.html:144:

  'a' tag is missing a reference
-* At _site/translation-concordance.html:1614:
+* At _site/pt/licoes/index.html:203:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1617:
+* At _site/pt/licoes/instalacao-linux.html:121:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1630:
+* At _site/pt/licoes/instalacao-linux.html:140:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1633:
+* At _site/pt/licoes/instalacao-linux.html:177:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1636:
+* At _site/pt/licoes/instalacao-linux.html:236:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1649:
+* At _site/pt/licoes/instalacao-mac.html:121:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1652:
+* At _site/pt/licoes/instalacao-mac.html:140:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1655:
+* At _site/pt/licoes/instalacao-mac.html:177:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1687:
+* At _site/pt/licoes/instalacao-mac.html:236:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1690:
+* At _site/pt/licoes/instalacao-modulos-python-pip.html:119:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1693:
+* At _site/pt/licoes/instalacao-modulos-python-pip.html:138:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1706:
+* At _site/pt/licoes/instalacao-modulos-python-pip.html:175:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1709:
+* At _site/pt/licoes/instalacao-modulos-python-pip.html:234:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1712:
+* At _site/pt/licoes/instalacao-windows.html:121:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1725:
+* At _site/pt/licoes/instalacao-windows.html:140:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1728:
+* At _site/pt/licoes/instalacao-windows.html:177:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1731:
+* At _site/pt/licoes/instalacao-windows.html:236:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1744:
+* At _site/pt/licoes/introducao-ao-markdown.html:119:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1747:
+* At _site/pt/licoes/introducao-ao-markdown.html:138:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1750:
+* At _site/pt/licoes/introducao-ao-markdown.html:175:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1763:
+* At _site/pt/licoes/introducao-ao-markdown.html:234:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1766:
+* At _site/pt/licoes/introducao-codificacao-textos-tei-1.html:119:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1769:
+* At _site/pt/licoes/introducao-codificacao-textos-tei-1.html:138:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1801:
+* At _site/pt/licoes/introducao-codificacao-textos-tei-1.html:175:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1804:
+* At _site/pt/licoes/introducao-codificacao-textos-tei-1.html:234:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1807:
+* At _site/pt/licoes/introducao-dados-abertos-conectados.html:119:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1823:
+* At _site/pt/licoes/introducao-dados-abertos-conectados.html:138:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1842:
+* At _site/pt/licoes/introducao-dados-abertos-conectados.html:175:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1845:
+* At _site/pt/licoes/introducao-dados-abertos-conectados.html:234:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1858:
+* At _site/pt/licoes/introducao-estilometria-python.html:119:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1861:
+* At _site/pt/licoes/introducao-estilometria-python.html:138:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1864:
+* At _site/pt/licoes/introducao-estilometria-python.html:175:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1878:
+* At _site/pt/licoes/introducao-estilometria-python.html:234:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1884:
+* At _site/pt/licoes/introducao-instalacao-python.html:121:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1894:
+* At _site/pt/licoes/introducao-instalacao-python.html:140:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1901:
+* At _site/pt/licoes/introducao-instalacao-python.html:177:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1904:
+* At _site/pt/licoes/introducao-instalacao-python.html:236:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1914:
+* At _site/pt/licoes/introducao-jupyter-notebooks.html:123:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1920:
+* At _site/pt/licoes/introducao-jupyter-notebooks.html:142:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1933:
+* At _site/pt/licoes/introducao-jupyter-notebooks.html:179:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1939:
+* At _site/pt/licoes/introducao-jupyter-notebooks.html:238:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1942:
+* At _site/pt/licoes/introducao-linha-comando-bash.html:121:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1952:
+* At _site/pt/licoes/introducao-linha-comando-bash.html:140:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1958:
+* At _site/pt/licoes/introducao-linha-comando-bash.html:177:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1961:
+* At _site/pt/licoes/introducao-linha-comando-bash.html:236:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1978:
+* At _site/pt/licoes/introducao-mysql-r.html:119:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1981:
+* At _site/pt/licoes/introducao-mysql-r.html:138:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1991:
+* At _site/pt/licoes/introducao-mysql-r.html:175:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:1994:
+* At _site/pt/licoes/introducao-mysql-r.html:234:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2000:
+* At _site/pt/licoes/introducao-omeka-net.html:119:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2035:
+* At _site/pt/licoes/introducao-omeka-net.html:138:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2038:
+* At _site/pt/licoes/introducao-omeka-net.html:175:

  'a' tag is missing a reference
-* At _site/translation-concordance.html:2052:
+* At _site/pt/licoes/introducao-omeka-net.html:234:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2055:
+* At _site/pt/licoes/investigar-literatura-lusofona-literateca.html:117:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2058:
+* At _site/pt/licoes/investigar-literatura-lusofona-literateca.html:136:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2072:
+* At _site/pt/licoes/investigar-literatura-lusofona-literateca.html:173:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2088:
+* At _site/pt/licoes/investigar-literatura-lusofona-literateca.html:232:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2091:
+* At _site/pt/licoes/limpar-dados-openrefine.html:123:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2094:
+* At _site/pt/licoes/limpar-dados-openrefine.html:142:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2110:
+* At _site/pt/licoes/limpar-dados-openrefine.html:179:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2113:
+* At _site/pt/licoes/limpar-dados-openrefine.html:238:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2116:
+* At _site/pt/licoes/manipulacao-transformacao-dados-r.html:119:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2129:
+* At _site/pt/licoes/manipulacao-transformacao-dados-r.html:138:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2151:
+* At _site/pt/licoes/manipulacao-transformacao-dados-r.html:175:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2167:
+* At _site/pt/licoes/manipulacao-transformacao-dados-r.html:234:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2170:
+* At _site/pt/licoes/manipular-strings-python.html:121:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2173:
+* At _site/pt/licoes/manipular-strings-python.html:140:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2205:
+* At _site/pt/licoes/manipular-strings-python.html:177:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2208:
+* At _site/pt/licoes/manipular-strings-python.html:236:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2211:
+* At _site/pt/licoes/nocoes-basicas-R-dados-tabulares.html:119:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2227:
+* At _site/pt/licoes/nocoes-basicas-R-dados-tabulares.html:138:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2262:
+* At _site/pt/licoes/nocoes-basicas-R-dados-tabulares.html:175:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2265:
+* At _site/pt/licoes/nocoes-basicas-R-dados-tabulares.html:234:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2268:
+* At _site/pt/licoes/nocoes-basicas-paginas-web-html.html:121:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2281:
+* At _site/pt/licoes/nocoes-basicas-paginas-web-html.html:140:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2284:
+* At _site/pt/licoes/nocoes-basicas-paginas-web-html.html:177:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2287:
+* At _site/pt/licoes/nocoes-basicas-paginas-web-html.html:236:

  'a' tag is missing a reference
-* At _site/translation-concordance.html:2319:
+* At _site/pt/licoes/normalizacao-dados-textuais-python.html:121:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2322:
+* At _site/pt/licoes/normalizacao-dados-textuais-python.html:140:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2325:
+* At _site/pt/licoes/normalizacao-dados-textuais-python.html:177:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2341:
+* At _site/pt/licoes/normalizacao-dados-textuais-python.html:236:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2360:
+* At _site/pt/licoes/palavras-chave-contexto-usando-n-grams-python.html:121:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2363:
+* At _site/pt/licoes/palavras-chave-contexto-usando-n-grams-python.html:140:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2373:
+* At _site/pt/licoes/palavras-chave-contexto-usando-n-grams-python.html:177:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2376:
+* At _site/pt/licoes/palavras-chave-contexto-usando-n-grams-python.html:236:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2382:
+* At _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html:119:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2411:
+* At _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html:138:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2417:
+* At _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html:175:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2420:
+* At _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html:234:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2433:
+* At _site/pt/licoes/processamento-basico-texto-r.html:121:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2436:
+* At _site/pt/licoes/processamento-basico-texto-r.html:140:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2474:
+* At _site/pt/licoes/processamento-basico-texto-r.html:177:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2487:
+* At _site/pt/licoes/processamento-basico-texto-r.html:236:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2493:
+* At _site/pt/licoes/qgis-camadas.html:123:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2496:
+* At _site/pt/licoes/qgis-camadas.html:142:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2512:
+* At _site/pt/licoes/qgis-camadas.html:179:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2515:
+* At _site/pt/licoes/qgis-camadas.html:238:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2529:
+* At _site/pt/licoes/reutilizacao-codigo-modularidade-python.html:121:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2532:
+* At _site/pt/licoes/reutilizacao-codigo-modularidade-python.html:140:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2551:
+* At _site/pt/licoes/reutilizacao-codigo-modularidade-python.html:177:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2567:
+* At _site/pt/licoes/reutilizacao-codigo-modularidade-python.html:236:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2570:
+* At _site/pt/licoes/saida-dados-ficheiro-html-python.html:121:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2573:
+* At _site/pt/licoes/saida-dados-ficheiro-html-python.html:140:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2586:
+* At _site/pt/licoes/saida-dados-ficheiro-html-python.html:177:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2589:
+* At _site/pt/licoes/saida-dados-ficheiro-html-python.html:236:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2605:
+* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:119:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2608:
+* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:138:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2611:
+* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:175:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2621:
+* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:234:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2624:
+* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1118:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2627:
+* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1119:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2667:
+* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1120:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2670:
+* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1121:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2683:
+* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1122:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2686:
+* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1123:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2689:
+* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1124:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2702:
+* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1128:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2705:
+* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1130:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2708:
+* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1132:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2724:
+* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1134:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2727:
+* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1136:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2740:
+* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1138:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2743:
+* At _site/pt/licoes/sumarizacao-narrativas-web-python.html:119:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2746:
+* At _site/pt/licoes/sumarizacao-narrativas-web-python.html:138:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2756:
+* At _site/pt/licoes/sumarizacao-narrativas-web-python.html:175:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2759:
+* At _site/pt/licoes/sumarizacao-narrativas-web-python.html:234:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2781:
+* At _site/pt/licoes/trabalhando-ficheiros-texto-python.html:121:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2784:
+* At _site/pt/licoes/trabalhando-ficheiros-texto-python.html:140:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2797:
+* At _site/pt/licoes/trabalhando-ficheiros-texto-python.html:177:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2800:
+* At _site/pt/licoes/trabalhando-ficheiros-texto-python.html:236:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2803:
+* At _site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html:135:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2816:
+* At _site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html:154:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2819:
+* At _site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html:191:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2822:
+* At _site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html:250:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2835:
+* At _site/pt/licoes/visualizacao-animacao-tabelas-historicas-R.html:119:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2841:
+* At _site/pt/licoes/visualizacao-animacao-tabelas-historicas-R.html:138:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2873:
+* At _site/pt/licoes/visualizacao-animacao-tabelas-historicas-R.html:175:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2876:
+* At _site/pt/licoes/visualizacao-animacao-tabelas-historicas-R.html:234:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2879:
+* At _site/pt/licoes/visualizacao-basica-dados-tabulares-r.html:117:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2893:
+* At _site/pt/licoes/visualizacao-basica-dados-tabulares-r.html:136:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2896:
+* At _site/pt/licoes/visualizacao-basica-dados-tabulares-r.html:173:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2899:
+* At _site/pt/licoes/visualizacao-basica-dados-tabulares-r.html:232:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2909:
+* At _site/pt/pesquisa.html:88:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2915:
+* At _site/pt/pesquisa.html:107:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2918:
+* At _site/pt/pesquisa.html:144:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2931:
+* At _site/pt/pesquisa.html:203:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2934:
+* At _site/pt/politica-de-privacidade.html:88:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2966:
+* At _site/pt/politica-de-privacidade.html:107:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2969:
+* At _site/pt/politica-de-privacidade.html:144:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2972:
+* At _site/pt/politica-de-privacidade.html:203:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:2985:
+* At _site/pt/ppi.html:88:

  'a' tag is missing a reference
-* At _site/translation-concordance.html:2992:
+* At _site/pt/ppi.html:107:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3008:
+* At _site/pt/ppi.html:144:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3011:
+* At _site/pt/ppi.html:203:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3014:
+* At _site/pt/reportar-um-erro.html:88:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3046:
+* At _site/pt/reportar-um-erro.html:107:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3049:
+* At _site/pt/reportar-um-erro.html:144:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3052:
+* At _site/pt/reportar-um-erro.html:203:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3115:
+* At _site/pt/sobre.html:88:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3118:
+* At _site/pt/sobre.html:107:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3121:
+* At _site/pt/sobre.html:144:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3420:
+* At _site/pt/sobre.html:203:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3423:
+* At _site/pt/vagas.html:88:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3426:
+* At _site/pt/vagas.html:107:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3458:
+* At _site/pt/vagas.html:144:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3461:
+* At _site/pt/vagas.html:203:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3464:
+* At _site/translation-concordance.html:86:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3496:
+* At _site/translation-concordance.html:105:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3499:
+* At _site/translation-concordance.html:142:

  'a' tag is missing a reference

-* At _site/translation-concordance.html:3502:
+* At _site/translation-concordance.html:168:

  'a' tag is missing a reference

@@ -17148,54 +8928,6 @@ For the Links check, the following failures were found:
  'a' tag is missing a reference

-* At _site/troubleshooting.html:303:
-
-  http://stackoverflow.com/ is not an HTTPS link
-
-* At _site/troubleshooting.html:303:
-
-  http://mail.python.org/mailman/listinfo/tutor is not an HTTPS link
-
-* At _site/troubleshooting.html:322:
-
-  http://web.archive.org/web/20130101093828/http://stackoverflow.com/faq is not an HTTPS link
-
-* At _site/troubleshooting.html:373:
-
-  http://wiki.python.org/moin/BeginnersGuide/NonProgrammers is not an HTTPS link
-
-* At _site/troubleshooting.html:375:
-
-  http://learnpython.org/ is not an HTTPS link
-
-* At _site/troubleshooting.html:379:
-
-  http://www.w3schools.com/html/default.asp is not an HTTPS link
-
-* At _site/troubleshooting.html:387:
-
-  http://wiki.python.org/moin/BeginnersGuide/Programmers is not an HTTPS link
-
-* At _site/troubleshooting.html:389:
-
-  http://docs.python.org/ is not an HTTPS link
-
-* At _site/troubleshooting.html:392:
-
-  http://www.diveintopython.net is not an HTTPS link
-
-* At _site/troubleshooting.html:398:
-
-  http://www.worldcat.org/oclc/156890981 is not an HTTPS link
-
-* At _site/troubleshooting.html:399:
-
-  http://www.worldcat.org/oclc/65765375 is not an HTTPS link
-
-* At _site/troubleshooting.html:400:
-
-  http://www.worldcat.org/oclc/59007845 is not an HTTPS link
-
 For the Links > External check, the following failures were found:
 * At _site/404.html:303:

@@ -17208,107 +8940,107 @@ For the Links > External check, the following failures were found:
 * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:199:

-  External link http://www.sheffield.ac.uk/hri/ failed: Forbidden (status code 403)
+  External link https://www.sheffield.ac.uk/hri/ failed: Forbidden (status code 403)

 * At _site/assets/mapping-with-python-leaflet/exercises/exercise00 - original/mymap.html:3:

-  External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
+  External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/assets/mapping-with-python-leaflet/exercises/exercise00 - original/mymap.html:4:

-  External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
+  External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/assets/mapping-with-python-leaflet/exercises/exercise01/mymap.html:3:

-  External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
+  External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/assets/mapping-with-python-leaflet/exercises/exercise01/mymap.html:4:

-  External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
+  External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/assets/mapping-with-python-leaflet/exercises/exercise02/mymap.html:3:

-  External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
+  External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/assets/mapping-with-python-leaflet/exercises/exercise02/mymap.html:4:

-  External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
+  External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/assets/mapping-with-python-leaflet/exercises/exercise03/mymap.html:3:

-  External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
+  External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/assets/mapping-with-python-leaflet/exercises/exercise03/mymap.html:4:

-  External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
+  External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/assets/mapping-with-python-leaflet/exercises/exercise04/mymap.html:3:

-  External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
+  External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/assets/mapping-with-python-leaflet/exercises/exercise04/mymap.html:4:

-  External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
+  External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/assets/mapping-with-python-leaflet/exercises/exercise05/mymap.html:3:

-  External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
+  External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/assets/mapping-with-python-leaflet/exercises/exercise05/mymap.html:4:

-  External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
+  External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/assets/mapping-with-python-leaflet/map/mymap-onepage.html:3:

-  External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
+  External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/assets/mapping-with-python-leaflet/map/mymap-onepage.html:4:

-  External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
+  External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/assets/mapping-with-python-leaflet/map/mymap.html:3:

-  External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
+  External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/assets/mapping-with-python-leaflet/map/mymap.html:4:

-  External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
+  External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:199:

-  External link http://www.sheffield.ac.uk/hri/ failed: Forbidden (status code 403)
+  External link https://www.sheffield.ac.uk/hri/ failed: Forbidden (status code 403)

 * At _site/blog/index.html:506:

@@ -17450,12 +9182,30 @@ Sometimes, making too many requests at once also breaks things. (status code 0)
   External link https://github.com/programminghistorian/jekyll/commits/gh-pages/blog/page9/index.html failed (status code 429)

+* At _site/en/about.html:264:
+
+  External link https://dhawards.org/dhawards2022/results/ failed with something very wrong.
+It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
+Sometimes, making too many requests at once also breaks things. (status code 0)
+
+* At _site/en/about.html:264:
+
+  External link https://dhawards.org/dhawards2016/results/ failed with something very wrong.
+It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
+Sometimes, making too many requests at once also breaks things. (status code 0)
+
 * At _site/en/about.html:264:

   External link https://openpublishingawards.org/results/2021/index.html failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

+* At _site/en/about.html:264:
+
+  External link https://dhawards.org/dhawards2017/results/ failed with something very wrong.
+It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
+Sometimes, making too many requests at once also breaks things. (status code 0)
+
 * At _site/en/about.html:322:

   External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)

@@ -17476,6 +9226,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0)
   External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/author-guidelines.md failed (status code 429)

+* At _site/en/contribute.html:298:
+
+  External link https://www.worldcat.org/title/programming-historian-en-espanol/oclc/1061292935&referer=brief_results failed (status code 403)
+
 * At _site/en/contribute.html:353:

   External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)

@@ -17544,10 +9298,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0)
   External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/individual.md failed (status code 429)

-* At _site/en/ipp.html:269:
-
-  External link https://www.oecd.org/en/topics/sub-issues/oda-eligibility-and-conditions/dac-list-of-oda-recipients.html failed (status code 403)
-
 * At _site/en/ipp.html:411:

   External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)

@@ -17572,12 +9322,22 @@ Sometimes, making too many requests at once also breaks things. (status code 0)
   External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lesson-retirement-policy.md failed (status code 429)

+* At _site/en/lessons/analyzing-documents-with-tfidf.html:1478:
+
+  External link https://jonathanstray.com/a-full-text-visualization-of-the-iraq-war-logs failed with something very wrong.
+It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
+Sometimes, making too many requests at once also breaks things. (status code 0)
+
 * At _site/en/lessons/analyzing-documents-with-tfidf.html:1478:

   External link https://www.overviewdocs.com failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

+* At _site/en/lessons/analyzing-documents-with-tfidf.html:1498:
+
+  External link https://doi.org/10.18653/v1/W18-2502 failed (status code 409)
+
 * At _site/en/lessons/analyzing-documents-with-tfidf.html:1528:

   External link https://www.overviewdocs.com failed with something very wrong.

@@ -17588,13 +9348,21 @@ Sometimes, making too many requests at once also breaks things. (status code 0)
   External link https://www.nytimes.com/2019/01/02/obituaries/karen-sparck-jones-overlooked.html failed (status code 403)

+* At _site/en/lessons/analyzing-documents-with-tfidf.html:1651:
+
+  External link https://jonathanstray.com/a-full-text-visualization-of-the-iraq-war-logs failed with something very wrong.
+It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
+Sometimes, making too many requests at once also breaks things. (status code 0)
+
 * At _site/en/lessons/analyzing-documents-with-tfidf.html:1663:

   External link https://datascience.stackexchange.com/questions/21950/why-we-should-not-feed-lda-with-tfidf failed (status code 403)

 * At _site/en/lessons/analyzing-documents-with-tfidf.html:1666:

-  External link http://journalofdigitalhumanities.org/2-1/words-alone-by-benjamin-m-schmidt/ failed: Moved Permanently (status code 301)
+  External link https://journalofdigitalhumanities.org/2-1/words-alone-by-benjamin-m-schmidt/ failed with something very wrong.
+It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
+Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/en/lessons/analyzing-documents-with-tfidf.html:2207:

@@ -17618,57 +9386,57 @@ Sometimes, making too many requests at once also breaks things. (status code 0)
 * At _site/en/lessons/applied-archival-downloading-with-wget.html:471:

-  External link http://www.activehistory.ca failed with something very wrong.
+  External link https://www.activehistory.ca failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)
 * At _site/en/lessons/applied-archival-downloading-with-wget.html:631:

-  External link http://nla.gov.au/nla.ms-ms5393-1-s1-v.jpg failed: Server Error (status code 500)
+  External link https://nla.gov.au/nla.ms-ms5393-1-s1-v.jpg failed (status code 500)

 * At _site/en/lessons/applied-archival-downloading-with-wget.html:633:

-  External link http://nla.gov.au/nla.ms-ms5393-1-s127-v.jpg failed: Server Error (status code 500)
+  External link https://nla.gov.au/nla.ms-ms5393-1-s127-v.jpg failed (status code 500)

 * At _site/en/lessons/applied-archival-downloading-with-wget.html:680:

-  External link http://memory.loc.gov/cgi-bin/ampage?collId=mtj1&fileName=mtj1page001.db&recNum=1&itemLink=/ammem/collections/jefferson_papers/mtjser1.html&linkText=6 failed: got a time out (response code 0) (status code 0)
+  External link https://memory.loc.gov/cgi-bin/ampage?collId=mtj1&fileName=mtj1page001.db&recNum=1&itemLink=/ammem/collections/jefferson_papers/mtjser1.html&linkText=6 failed (status code 404)

 * At _site/en/lessons/applied-archival-downloading-with-wget.html:689:

-  External link http://memory.loc.gov/master/mss/mtj/mtj1/001/0000/ failed: got a time out (response code 0) (status code 0)
+  External link https://memory.loc.gov/master/mss/mtj/mtj1/001/0000/ failed (status code 403)

 * At _site/en/lessons/applied-archival-downloading-with-wget.html:701:

-  External link http://memory.loc.gov/master/mss/mtj/mtj1/001/0000/ failed: got a time out (response code 0) (status code 0)
+  External link https://memory.loc.gov/master/mss/mtj/mtj1/001/0000/ failed (status code 403)

 * At _site/en/lessons/applied-archival-downloading-with-wget.html:703:

-  External link http://memory.loc.gov/master/mss/mtj/mtj1/001/0100/ failed: got a time out (response code 0) (status code 0)
+  External link https://memory.loc.gov/master/mss/mtj/mtj1/001/0100/ failed (status code 403)

 * At _site/en/lessons/applied-archival-downloading-with-wget.html:705:

-  External link http://memory.loc.gov/master/mss/mtj/mtj1/001/0200/ failed: got a time out (response code 0) (status code 0)
+  External link https://memory.loc.gov/master/mss/mtj/mtj1/001/0200/ failed (status code 403)

 * At _site/en/lessons/applied-archival-downloading-with-wget.html:709:

-  External link http://memory.loc.gov/master/mss/mtj/mtj1/001/1400 failed: got a time out (response code 0) (status code 0)
+  External link https://memory.loc.gov/master/mss/mtj/mtj1/001/1400 failed (status code 403)

 * At _site/en/lessons/applied-archival-downloading-with-wget.html:735:

-  External link http://cushing.med.yale.edu/gsdl/collect/mdposter/ failed with something very wrong.
+  External link https://cushing.med.yale.edu/gsdl/collect/mdposter/ failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/en/lessons/applied-archival-downloading-with-wget.html:748:

-  External link http://cushing.med.yale.edu/images/mdposter/full/poster0001.jpg failed with something very wrong.
+  External link https://cushing.med.yale.edu/images/mdposter/full/poster0001.jpg failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)
 * At _site/en/lessons/applied-archival-downloading-with-wget.html:753:

-  External link http://cushing.med.yale.edu/images/mdposter/full/poster0637.jpg failed with something very wrong.
+  External link https://cushing.med.yale.edu/images/mdposter/full/poster0637.jpg failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

@@ -17710,25 +9478,23 @@ Sometimes, making too many requests at once also breaks things. (status code 0)
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

-* At _site/en/lessons/beginners-guide-to-twitter-data.html:846:
-
-  External link https://digitalfellows.commons.gc.cuny.edu/2019/06/03/finding-the-right-tools-for-mapping/ failed: got a time out (response code 0) (status code 0)
-
 * At _site/en/lessons/beginners-guide-to-twitter-data.html:864:

-  External link https://tweetsets.library.gwu.edu failed with something very wrong.
+  External link https://tweetsets.library.gwu.edu/full-dataset/ failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/en/lessons/beginners-guide-to-twitter-data.html:864:

-  External link https://tweetsets.library.gwu.edu/full-dataset/ failed with something very wrong.
+  External link https://tweetsets.library.gwu.edu failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/en/lessons/beginners-guide-to-twitter-data.html:868:

-  External link http://journalofdigitalhumanities.org/1-1/demystifying-networks-by-scott-weingart/ failed: Moved Permanently (status code 301)
+  External link https://journalofdigitalhumanities.org/1-1/demystifying-networks-by-scott-weingart/ failed with something very wrong.
+It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
+Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/en/lessons/beginners-guide-to-twitter-data.html:2577:

@@ -17738,9 +9504,15 @@ Sometimes, making too many requests at once also breaks things. (status code 0)
   External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/beginners-guide-to-twitter-data.md failed (status code 429)

+* At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1474:
+
+  External link https://jekyllthemes.org/ failed with something very wrong.
+It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
+Sometimes, making too many requests at once also breaks things. (status code 0)
+
 * At _site/en/lessons/building-static-sites-with-jekyll-github-pages.html:1545:

-  External link http://jekyll-windows.juthilo.com/ failed with something very wrong.
+  External link https://jekyll-windows.juthilo.com/ failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

@@ -17752,6 +9524,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0)
   External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/building-static-sites-with-jekyll-github-pages.md failed (status code 429)

+* At _site/en/lessons/calibrating-radiocarbon-dates-r.html:785:
+
+  External link https://calib.org failed: got a time out (response code 0) (status code 0)
+
 * At _site/en/lessons/calibrating-radiocarbon-dates-r.html:1209:

   External link https://doi.org/10.1126/science.105.2735.576 failed (status code 403)

@@ -17780,6 +9556,12 @@ Sometimes, making too many requests at once also breaks things. (status code 0)
   External link https://powerhouse.com.au/ failed (status code 429)

+* At _site/en/lessons/cleaning-data-with-openrefine.html:579:
+
+  External link https://vis.stanford.edu/papers/wrangler/ failed with something very wrong.
+It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
+Sometimes, making too many requests at once also breaks things. (status code 0)
+
 * At _site/en/lessons/cleaning-data-with-openrefine.html:598:

   External link https://powerhouse.com.au/ failed (status code 429)

@@ -17840,11 +9622,11 @@ Sometimes, making too many requests at once also breaks things. (status code 0)
 * At _site/en/lessons/collaborative-blog-with-jekyll-github.html:1508:

-  External link https://github.com/scholarslab/scholarslab.org/blob/master/docs/authoring-and-editing.md#markdown--formatting failed (status code 429)
+  External link https://github.com/scholarslab/scholarslab.org/blob/master/docs/authoring-and-editing.md#markdown--formatting failed: https://github.com/scholarslab/scholarslab.org/blob/master/docs/authoring-and-editing.md exists, but the hash 'markdown--formatting' does not (status code 200)

 * At _site/en/lessons/collaborative-blog-with-jekyll-github.html:1528:

-  External link http://jekyll-windows.juthilo.com/ failed with something very wrong.
+  External link https://jekyll-windows.juthilo.com/ failed with something very wrong.
 It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
 Sometimes, making too many requests at once also breaks things. (status code 0)

@@ -17928,10 +9710,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0)
   External link https://github.com/programminghistorian/ph-submissions/issues/546 failed (status code 429)

-* At _site/en/lessons/corpus-analysis-with-spacy.html:1809:
-
-  External link https://github.com/explosion/spaCy/blob/master/spacy/glossary.py failed (status code 429)
-
 * At _site/en/lessons/corpus-analysis-with-spacy.html:2014:

   External link https://doi.org/10.3366/cor.2012.0015 failed (status code 403)

@@ -17960,9 +9738,15 @@ Sometimes, making too many requests at once also breaks things. (status code 0)
   External link https://github.com/programminghistorian/ph-submissions/issues/78 failed (status code 429)

+* At _site/en/lessons/correspondence-analysis-in-R.html:665:
+
+  External link https://factominer.free.fr/ failed: got a time out (response code 0) (status code 0)
+
 * At _site/en/lessons/correspondence-analysis-in-R.html:1125:

-  External link http://www.cbc.ca/news/indigenous/mmiw-inquiry-not-reaching-out-to-families-says-advocates-1.4053694 failed: got a time out (response code 0) (status code 0)
+  External link https://www.cbc.ca/news/indigenous/mmiw-inquiry-not-reaching-out-to-families-says-advocates-1.4053694 failed with something very wrong.
+It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
+Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/en/lessons/correspondence-analysis-in-R.html:1666:

@@ -18000,62 +9784,34 @@ Sometimes, making too many requests at once also breaks things. (status code 0)
   External link https://github.com/programminghistorian/ph-submissions/issues/106 failed (status code 429)

-* At _site/en/lessons/creating-apis-with-python-and-flask.html:725:
-
-  External link http://127.0.0.1:5000/ failed: Forbidden (status code 403)
-
-* At _site/en/lessons/creating-apis-with-python-and-flask.html:749:
-
-  External link http://127.0.0.1:5000/ failed: Forbidden (status code 403)
-
-* At _site/en/lessons/creating-apis-with-python-and-flask.html:844:
-
-  External link http://127.0.0.1:5000/api/v1/resources/books/all failed: Forbidden (status code 403)
-
-* At _site/en/lessons/creating-apis-with-python-and-flask.html:921:
-
-  External link http://127.0.0.1:5000/api/v1/resources/books?id=0 failed: Forbidden (status code 403)
-
-* At _site/en/lessons/creating-apis-with-python-and-flask.html:930:
+* At _site/en/lessons/creating-apis-with-python-and-flask.html:667:

-  External link http://127.0.0.1:5000/api/v1/resources/books failed: Forbidden (status code 403)
+  External link https://flask.pocoo.org/ failed with something very wrong.
+It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
+Sometimes, making too many requests at once also breaks things. (status code 0)

-* At _site/en/lessons/creating-apis-with-python-and-flask.html:1004:
+* At _site/en/lessons/creating-apis-with-python-and-flask.html:1006:

   External link https://pro.europeana.eu/resources/apis failed (status code 403)

-* At _site/en/lessons/creating-apis-with-python-and-flask.html:1092:
-
-  External link http://127.0.0.1:5000/api/v1/resources/books/all failed: Forbidden (status code 403)
-
-* At _site/en/lessons/creating-apis-with-python-and-flask.html:1093:
-
-  External link http://127.0.0.1:5000/api/v1/resources/books?author=Connie+Willis failed: Forbidden (status code 403)
+* At _site/en/lessons/creating-apis-with-python-and-flask.html:1239:

-* At _site/en/lessons/creating-apis-with-python-and-flask.html:1094:
-
-  External link http://127.0.0.1:5000/api/v1/resources/books?author=Connie+Willis&published=1993 failed: Forbidden (status code 403)
-* At _site/en/lessons/creating-apis-with-python-and-flask.html:1095:
-
-  External link http://127.0.0.1:5000/api/v1/resources/books?published=2010 failed: Forbidden (status code 403)
+  External link https://hds.essex.ac.uk/ failed with something very wrong.
+It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
+Sometimes, making too many requests at once also breaks things. (status code 0)
-* At _site/en/lessons/creating-apis-with-python-and-flask.html:1236:
+* At _site/en/lessons/creating-apis-with-python-and-flask.html:1241:

   External link https://pro.europeana.eu/ failed (status code 403)

-* At _site/en/lessons/creating-apis-with-python-and-flask.html:1781:
+* At _site/en/lessons/creating-apis-with-python-and-flask.html:1786:

   External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)

-* At _site/en/lessons/creating-apis-with-python-and-flask.html:1791:
+* At _site/en/lessons/creating-apis-with-python-and-flask.html:1796:

   External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/creating-apis-with-python-and-flask.md failed (status code 429)

-* At _site/en/lessons/creating-guis-in-python-for-digital-humanities-projects.html:333:
-
-  External link https://github.com/programminghistorian/ph-submissions/issues/479 failed (status code 429)
-
 * At _site/en/lessons/creating-guis-in-python-for-digital-humanities-projects.html:1521:

   External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)

@@ -18090,15 +9846,13 @@ Sometimes, making too many requests at once also breaks things. (status code 0)
 * At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:518:

-  External link http://martenduering.com/research/covert-networks-during-the-holocaust/ failed: Moved Permanently (status code 301)
-* At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:948:
-
-  External link http://www.cambridge.org/us/academic/subjects/sociology/research-methods-sociology-and-criminology/exploratory-social-network-analysis-pajek-2nd-edition failed (status code 403)
+  External link https://martenduering.com/research/covert-networks-during-the-holocaust/ failed with something very wrong.
+It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
+Sometimes, making too many requests at once also breaks things. (status code 0)

 * At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:948:

-  External link http://pajek.imfm.si/doku.php failed: got a time out (response code 0) (status code 0)
+  External link https://pajek.imfm.si/doku.php failed: got a time out (response code 0) (status code 0)

 * At _site/en/lessons/creating-network-diagrams-from-historical-sources.html:1485:

@@ -18112,6 +9866,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0)
   External link https://github.com/programminghistorian/ph-submissions/issues/301 failed (status code 429)

+* At _site/en/lessons/crowdsourced-data-normalization-with-pandas.html:507:
+
+  External link https://transcribe-bentham.ucl.ac.uk/td/Transcribe_Bentham failed: got a time out (response code 0) (status code 0)
+
 * At _site/en/lessons/crowdsourced-data-normalization-with-pandas.html:509:

   External link https://www.netflixprize.com/ failed with something very wrong.

@@ -18120,7 +9878,11 @@ Sometimes, making too many requests at once also breaks things. (status code 0)
(status code 0) * At _site/en/lessons/crowdsourced-data-normalization-with-pandas.html:883: - External link http://pandas-docs.github.io/pandas-docs-travis/user_guide/timeseries.html#timestamp-limitations failed: Not Found (status code 404) + External link https://pandas-docs.github.io/pandas-docs-travis/user_guide/timeseries.html#timestamp-limitations failed (status code 404) + +* At _site/en/lessons/crowdsourced-data-normalization-with-pandas.html:990: + + External link https://curatingmenus.org/articles/against-cleaning/ failed: got a time out (response code 0) (status code 0) * At _site/en/lessons/crowdsourced-data-normalization-with-pandas.html:1530: @@ -18132,28 +9894,24 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/data-mining-the-internet-archive.html:569: - External link http://archive.org/stream/lettertowilliaml00doug/39999066767938#page/n0/mode/2up failed: http://archive.org/stream/lettertowilliaml00doug/39999066767938 exists, but the hash 'page/n0/mode/2up' does not (status code 200) + External link https://archive.org/stream/lettertowilliaml00doug/39999066767938#page/n0/mode/2up failed: https://archive.org/stream/lettertowilliaml00doug/39999066767938 exists, but the hash 'page/n0/mode/2up' does not (status code 200) * At _site/en/lessons/data-mining-the-internet-archive.html:622: - External link http://internetarchive.readthedocs.io/en/latest/quickstart.html#searching failed (status code 404) + External link https://internetarchive.readthedocs.io/en/latest/quickstart.html#searching failed (status code 404) * At _site/en/lessons/data-mining-the-internet-archive.html:653: - External link http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading failed (status code 404) + External link https://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading failed (status code 404) * At _site/en/lessons/data-mining-the-internet-archive.html:794: - External link http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading failed (status code 404) + External link https://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading failed (status code 404) * At _site/en/lessons/data-mining-the-internet-archive.html:802: External link https://archive.org/about/faqs.php#140 failed: https://archive.org/about/faqs.php exists, but the hash '140' does not (status code 200) -* At _site/en/lessons/data-mining-the-internet-archive.html:973: - - External link https://github.com/edsu/pymarc/blob/master/pymarc/marcxml.py failed (status code 429) - * At _site/en/lessons/data-mining-the-internet-archive.html:1625: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -18180,13 +9938,13 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html:985: - External link http://localhost:7474/browser/ failed with something very wrong. + External link https://localhost:7474/browser/ failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html:1048: - External link http://localhost:7474 failed with something very wrong. + External link https://localhost:7474 failed with something very wrong. 
It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) @@ -18230,29 +9988,25 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://www.java.com/fr/download/ failed (status code 403) -* At _site/en/lessons/detecting-text-reuse-with-passim.html:1226: - - External link https://github.com/impresso/PH-Passim-tutorial/blob/master/eebo/code/main.py failed (status code 429) - -* At _site/en/lessons/detecting-text-reuse-with-passim.html:1269: - - External link https://github.com/impresso/impresso-pycommons/blob/master/impresso_commons/text/rebuilder.py failed (status code 429) - -* At _site/en/lessons/detecting-text-reuse-with-passim.html:1411: +* At _site/en/lessons/detecting-text-reuse-with-passim.html:1487: - External link https://github.com/impresso/PH-passim-tutorial/blob/master/explore-passim-output.ipynb failed (status code 429) + External link https://dx.doi.org/10.1093/alh/ajv029 failed (status code 403) * At _site/en/lessons/detecting-text-reuse-with-passim.html:1488: - External link http://dx.doi.org/10.1093/alh/ajv028 failed (status code 403) + External link https://dx.doi.org/10.1093/alh/ajv028 failed (status code 403) * At _site/en/lessons/detecting-text-reuse-with-passim.html:1489: - External link http://dx.doi.org/10.1080/1461670x.2020.1761865 failed (status code 403) + External link https://dx.doi.org/10.1080/1461670x.2020.1761865 failed (status code 403) * At _site/en/lessons/detecting-text-reuse-with-passim.html:1495: - External link http://dx.doi.org/10.1145/2682571.2797068 failed (status code 403) + External link https://dx.doi.org/10.1145/2682571.2797068 failed (status code 403) + +* At _site/en/lessons/detecting-text-reuse-with-passim.html:1496: + + External link https://doi.org/10.18653/v1/D17-1290 failed (status code 409) * At _site/en/lessons/detecting-text-reuse-with-passim.html:2470: @@ -18276,7 +10030,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/downloading-multiple-records-using-query-strings.html:1660: - External link http://stackoverflow.com/questions/273192/python-best-way-to-create-directory-if-it-doesnt-exist-for-file-write failed (status code 403) + External link https://stackoverflow.com/questions/273192/python-best-way-to-create-directory-if-it-doesnt-exist-for-file-write failed (status code 403) * At _site/en/lessons/downloading-multiple-records-using-query-strings.html:2197: @@ -18290,6 +10044,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/15 failed (status code 429) +* At _site/en/lessons/editing-audio-with-audacity.html:525: + + External link https://web.archive.org/web/20161119231053/https://www.indiana.edu:80/~emusic/acoustics/amplitude.htm failed (status code 404) + * At _site/en/lessons/editing-audio-with-audacity.html:1358: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -18302,6 +10060,18 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/92 failed (status code 429) +* At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:580: + + External link https://www.sixdegreesoffrancisbacon.com failed with something very wrong. 
+It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:852: + + External link https://sixdegreesoffrancisbacon.com/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/en/lessons/exploring-and-analyzing-network-data-with-python.html:2951: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -18334,15 +10104,15 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/extracting-keywords.html:603: - External link http://stackoverflow.com/questions/3056740/gedit-adds-line-at-end-of-file failed (status code 403) + External link https://stackoverflow.com/questions/3056740/gedit-adds-line-at-end-of-file failed (status code 403) * At _site/en/lessons/extracting-keywords.html:660: - External link http://stackoverflow.com/questions/11497376/new-line-python failed (status code 403) + External link https://stackoverflow.com/questions/11497376/new-line-python failed (status code 403) * At _site/en/lessons/extracting-keywords.html:985: - External link http://stackoverflow.com/questions/17315635/csv-new-line-character-seen-in-unquoted-field-error failed (status code 403) + External link https://stackoverflow.com/questions/17315635/csv-new-line-character-seen-in-unquoted-field-error failed (status code 403) * At _site/en/lessons/extracting-keywords.html:1643: @@ -18382,7 +10152,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:570: - External link http://www.gutenberg.org/wiki/Gutenberg:Feeds failed (status code 404) + External link https://www.gutenberg.org/wiki/Gutenberg:Feeds failed (status code 404) * At _site/en/lessons/fetch-and-parse-data-with-openrefine.html:1086: @@ -18458,25 +10228,35 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/geoparsing-text-with-edinburgh.html:982: - External link http://palimpsest.blogs.edina.ac.uk/ failed with something very wrong. + External link https://palimpsest.blogs.edina.ac.uk/ failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/geoparsing-text-with-edinburgh.html:982: - External link http://litlong.org/ failed with something very wrong. + External link https://litlong.org/ failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/geoparsing-text-with-edinburgh.html:985: - External link http://tradingconsequences.blogs.edina.ac.uk/ failed with something very wrong. + External link https://tradingconsequences.blogs.edina.ac.uk/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. 
(status code 0) + +* At _site/en/lessons/geoparsing-text-with-edinburgh.html:993: + + External link https://www.lrec-conf.org/proceedings/lrec2016/pdf/129_Paper.pdf failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/geoparsing-text-with-edinburgh.html:995: - External link http://www.euppublishing.com/doi/pdfplus/10.3366/ijhac.2015.0136 failed (status code 403) + External link https://www.euppublishing.com/doi/pdfplus/10.3366/ijhac.2015.0136 failed (status code 403) + +* At _site/en/lessons/geoparsing-text-with-edinburgh.html:1001: + + External link https://doi.org/10.3115/v1/W14-0617 failed (status code 409) * At _site/en/lessons/geoparsing-text-with-edinburgh.html:1005: @@ -18492,9 +10272,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/georeferencing-qgis.html:605: - External link http://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP failed with something very wrong. -It's possible libcurl couldn't connect to the server, or perhaps the request timed out. -Sometimes, making too many requests at once also breaks things. (status code 0) + External link https://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP failed: Found (status code 302) * At _site/en/lessons/georeferencing-qgis.html:723: @@ -18518,11 +10296,11 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/geospatial-data-analysis.html:495: - External link http://www.ats.ucla.edu/stat/r/default.htm failed: got a time out (response code 0) (status code 0) + External link https://www.ats.ucla.edu/stat/r/default.htm failed: got a time out (response code 0) (status code 0) * At _site/en/lessons/geospatial-data-analysis.html:775: - External link http://www.sciencedirect.com/science/article/pii/S0031405608000073 failed: Forbidden (status code 403) + External link https://www.sciencedirect.com/science/article/pii/S0031405608000073 failed (status code 403) * At _site/en/lessons/geospatial-data-analysis.html:1316: @@ -18558,11 +10336,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/getting-started-with-mysql-using-r.html:1686: - External link http://www.jeffblackadar.ca/graham_fellowship/corpus_entities_equity/ failed (status code 301) - -* At _site/en/lessons/getting-started-with-mysql-using-r.html:1706: - - External link https://github.com/jeffblackadar/getting-started-with-mysql/blob/master/newspaper-search-and-store.R failed (status code 429) + External link https://www.jeffblackadar.ca/graham_fellowship/corpus_entities_equity/ failed (status code 301) * At _site/en/lessons/getting-started-with-mysql-using-r.html:2254: @@ -18644,14 +10418,6 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/6 failed (status code 429) -* At _site/en/lessons/installing-omeka.html:484: - - External link http://www.hostgator.com/ failed: Forbidden (status code 403) - -* At _site/en/lessons/installing-omeka.html:500: - - External link http://support.hostgator.com/articles/hosting-guide/lets-get-started/how-do-i-get-and-use-ssh-access failed (status code 403) - * At _site/en/lessons/installing-omeka.html:1269: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -18662,7 +10428,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/installing-python-modules-pip.html:578: - External link http://stackoverflow.com/questions/4750806/how-to-install-pip-on-windows failed (status code 403) + External link https://stackoverflow.com/questions/4750806/how-to-install-pip-on-windows failed (status code 403) * At _site/en/lessons/installing-python-modules-pip.html:1140: @@ -18720,6 +10486,18 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/348 failed (status code 429) +* At _site/en/lessons/interactive-text-games-using-twine.html:576: + + External link https://www.depressionquest.com/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/en/lessons/interactive-text-games-using-twine.html:1076: + + External link https://www.depressionquest.com/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/en/lessons/interactive-text-games-using-twine.html:1117: External link https://doi.org/10.1177/1461444811410394 failed (status code 403) @@ -18748,6 +10526,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/418 failed (status code 429) +* At _site/en/lessons/interrogating-national-narrative-gpt.html:922: + + External link https://doi.org/10.18653/v1/2020.acl-main.463 failed (status code 409) + * At _site/en/lessons/interrogating-national-narrative-gpt.html:931: External link https://doi.org/10.1080/01419870.2017.1361544 failed (status code 403) @@ -18768,6 +10550,12 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/interrogating-national-narrative-gpt.md failed (status code 429) +* At _site/en/lessons/intro-to-bash.html:738: + + External link https://www.viemu.com/a-why-vi-vim.html failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/en/lessons/intro-to-bash.html:2011: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -18780,13 +10568,39 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/33 failed (status code 429) -* At _site/en/lessons/intro-to-linked-data.html:707: +* At _site/en/lessons/intro-to-linked-data.html:531: + + External link https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/en/lessons/intro-to-linked-data.html:673: + + External link https://www.history.ac.uk/projects/digital/tobias failed (status code 403) + +* At _site/en/lessons/intro-to-linked-data.html:707: + + External link https://semanticweb.org/wiki/Main_Page.html failed: got a time out (response code 0) (status code 0) + +* At _site/en/lessons/intro-to-linked-data.html:862: + + External link https://www.history.ac.uk/projects/digital/tobias failed (status code 403) + +* At _site/en/lessons/intro-to-linked-data.html:1038: + + External link https://linkeddata.org/guides-and-tutorials failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/en/lessons/intro-to-linked-data.html:1040: - External link http://semanticweb.org/wiki/Main_Page.html failed: got a time out (response code 0) (status code 0) + External link https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) -* At _site/en/lessons/intro-to-linked-data.html:1038: +* At _site/en/lessons/intro-to-linked-data.html:1046: - External link http://linkeddata.org/guides-and-tutorials failed: Internal Server Error (status code 500) + External link https://www.history.ac.uk/projects/digital/tobias failed (status code 403) * At _site/en/lessons/intro-to-linked-data.html:1581: @@ -18814,7 +10628,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/intro-to-twitterbots.html:521: - External link http://www.sciencedirect.com/science/article/pii/S0747563213003129 failed: Forbidden (status code 403) + External link https://www.sciencedirect.com/science/article/pii/S0747563213003129 failed (status code 403) * At _site/en/lessons/intro-to-twitterbots.html:537: @@ -18822,7 +10636,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/intro-to-twitterbots.html:593: - External link http://twitter.com/tinyarchae failed (status code 400) + External link https://twitter.com/archaeoglitch failed (status code 400) * At _site/en/lessons/intro-to-twitterbots.html:593: @@ -18830,7 +10644,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/intro-to-twitterbots.html:593: - External link https://twitter.com/archaeoglitch failed (status code 400) + External link https://twitter.com/tinyarchae failed (status code 400) * At _site/en/lessons/intro-to-twitterbots.html:603: @@ -18838,7 +10652,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/intro-to-twitterbots.html:845: - External link http://unicode.org/emoji/charts/full-emoji-list.html failed with something very wrong. 
+ External link https://unicode.org/emoji/charts/full-emoji-list.html failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) @@ -18890,10 +10704,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://training.ashleyblewer.com/presentations/ffmpeg.html#10 failed: https://training.ashleyblewer.com/presentations/ffmpeg.html exists, but the hash '10' does not (status code 200) -* At _site/en/lessons/introduction-to-ffmpeg.html:1124: - - External link https://github.com/privatezero/NDSR/blob/master/Demystifying_FFmpeg_Slides.pdf failed (status code 429) - * At _site/en/lessons/introduction-to-ffmpeg.html:1687: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -18928,7 +10738,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:732: - External link http://www.europeana.eu/portal/record/90402/RP_P_OB_84_508.html failed: Forbidden (status code 403) + External link https://www.europeana.eu/portal/record/90402/RP_P_OB_84_508.html failed (status code 403) * At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:748: @@ -18944,20 +10754,16 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:821: - External link http://localhost/dashboard failed with something very wrong. + External link https://localhost/dashboard failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:859: - External link http://localhost/helloworld.php failed with something very wrong. + External link https://localhost/helloworld.php failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) -* At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:1251: - - External link http://museum-api.pbworks.com/w/page/21933420/Museum%C2%A0APIs failed: got a time out (response code 0) (status code 0) - * At _site/en/lessons/introduction-to-populating-a-website-with-api-data.html:1794: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -18992,7 +10798,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/json-and-jq.html:731: - External link http://stackoverflow.com/questions/3135325/why-do-vector-indices-in-r-start-with-1-instead-of-0 failed (status code 403) + External link https://stackoverflow.com/questions/3135325/why-do-vector-indices-in-r-start-with-1-instead-of-0 failed (status code 403) * At _site/en/lessons/json-and-jq.html:814: @@ -19012,15 +10818,15 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) * At _site/en/lessons/json-and-jq.html:1466: - External link https://stedolan.github.io/jq/manual/#Reduce failed: https://stedolan.github.io/jq/manual/ exists, but the hash 'Reduce' does not (status code 200) + External link https://stedolan.github.io/jq/manual/#Recursion failed: https://stedolan.github.io/jq/manual/ exists, but the hash 'Recursion' does not (status code 200) * At _site/en/lessons/json-and-jq.html:1466: - External link https://stedolan.github.io/jq/manual/#Recursion failed: https://stedolan.github.io/jq/manual/ exists, but the hash 'Recursion' does not (status code 200) + External link https://stedolan.github.io/jq/manual/#Reduce failed: https://stedolan.github.io/jq/manual/ exists, but the hash 'Reduce' does not (status code 200) * At _site/en/lessons/json-and-jq.html:1467: - External link http://stackoverflow.com/questions/tagged/jq failed (status code 403) + External link https://stackoverflow.com/questions/tagged/jq failed (status code 403) * At _site/en/lessons/json-and-jq.html:2003: @@ -19120,19 +10926,19 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/mapping-with-python-leaflet.html:511: - External link http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe failed: http://pandas.pydata.org/pandas-docs/stable/dsintro.html exists, but the hash 'dataframe' does not (status code 200) + External link https://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe failed: https://pandas.pydata.org/pandas-docs/stable/dsintro.html exists, but the hash 'dataframe' does not (status code 200) * At _site/en/lessons/mapping-with-python-leaflet.html:535: - External link http://data.london.gov.uk/dataset/historic-census-population failed: Forbidden (status code 403) + External link https://data.london.gov.uk/dataset/historic-census-population failed (status code 403) * At _site/en/lessons/mapping-with-python-leaflet.html:555: - External link http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe failed: http://pandas.pydata.org/pandas-docs/stable/dsintro.html exists, but the hash 'dataframe' does not (status code 200) + External link https://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe failed: https://pandas.pydata.org/pandas-docs/stable/dsintro.html exists, but the hash 'dataframe' does not (status code 200) * At _site/en/lessons/mapping-with-python-leaflet.html:573: - External link http://pandas.pydata.org/pandas-docs/stable/install.html#dependencies failed: http://pandas.pydata.org/pandas-docs/stable/install.html exists, but the hash 'dependencies' does not (status code 200) + External link https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies failed: https://pandas.pydata.org/pandas-docs/stable/install.html exists, but the hash 'dependencies' does not (status code 200) * At _site/en/lessons/mapping-with-python-leaflet.html:661: @@ -19144,7 +10950,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/mapping-with-python-leaflet.html:1185: - External link http://leafletjs.com/SlavaUkraini/reference-1.2.0.html#geojson-oneachfeature failed (status code 404) + External link https://leafletjs.com/SlavaUkraini/reference-1.2.0.html#geojson-oneachfeature failed (status code 404) * At _site/en/lessons/mapping-with-python-leaflet.html:1960: @@ -19156,7 +10962,13 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) * At _site/en/lessons/naive-bayesian.html:915: - External link http://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introbayes_sect004.htm failed: http://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm exists, but the hash 'statug_introbayes_sect004.htm' does not (status code 200) + External link https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introbayes_sect004.htm failed: https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm exists, but the hash 'statug_introbayes_sect004.htm' does not (status code 200) + +* At _site/en/lessons/naive-bayesian.html:2014: + + External link https://snowball.tartarus.org/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/naive-bayesian.html:2590: @@ -19174,6 +10986,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/normalizing-data.md failed (status code 429) +* At _site/en/lessons/ocr-with-google-vision-and-tesseract.html:333: + + External link https://github.com/programminghistorian/ph-submissions/issues/457 failed (status code 429) + * At _site/en/lessons/ocr-with-google-vision-and-tesseract.html:2037: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -19210,18 +11026,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/preserving-your-research-data.md failed (status code 429) -* At _site/en/lessons/qgis-layers.html:609: - - External link http://www.gov.pe.ca/gis/download.php3?name=coastline&file_format=SHP failed with something very wrong. -It's possible libcurl couldn't connect to the server, or perhaps the request timed out. -Sometimes, making too many requests at once also breaks things. (status code 0) - -* At _site/en/lessons/qgis-layers.html:642: - - External link http://www.gov.pe.ca/gis/index.php3?number=77865&lang=E failed with something very wrong. -It's possible libcurl couldn't connect to the server, or perhaps the request timed out. -Sometimes, making too many requests at once also breaks things. (status code 0) - * At _site/en/lessons/qgis-layers.html:2876: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -19242,10 +11046,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/r-basics-with-tabular-data.md failed (status code 429) -* At _site/en/lessons/research-data-with-unix.html:510: - - External link https://www.worldcat.org/title/unix-and-linux/oclc/308171076&referer=brief_results failed (status code 403) - * At _site/en/lessons/research-data-with-unix.html:1575: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -19266,9 +11066,11 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://digitalarchive.wilsoncenter.org/document/119105 failed (status code 403) -* At _site/en/lessons/retired/OCR-and-Machine-Translation.html:537: +* At _site/en/lessons/retired/OCR-and-Machine-Translation.html:523: - External link https://github.com/tesseract-ocr/tesseract/blob/master/doc/tesseract.1.asc failed (status code 429) + External link https://www.fmwconcepts.com/imagemagick/textcleaner/index.php failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/retired/OCR-and-Machine-Translation.html:540: @@ -19340,31 +11142,31 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/retired/graph-databases-and-SPARQL.html:514: - External link http://labs.europeana.eu/api/linked-open-data-introduction failed with something very wrong. + External link https://labs.europeana.eu/api/linked-open-data-introduction failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/retired/graph-databases-and-SPARQL.html:514: - External link http://collection.britishmuseum.org failed: got a time out (response code 0) (status code 0) + External link https://collection.britishmuseum.org failed: got a time out (response code 0) (status code 0) * At _site/en/lessons/retired/graph-databases-and-SPARQL.html:590: - External link http://palladio.designhumanities.org/ failed with something very wrong. + External link https://palladio.designhumanities.org/ failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/retired/graph-databases-and-SPARQL.html:794: - External link http://collection.britishmuseum.org/sparql failed: got a time out (response code 0) (status code 0) + External link https://collection.britishmuseum.org/sparql failed: got a time out (response code 0) (status code 0) * At _site/en/lessons/retired/graph-databases-and-SPARQL.html:816: - External link http://collection.britishmuseum.org/id/object/PPA82633 failed: got a time out (response code 0) (status code 0) + External link https://collection.britishmuseum.org/id/object/PPA82633 failed: got a time out (response code 0) (status code 0) * At _site/en/lessons/retired/graph-databases-and-SPARQL.html:830: - External link http://collection.britishmuseum.org/sparql?query=SELECT+*%0D%0AWHERE+%7B%0D%0A++%3Chttp://collection.britishmuseum.org/id/object/PPA82633%3E+?p+?o+.%0D%0A++%7D&_implicit=false&_equivalent=false&_form=/sparql failed: got a time out (response code 0) (status code 0) + External link https://collection.britishmuseum.org/sparql?query=SELECT+*%0D%0AWHERE+%7B%0D%0A++%3Chttp://collection.britishmuseum.org/id/object/PPA82633%3E+?p+?o+.%0D%0A++%7D&_implicit=false&_equivalent=false&_form=/sparql failed: got a time out (response code 0) (status code 0) * At _site/en/lessons/retired/graph-databases-and-SPARQL.html:928: @@ -19380,7 +11182,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/retired/graph-databases-and-SPARQL.html:1200: - External link http://palladio.designhumanities.org/ failed with something very wrong. 
+ External link https://palladio.designhumanities.org/ failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) @@ -19390,7 +11192,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/retired/graph-databases-and-SPARQL.html:1281: - External link http://labs.europeana.eu/api/linked-open-data-SPARQL-endpoint failed with something very wrong. + External link https://labs.europeana.eu/api/linked-open-data-SPARQL-endpoint failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) @@ -19412,7 +11214,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:490: - External link http://www.digi-capital.com/news/2015/04/augmentedvirtual-reality-to-hit-150-billion-disrupting-mobile-by-2020/#.VbetCU1VhHw failed: http://www.digi-capital.com/news/2015/04/augmentedvirtual-reality-to-hit-150-billion-disrupting-mobile-by-2020/ exists, but the hash '.VbetCU1VhHw' does not (status code 200) + External link https://www.digi-capital.com/news/2015/04/augmentedvirtual-reality-to-hit-150-billion-disrupting-mobile-by-2020/#.VbetCU1VhHw failed: https://www.digi-capital.com/news/2015/04/augmentedvirtual-reality-to-hit-150-billion-disrupting-mobile-by-2020/ exists, but the hash '.VbetCU1VhHw' does not (status code 200) * At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:510: @@ -19420,7 +11222,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:516: - External link http://docs.unity3d.com/Manual/LearningtheInterface.html failed (status code 404) + External link https://docs.unity3d.com/Manual/LearningtheInterface.html failed (status code 404) * At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:520: @@ -19440,7 +11242,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:1335: - External link http://docs.unity3d.com/Manual/Transforms.html failed (status code 404) + External link https://docs.unity3d.com/Manual/Transforms.html failed (status code 404) * At _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html:2160: @@ -19452,7 +11254,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/retired/intro-to-beautiful-soup.html:609: - External link http://bioguide.congress.gov/biosearch/biosearch.asp failed: Forbidden (status code 403) + External link https://bioguide.congress.gov/biosearch/biosearch.asp failed (status code 403) * At _site/en/lessons/retired/intro-to-beautiful-soup.html:1720: @@ -19504,11 +11306,7 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) * At _site/en/lessons/sentiment-analysis.html:549: - External link http://journals.sagepub.com/doi/abs/10.1177/1749975514542486 failed (status code 403) - -* At _site/en/lessons/sentiment-analysis.html:622: - - External link https://github.com/cjhutto/vaderSentiment/blob/master/vaderSentiment/vaderSentiment.py failed (status code 429) + External link https://journals.sagepub.com/doi/abs/10.1177/1749975514542486 failed (status code 403) * At _site/en/lessons/sentiment-analysis.html:1443: @@ -19534,14 +11332,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/605 failed (status code 429) -* At _site/en/lessons/simulating-historical-communication-networks-python.html:1107: - - External link https://github.com/projectmesa/mesa/blob/2.4.x-maintenance/mesa/datacollection.py failed (status code 429) - -* At _site/en/lessons/simulating-historical-communication-networks-python.html:1521: - - External link https://doi.org/10.52842/conf.ecaade.2016.2.485 failed (status code 302) - * At _site/en/lessons/simulating-historical-communication-networks-python.html:1551: External link https://doi.org/10.1177/1059712320922915 failed (status code 403) @@ -19568,33 +11358,43 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/sonification.html:521: - External link http://blog.taracopplestone.co.uk/making-things-photobashing-as-archaeological-remediation/ failed with something very wrong. + External link https://blog.taracopplestone.co.uk/making-things-photobashing-as-archaeological-remediation/ failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/sonification.html:548: - External link http://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. + External link https://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/sonification.html:625: - External link http://www.lynda.com/GarageBand-tutorials/Importing-audio-tracks/156620/164050-4.html failed (status code 404) + External link https://www.lynda.com/GarageBand-tutorials/Importing-audio-tracks/156620/164050-4.html failed (status code 404) + +* At _site/en/lessons/sonification.html:836: + + External link https://abcnotation.com/wiki/abc:standard:v2.1 failed: Not Found (status code 404) * At _site/en/lessons/sonification.html:997: - External link http://puffin.creighton.edu/jesuit/relations/ failed with something very wrong. + External link https://puffin.creighton.edu/jesuit/relations/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/en/lessons/sonification.html:1060: + + External link https://www.lilypond.org/ failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. 
(status code 0) * At _site/en/lessons/sonification.html:1083: - External link http://www.jstor.org/stable/734136 failed (status code 403) + External link https://www.jstor.org/stable/734136 failed (status code 403) * At _site/en/lessons/sonification.html:1085: - External link http://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. + External link https://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) @@ -19612,7 +11412,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/space-place-gazetteers.html:547: - External link http://bombsight.org/#17/51.50595/-0.10680 failed: http://bombsight.org/ exists, but the hash '17/51.50595/-0.10680' does not (status code 200) + External link https://bombsight.org/#17/51.50595/-0.10680 failed: https://bombsight.org/ exists, but the hash '17/51.50595/-0.10680' does not (status code 200) * At _site/en/lessons/space-place-gazetteers.html:1451: @@ -19630,31 +11430,25 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/space-place-gazetteers.md failed (status code 429) -* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:737: - - External link https://github.com/dhcolumbia/pandoc-workflow/blob/master/pandoctut.bib failed (status code 429) - * At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1204: External link https://groups.google.com/forum/#!forum/pandoc-discuss failed: https://groups.google.com/forum/ exists, but the hash '!forum/pandoc-discuss' does not (status code 200) * At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1207: - External link http://stackoverflow.com/questions/tagged/pandoc failed (status code 403) + External link https://stackoverflow.com/questions/tagged/pandoc failed (status code 403) * At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1218: - External link http://mouapp.com/ failed with something very wrong. + External link https://mouapp.com/ failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) -* At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1223: - - External link http://www.authorea.com failed: Forbidden (status code 403) - * At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1224: - External link http://www.draftin.com failed: Service Unavailable (status code 503) + External link https://www.draftin.com failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html:1269: @@ -19680,21 +11474,13 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/temporal-network-analysis-with-r.md failed (status code 429) -* At _site/en/lessons/text-mining-with-extracted-features.html:630: - - External link http://stackoverflow.com/a/19350234/233577 failed (status code 403) - -* At _site/en/lessons/text-mining-with-extracted-features.html:1800: +* At _site/en/lessons/text-mining-with-extracted-features.html:352: - External link https://github.com/htrc/htrc-feature-reader/blob/master/README.ipynb failed (status code 429) + External link https://github.com/programminghistorian/ph-submissions/issues/29 failed (status code 429) -* At _site/en/lessons/text-mining-with-extracted-features.html:1804: - - External link https://github.com/htrc/htrc-feature-reader/blob/master/examples/Within-Book%20Sentiment%20Trends.ipynb failed (status code 429) - -* At _site/en/lessons/text-mining-with-extracted-features.html:1854: +* At _site/en/lessons/text-mining-with-extracted-features.html:630: - External link https://github.com/htrc/htrc-feature-reader/blob/master/examples/ID_to_Rsync_Link.ipynb failed (status code 429) + External link https://stackoverflow.com/a/19350234/233577 failed (status code 403) * At _site/en/lessons/text-mining-with-extracted-features.html:1854: @@ -19710,6 +11496,38 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/text-mining-with-extracted-features.md failed (status code 429) +* At _site/en/lessons/text-mining-youtube-comments.html:337: + + External link https://github.com/programminghistorian/ph-submissions/issues/374 failed (status code 429) + +* At _site/en/lessons/text-mining-youtube-comments.html:524: + + External link https://www.allsides.com failed (status code 403) + +* At _site/en/lessons/text-mining-youtube-comments.html:876: + + External link https://www.wordfish.org/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/en/lessons/text-mining-youtube-comments.html:876: + + External link https://www.wordfish.org/software.html failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/en/lessons/text-mining-youtube-comments.html:906: + + External link https://www.wordfish.org/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/en/lessons/text-mining-youtube-comments.html:906: + + External link https://www.wordfish.org/software.html failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/en/lessons/text-mining-youtube-comments.html:988: External link https://doi.org/10.1111/j.1540-5907.2008.00338.x failed (status code 403) @@ -19750,13 +11568,13 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/text-mining-youtube-comments.md failed (status code 429) -* At _site/en/lessons/topic-modeling-and-mallet.html:545: +* At _site/en/lessons/topic-modeling-and-mallet.html:1225: - External link http://www.worldcat.org/title/reading-machines-toward-an-algorithmic-criticism/oclc/708761605&referer=brief_results failed: Forbidden (status code 403) + External link https://web.archive.org/web/20160704150726/https://www.lisarhody.com:80/some-assembly-required/ failed (status code 404) * At _site/en/lessons/topic-modeling-and-mallet.html:1228: - External link http://dl.acm.org/citation.cfm?id=944937 failed (status code 403) + External link https://dl.acm.org/citation.cfm?id=944937 failed (status code 403) * At _site/en/lessons/topic-modeling-and-mallet.html:2649: @@ -19766,6 +11584,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/topic-modeling-and-mallet.md failed (status code 429) +* At _site/en/lessons/transcribing-handwritten-text-with-python-and-azure.html:333: + + External link https://github.com/programminghistorian/ph-submissions/issues/511 failed (status code 429) + * At _site/en/lessons/transcribing-handwritten-text-with-python-and-azure.html:1664: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -19774,6 +11596,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/transcribing-handwritten-text-with-python-and-azure.md failed (status code 429) +* At _site/en/lessons/transforming-xml-with-xsl.html:333: + + External link https://github.com/programminghistorian/ph-submissions/issues/11 failed (status code 429) + * At _site/en/lessons/transforming-xml-with-xsl.html:535: External link https://irt.kcl.ac.uk/irt2009/ failed with something very wrong. @@ -19784,13 +11610,15 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://www.java.com/en/download/ failed (status code 403) -* At _site/en/lessons/transforming-xml-with-xsl.html:1274: +* At _site/en/lessons/transforming-xml-with-xsl.html:728: - External link https://stackoverflow.com/questions/16811332/cannot-run-java-from-the-windows-powershell-command-prompt failed (status code 403) + External link https://scissors-and-paste.net failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) -* At _site/en/lessons/transforming-xml-with-xsl.html:1279: +* At _site/en/lessons/transforming-xml-with-xsl.html:1274: - External link https://www.computerhope.com/issues/ch000549.htm failed (status code 403) + External link https://stackoverflow.com/questions/16811332/cannot-run-java-from-the-windows-powershell-command-prompt failed (status code 403) * At _site/en/lessons/transforming-xml-with-xsl.html:1280: @@ -19812,6 +11640,10 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/transliterating.md failed (status code 429) +* At _site/en/lessons/understanding-creating-word-embeddings.html:337: + + External link https://github.com/programminghistorian/ph-submissions/issues/555 failed (status code 429) + * At _site/en/lessons/understanding-creating-word-embeddings.html:989: External link https://doi.org/10.1080/01615440.2020.1760157 failed (status code 403) @@ -19826,7 +11658,13 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/understanding-regular-expressions.html:628: - External link http://archive.org/stream/jstor-4560629/4560629#page/n0/mode/2up failed: http://archive.org/stream/jstor-4560629/4560629 exists, but the hash 'page/n0/mode/2up' does not (status code 200) + External link https://archive.org/stream/jstor-4560629/4560629#page/n0/mode/2up failed: https://archive.org/stream/jstor-4560629/4560629 exists, but the hash 'page/n0/mode/2up' does not (status code 200) + +* At _site/en/lessons/understanding-regular-expressions.html:1430: + + External link https://dh.obdurodon.org/regex.html failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/understanding-regular-expressions.html:1967: @@ -19844,10 +11682,20 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/up-and-running-with-omeka.md failed (status code 429) +* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:335: + + External link https://github.com/programminghistorian/ph-submissions/issues/606 failed (status code 429) + * At _site/en/lessons/urban-demographic-data-r-ggplot2.html:1203: External link https://doi.org/10.1198/jcgs.2009.07098 failed (status code 403) +* At _site/en/lessons/urban-demographic-data-r-ggplot2.html:1227: + + External link https://www.cookbook-r.com/Graphs/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/en/lessons/urban-demographic-data-r-ggplot2.html:2207: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -19856,9 +11704,13 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/urban-demographic-data-r-ggplot2.md failed (status code 429) +* At _site/en/lessons/using-javascript-to-create-maps.html:340: + + External link https://github.com/programminghistorian/ph-submissions/issues/32 failed (status code 429) + * At _site/en/lessons/using-javascript-to-create-maps.html:744: - External link http://stackoverflow.com/questions/16151018/npm-throws-error-without-sudo/24404451#24404451 failed (status code 403) + External link https://stackoverflow.com/questions/16151018/npm-throws-error-without-sudo/24404451#24404451 failed (status code 403) * At _site/en/lessons/using-javascript-to-create-maps.html:1725: @@ -19884,13 +11736,9 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/viewing-html-files.md failed (status code 429) -* At _site/en/lessons/visualizing-with-bokeh.html:612: +* At _site/en/lessons/visualizing-with-bokeh.html:333: - External link https://github.com/programminghistorian/ph-submissions/tree/gh-pages/assets/visualizing-with-bokeh/visualizing-with-bokeh.ipynb failed (status code 429) - -* At _site/en/lessons/visualizing-with-bokeh.html:626: - - External link https://github.com/programminghistorian/ph-submissions/tree/gh-pages/assets/visualizing-with-bokeh/visualizing-with-bokeh.ipynb failed (status code 429) + External link https://github.com/programminghistorian/ph-submissions/issues/152 failed (status code 429) * At _site/en/lessons/visualizing-with-bokeh.html:723: @@ -19902,11 +11750,11 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/lessons/visualizing-with-bokeh.html:1101: - External link http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases failed: http://pandas.pydata.org/pandas-docs/stable/timeseries.html exists, but the hash 'offset-aliases' does not (status code 200) + External link https://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases failed: https://pandas.pydata.org/pandas-docs/stable/timeseries.html exists, but the hash 'offset-aliases' does not (status code 200) * At _site/en/lessons/visualizing-with-bokeh.html:1190: - External link http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases failed: http://pandas.pydata.org/pandas-docs/stable/timeseries.html exists, but the hash 'offset-aliases' does not (status code 200) + External link https://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases failed: https://pandas.pydata.org/pandas-docs/stable/timeseries.html exists, but the hash 'offset-aliases' does not (status code 200) * At _site/en/lessons/visualizing-with-bokeh.html:1890: @@ -19924,6 +11772,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/windows-installation.md failed (status code 429) +* At _site/en/lessons/working-with-batches-of-pdf-files.html:333: + + External link https://github.com/programminghistorian/ph-submissions/issues/258 failed (status code 429) + * At _site/en/lessons/working-with-batches-of-pdf-files.html:707: External link https://manpages.ubuntu.com/manpages/bionic/en/man1/grep.1.html#regular%20expressions failed: https://manpages.ubuntu.com/manpages/bionic/en/man1/grep.1.html exists, but the hash 'regular%20expressions' does not (status code 200) @@ -19962,11 +11814,11 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/project-team.html:308: - External link http://twitter.com/maxcarlons failed (status code 400) + External link https://twitter.com/maxcarlons failed (status code 400) * At _site/en/project-team.html:510: - External link http://twitter.com/cosovschi failed (status code 400) + External link https://twitter.com/cosovschi failed (status code 400) * At _site/en/project-team.html:1270: @@ -19976,99 +11828,101 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) * At _site/en/project-team.html:1276: - External link http://twitter.com/nabsiddiqui failed (status code 400) + External link https://twitter.com/nabsiddiqui failed (status code 400) * At _site/en/project-team.html:1629: - External link http://twitter.com/giulia_taurino failed (status code 400) + External link https://twitter.com/giulia_taurino failed (status code 400) * At _site/en/project-team.html:1808: - External link http://twitter.com/alexwermercolan failed (status code 400) + External link https://twitter.com/alexwermercolan failed (status code 400) * At _site/en/project-team.html:2057: - External link http://www.mariajoseafanador.com failed: Moved Permanently (status code 301) + External link https://www.mariajoseafanador.com failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/en/project-team.html:2063: - External link http://twitter.com/mariajoafana failed (status code 400) + External link https://twitter.com/mariajoafana failed (status code 400) * At _site/en/project-team.html:2532: - External link http://twitter.com/IsaGribomont failed (status code 400) + External link https://twitter.com/IsaGribomont failed (status code 400) * At _site/en/project-team.html:2743: - External link http://twitter.com/espejolento failed (status code 400) + External link https://twitter.com/espejolento failed (status code 400) * At _site/en/project-team.html:3034: - External link http://twitter.com/jenniferisve failed (status code 400) + External link https://twitter.com/jenniferisve failed (status code 400) * At _site/en/project-team.html:3359: - External link http://twitter.com/enetreseles failed (status code 400) + External link https://twitter.com/enetreseles failed (status code 400) * At _site/en/project-team.html:3566: - External link http://twitter.com/jgob failed (status code 400) + External link https://twitter.com/jgob failed (status code 400) * At _site/en/project-team.html:3861: - External link http://twitter.com/rivaquiroga failed (status code 400) + External link https://twitter.com/rivaquiroga failed (status code 400) * At _site/en/project-team.html:4802: - External link http://twitter.com/superHH failed (status code 400) + External link https://twitter.com/superHH failed (status code 400) * At _site/en/project-team.html:5188: - External link http://twitter.com/emilienschultz failed (status code 400) + External link https://twitter.com/emilienschultz failed (status code 400) * At _site/en/project-team.html:5315: - External link http://twitter.com/davvalent failed (status code 400) + External link https://twitter.com/davvalent failed (status code 400) * At _site/en/project-team.html:5840: - External link http://twitter.com/danielalvesfcsh failed (status code 400) + External link https://twitter.com/danielalvesfcsh failed (status code 400) * At _site/en/project-team.html:6105: - External link http://twitter.com/ericbrasiln failed (status code 400) + External link https://twitter.com/ericbrasiln failed (status code 400) * At _site/en/project-team.html:6541: - External link http://twitter.com/jimmy_medeiros failed (status code 400) + External link https://twitter.com/jimmy_medeiros failed (status code 400) * At _site/en/project-team.html:7025: - External link http://twitter.com/araceletorres failed (status code 400) + External link https://twitter.com/araceletorres failed (status code 400) * At 
_site/en/project-team.html:7284: - External link http://twitter.com/j_w_baker failed (status code 400) + External link https://twitter.com/j_w_baker failed (status code 400) * At _site/en/project-team.html:7725: - External link http://twitter.com/Adam_Crymble failed (status code 400) + External link https://twitter.com/Adam_Crymble failed (status code 400) * At _site/en/project-team.html:8256: - External link http://twitter.com/jenniferisve failed (status code 400) + External link https://twitter.com/jenniferisve failed (status code 400) * At _site/en/project-team.html:8587: - External link http://twitter.com/rivaquiroga failed (status code 400) + External link https://twitter.com/rivaquiroga failed (status code 400) * At _site/en/project-team.html:8876: - External link http://twitter.com/amsichani failed (status code 400) + External link https://twitter.com/amsichani failed (status code 400) * At _site/en/project-team.html:9219: - External link http://twitter.com/AnisaHawes failed (status code 400) + External link https://twitter.com/AnisaHawes failed (status code 400) * At _site/en/project-team.html:10039: @@ -20082,17 +11936,9 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://academic.oup.com/jah/article-abstract/103/1/299/1751315 failed (status code 403) -* At _site/en/research.html:265: - - External link http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ failed: got a time out (response code 301) (status code 301) - -* At _site/en/research.html:280: - - External link https://novaresearch.unl.pt/files/32228034/Ensinar_Humanidades_Digitais.pdf failed (status code 403) - * At _site/en/research.html:283: - External link http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) + External link https://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) * At _site/en/research.html:327: @@ -20126,6 +11972,12 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://msuglobaldh.org/abstracts/#programming-historian failed: https://msuglobaldh.org/abstracts/ exists, but the hash 'programming-historian' does not (status code 200) +* At _site/en/research.html:395: + + External link https://ixa2.si.ehu.eus/intele/?q=webinars failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/en/research.html:483: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -20142,17 +11994,21 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/reviewer-guidelines.md failed (status code 429) +* At _site/en/supporters.html:280: + + External link https://www.sas.ac.uk/ failed (status code 403) + * At _site/en/supporters.html:292: External link https://www.tilburguniversity.edu/ failed (status code 403) -* At _site/en/supporters.html:334: +* At _site/en/supporters.html:304: - External link https://www.sussex.ac.uk/collaborate/business/public-funds#:~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies failed: https://www.sussex.ac.uk/collaborate/business/public-funds exists, but the hash ':~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies' does not (status code 200) + External link https://www.history.ac.uk/library-digital failed (status code 403) -* At _site/en/supporters.html:335: +* At _site/en/supporters.html:334: - External link https://www.thebritishacademy.ac.uk/projects/writing-workshops-2018-digital-humanities/ failed (status code 403) + External link https://www.sussex.ac.uk/collaborate/business/public-funds#:~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies failed: https://www.sussex.ac.uk/collaborate/business/public-funds exists, but the hash ':~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies' does not (status code 200) * At _site/en/supporters.html:434: @@ -20178,6 +12034,24 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/vacancies.md failed (status code 429) +* At _site/es/acerca-de.html:269: + + External link https://dhawards.org/dhawards2022/results/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/es/acerca-de.html:269: + + External link https://dhawards.org/dhawards2016/results/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/es/acerca-de.html:269: + + External link https://dhawards.org/dhawards2017/results/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/es/acerca-de.html:269: External link https://openpublishingawards.org/results/2021/index.html failed with something very wrong. @@ -20192,17 +12066,21 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/acerca-de.md failed (status code 429) +* At _site/es/colaboradores.html:281: + + External link https://www.sas.ac.uk/ failed (status code 403) + * At _site/es/colaboradores.html:293: External link https://www.tilburguniversity.edu/ failed (status code 403) -* At _site/es/colaboradores.html:335: +* At _site/es/colaboradores.html:305: - External link https://www.sussex.ac.uk/collaborate/business/public-funds#:~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies failed: https://www.sussex.ac.uk/collaborate/business/public-funds exists, but the hash ':~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies' does not (status code 200) + External link https://www.history.ac.uk/library-digital failed (status code 403) -* At _site/es/colaboradores.html:336: +* At _site/es/colaboradores.html:335: - External link https://www.thebritishacademy.ac.uk/projects/writing-workshops-2018-digital-humanities/ failed (status code 403) + External link https://www.sussex.ac.uk/collaborate/business/public-funds#:~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies failed: https://www.sussex.ac.uk/collaborate/business/public-funds exists, but the hash ':~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies' does not (status code 200) * At _site/es/colaboradores.html:435: @@ -20214,10 +12092,14 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/es/contribuciones.html:258: - External link http://www.mecd.gob.es/planes-nacionales/dam/jcr:f20a4ba1-0ed2-445d-9be9-b8b0382562ea/mex-glosario-interpares-total0112.pdf failed with something very wrong. + External link https://www.mecd.gob.es/planes-nacionales/dam/jcr:f20a4ba1-0ed2-445d-9be9-b8b0382562ea/mex-glosario-interpares-total0112.pdf failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) +* At _site/es/contribuciones.html:303: + + External link https://www.worldcat.org/title/programming-historian-en-espanol/oclc/1061292935&referer=brief_results failed (status code 403) + * At _site/es/contribuciones.html:357: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -20240,11 +12122,11 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/es/equipo-de-proyecto.html:306: - External link http://twitter.com/maxcarlons failed (status code 400) + External link https://twitter.com/maxcarlons failed (status code 400) * At _site/es/equipo-de-proyecto.html:508: - External link http://twitter.com/cosovschi failed (status code 400) + External link https://twitter.com/cosovschi failed (status code 400) * At _site/es/equipo-de-proyecto.html:1268: @@ -20254,99 +12136,101 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) * At _site/es/equipo-de-proyecto.html:1274: - External link http://twitter.com/nabsiddiqui failed (status code 400) + External link https://twitter.com/nabsiddiqui failed (status code 400) * At _site/es/equipo-de-proyecto.html:1627: - External link http://twitter.com/giulia_taurino failed (status code 400) + External link https://twitter.com/giulia_taurino failed (status code 400) * At _site/es/equipo-de-proyecto.html:1806: - External link http://twitter.com/alexwermercolan failed (status code 400) + External link https://twitter.com/alexwermercolan failed (status code 400) * At _site/es/equipo-de-proyecto.html:2055: - External link http://www.mariajoseafanador.com failed: Moved Permanently (status code 301) + External link https://www.mariajoseafanador.com failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/es/equipo-de-proyecto.html:2061: - External link http://twitter.com/mariajoafana failed (status code 400) + External link https://twitter.com/mariajoafana failed (status code 400) * At _site/es/equipo-de-proyecto.html:2530: - External link http://twitter.com/IsaGribomont failed (status code 400) + External link https://twitter.com/IsaGribomont failed (status code 400) * At _site/es/equipo-de-proyecto.html:2741: - External link http://twitter.com/espejolento failed (status code 400) + External link https://twitter.com/espejolento failed (status code 400) * At _site/es/equipo-de-proyecto.html:3032: - External link http://twitter.com/jenniferisve failed (status code 400) + External link https://twitter.com/jenniferisve failed (status code 400) * At _site/es/equipo-de-proyecto.html:3357: - External link http://twitter.com/enetreseles failed (status code 400) + External link https://twitter.com/enetreseles failed (status code 400) * At _site/es/equipo-de-proyecto.html:3564: - External link http://twitter.com/jgob failed (status code 400) + External link https://twitter.com/jgob failed (status code 400) * At _site/es/equipo-de-proyecto.html:3859: - External link http://twitter.com/rivaquiroga failed (status code 400) + External link https://twitter.com/rivaquiroga failed (status code 400) * At _site/es/equipo-de-proyecto.html:4800: - External link http://twitter.com/superHH failed (status code 400) + External link https://twitter.com/superHH failed (status code 400) * At _site/es/equipo-de-proyecto.html:5186: - External link http://twitter.com/emilienschultz failed (status code 400) + External link https://twitter.com/emilienschultz failed (status code 400) * At _site/es/equipo-de-proyecto.html:5313: - External link http://twitter.com/davvalent failed (status code 400) + External link https://twitter.com/davvalent failed (status code 400) * At _site/es/equipo-de-proyecto.html:5838: - External link http://twitter.com/danielalvesfcsh failed (status code 400) + External link https://twitter.com/danielalvesfcsh failed (status code 400) * At _site/es/equipo-de-proyecto.html:6103: - External link http://twitter.com/ericbrasiln failed (status code 400) + External link https://twitter.com/ericbrasiln failed (status code 400) * At _site/es/equipo-de-proyecto.html:6539: - External link http://twitter.com/jimmy_medeiros failed (status code 400) + External link https://twitter.com/jimmy_medeiros failed (status code 400) * At _site/es/equipo-de-proyecto.html:7023: - External link http://twitter.com/araceletorres failed (status 
code 400) + External link https://twitter.com/araceletorres failed (status code 400) * At _site/es/equipo-de-proyecto.html:7282: - External link http://twitter.com/j_w_baker failed (status code 400) + External link https://twitter.com/j_w_baker failed (status code 400) * At _site/es/equipo-de-proyecto.html:7723: - External link http://twitter.com/Adam_Crymble failed (status code 400) + External link https://twitter.com/Adam_Crymble failed (status code 400) * At _site/es/equipo-de-proyecto.html:8254: - External link http://twitter.com/jenniferisve failed (status code 400) + External link https://twitter.com/jenniferisve failed (status code 400) * At _site/es/equipo-de-proyecto.html:8585: - External link http://twitter.com/rivaquiroga failed (status code 400) + External link https://twitter.com/rivaquiroga failed (status code 400) * At _site/es/equipo-de-proyecto.html:8874: - External link http://twitter.com/amsichani failed (status code 400) + External link https://twitter.com/amsichani failed (status code 400) * At _site/es/equipo-de-proyecto.html:9217: - External link http://twitter.com/AnisaHawes failed (status code 400) + External link https://twitter.com/AnisaHawes failed (status code 400) * At _site/es/equipo-de-proyecto.html:10037: @@ -20368,21 +12252,13 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/commits/gh-pages failed (status code 429) -* At _site/es/guia-editor.html:365: - - External link https://github.com/programminghistorian/ph-submissions/blob/gh-pages/es/PLANTILLA-TRADUCCION.md failed (status code 429) - -* At _site/es/guia-editor.html:386: - - External link https://github.com/programminghistorian/ph-submissions/blob/gh-pages/es/PLANTILLA-LECCION.md failed (status code 429) - * At _site/es/guia-editor.html:505: External link https://zenodo.org/record/49873#.V0lazGaGa7o failed: https://zenodo.org/record/49873 exists, but the hash '.V0lazGaGa7o' does not (status code 200) * At _site/es/guia-editor.html:617: - External link http://www.europeana.eu/portal/en failed: Forbidden (status code 403) + External link https://www.europeana.eu/portal/en failed (status code 403) * At _site/es/guia-editor.html:645: @@ -20410,7 +12286,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/es/guia-para-revisores.html:336: - External link http://www.mecd.gob.es/planes-nacionales/dam/jcr:f20a4ba1-0ed2-445d-9be9-b8b0382562ea/mex-glosario-interpares-total0112.pdf failed with something very wrong. + External link https://www.mecd.gob.es/planes-nacionales/dam/jcr:f20a4ba1-0ed2-445d-9be9-b8b0382562ea/mex-glosario-interpares-total0112.pdf failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) @@ -20422,10 +12298,6 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/guia-para-revisores.md failed (status code 429) -* At _site/es/guia-para-traductores.html:260: - - External link https://github.com/programminghistorian/ph-submissions/blob/gh-pages/es/lista-de-traducciones.md failed (status code 429) - * At _site/es/guia-para-traductores.html:638: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -20442,21 +12314,9 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/index.md failed (status code 429) -* At _site/es/investigacion.html:264: - - External link http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian failed: got a time out (response code 301) (status code 301) - -* At _site/es/investigacion.html:265: - - External link http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian failed: got a time out (response code 301) (status code 301) - -* At _site/es/investigacion.html:280: - - External link https://novaresearch.unl.pt/files/32228034/Ensinar_Humanidades_Digitais.pdf failed (status code 403) - * At _site/es/investigacion.html:283: - External link http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) + External link https://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) * At _site/es/investigacion.html:326: @@ -20490,6 +12350,12 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://msuglobaldh.org/abstracts/#programming-historian failed: https://msuglobaldh.org/abstracts/ exists, but the hash 'programming-historian' does not (status code 200) +* At _site/es/investigacion.html:394: + + External link https://ixa2.si.ehu.eus/intele/?q=webinars failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/es/investigacion.html:485: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -20506,10 +12372,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/jisc-tna-colaboracion.md failed (status code 429) -* At _site/es/lecciones/administracion-de-datos-en-r.html:337: - - External link https://github.com/programminghistorian/ph-submissions/issues/199 failed (status code 429) - * At _site/es/lecciones/administracion-de-datos-en-r.html:1604: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -20518,14 +12380,16 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/administracion-de-datos-en-r.md failed (status code 429) -* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:337: - - External link https://github.com/programminghistorian/ph-submissions/issues/170 failed (status code 429) - * At _site/es/lecciones/analisis-de-corpus-con-antconc.html:630: External link https://academic.oup.com/dsh/article-abstract/8/4/243/928942 failed (status code 403) +* At _site/es/lecciones/analisis-de-corpus-con-antconc.html:1132: + + External link https://elies.rediris.es/elies18/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/es/lecciones/analisis-de-corpus-con-antconc.html:1675: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -20534,10 +12398,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/analisis-de-corpus-con-antconc.md failed (status code 429) -* At _site/es/lecciones/analisis-de-correspondencia-en-r.html:358: - - External link https://github.com/programminghistorian/ph-submissions/issues/331 failed (status code 429) - * At _site/es/lecciones/analisis-de-correspondencia-en-r.html:1705: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -20546,10 +12406,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/analisis-de-correspondencia-en-r.md failed (status code 429) -* At _site/es/lecciones/analisis-de-sentimientos-r.html:333: - - External link https://github.com/programminghistorian/ph-submissions/issues/286 failed (status code 429) - * At _site/es/lecciones/analisis-de-sentimientos-r.html:1544: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -20558,10 +12414,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/analisis-de-sentimientos-r.md failed (status code 429) -* At _site/es/lecciones/analisis-redes-sociales-teatro-1.html:333: - - External link https://github.com/programminghistorian/ph-submissions/issues/517 failed (status code 429) - * At _site/es/lecciones/analisis-redes-sociales-teatro-1.html:1448: External link https://doi.org/10.5944/rhd.vol.4.2019.25187 failed: got a time out (response code 302) (status code 302) @@ -20578,14 +12430,14 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/analisis-redes-sociales-teatro-1.md failed (status code 429) -* At _site/es/lecciones/analisis-redes-sociales-teatro-2.html:333: - - External link https://github.com/programminghistorian/ph-submissions/issues/547 failed (status code 429) - * At _site/es/lecciones/analisis-redes-sociales-teatro-2.html:511: External link https://gephi.org/plugins/#/ failed: https://gephi.org/plugins/ exists, but the hash '/' does not (status code 200) +* At _site/es/lecciones/analisis-redes-sociales-teatro-2.html:1101: + + External link https://doi.org/10.1093/llc/fqaa015 failed (status code 403) + * At _site/es/lecciones/analisis-redes-sociales-teatro-2.html:1639: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -20594,10 +12446,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/analisis-redes-sociales-teatro-2.md failed (status code 429) -* At _site/es/lecciones/analisis-temporal-red.html:341: - - External link https://github.com/programminghistorian/ph-submissions/issues/218 failed (status code 429) - * At _site/es/lecciones/analisis-temporal-red.html:1764: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -20606,14 +12454,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/analisis-temporal-red.md failed (status code 429) -* At _site/es/lecciones/analisis-voyant-tools.html:337: - - External link https://github.com/programminghistorian/ph-submissions/issues/211 failed (status code 429) - -* At _site/es/lecciones/analisis-voyant-tools.html:670: - - External link https://github.com/corpusenespanol/discursos-presidenciales/blob/master/mexico/2007_mx_calderon.txt failed (status code 429) - * At _site/es/lecciones/analisis-voyant-tools.html:1086: External link https://twitter.com/VoyantTools/status/1025458748574326784 failed (status code 400) @@ -20634,43 +12474,27 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/analisis-voyant-tools.md failed (status code 429) -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:337: - - External link https://github.com/programminghistorian/ph-submissions/issues/188 failed (status code 429) - -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:592: - - External link http://localhost/ failed with something very wrong. -It's possible libcurl couldn't connect to the server, or perhaps the request timed out. -Sometimes, making too many requests at once also breaks things. (status code 0) - -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:592: - - External link http://127.0.0.1 failed with something very wrong. -It's possible libcurl couldn't connect to the server, or perhaps the request timed out. -Sometimes, making too many requests at once also breaks things. (status code 0) - -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:622: +* At _site/es/lecciones/construir-repositorio-de-fuentes.html:623: - External link http://localhost/phpmyadmin/ failed with something very wrong. 
+ External link https://localhost/phpmyadmin/ failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:692: +* At _site/es/lecciones/construir-repositorio-de-fuentes.html:693: - External link http://localhost/phpmyadmin failed with something very wrong. + External link https://localhost/phpmyadmin failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:914: +* At _site/es/lecciones/construir-repositorio-de-fuentes.html:915: - External link http://omeka.org/codex/Plugin_Writing_Best_Practices#Plugin_Directory_Structure failed: http://omeka.org/codex/Plugin_Writing_Best_Practices exists, but the hash 'Plugin_Directory_Structure' does not (status code 200) + External link https://omeka.org/codex/Plugin_Writing_Best_Practices#Plugin_Directory_Structure failed: https://omeka.org/codex/Plugin_Writing_Best_Practices exists, but the hash 'Plugin_Directory_Structure' does not (status code 200) -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:1880: +* At _site/es/lecciones/construir-repositorio-de-fuentes.html:1881: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) -* At _site/es/lecciones/construir-repositorio-de-fuentes.html:1890: +* At _site/es/lecciones/construir-repositorio-de-fuentes.html:1891: External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/construir-repositorio-de-fuentes.md failed (status code 429) @@ -20690,6 +12514,12 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/197 failed (status code 429) +* At _site/es/lecciones/corpus-paralelo-lfaligner.html:501: + + External link https://utils.mucattu.com/iso_639-1.html failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/es/lecciones/corpus-paralelo-lfaligner.html:1657: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -20716,15 +12546,13 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:562: - External link http://martenduering.com/research/covert-networks-during-the-holocaust/ failed: Moved Permanently (status code 301) - -* At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:993: - - External link http://www.cambridge.org/us/academic/subjects/sociology/research-methods-sociology-and-criminology/exploratory-social-network-analysis-pajek-2nd-edition failed (status code 403) + External link https://martenduering.com/research/covert-networks-during-the-holocaust/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. 
(status code 0) * At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:993: - External link http://pajek.imfm.si/doku.php failed: got a time out (response code 0) (status code 0) + External link https://pajek.imfm.si/doku.php failed: got a time out (response code 0) (status code 0) * At _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html:1533: @@ -20776,7 +12604,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/es/lecciones/datos-de-investigacion-con-unix.html:560: - External link https://www.worldcat.org/title/unix-and-linux/oclc/308171076&referer=brief_results failed (status code 403) + External link https://www.worldcat.org/title/unix-y-linux-gua-prctica/oclc/970524006&referer=brief_results failed (status code 403) * At _site/es/lecciones/datos-de-investigacion-con-unix.html:1621: @@ -20846,6 +12674,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/134 failed (status code 429) +* At _site/es/lecciones/editar-audio-con-audacity.html:555: + + External link https://web.archive.org/web/20161119231053/https://www.indiana.edu:80/~emusic/acoustics/amplitude.htm failed (status code 404) + * At _site/es/lecciones/editar-audio-con-audacity.html:1228: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -20864,31 +12696,25 @@ Sometimes, making too many requests at once also breaks things. (status code 0) It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:627: - - External link https://github.com/dhcolumbia/pandoc-workflow/blob/master/pandoctut.bib failed (status code 429) - * At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:887: External link https://groups.google.com/forum/#!forum/pandoc-discuss failed: https://groups.google.com/forum/ exists, but the hash '!forum/pandoc-discuss' does not (status code 200) * At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:887: - External link http://stackoverflow.com/questions/tagged/pandoc failed (status code 403) + External link https://stackoverflow.com/questions/tagged/pandoc failed (status code 403) * At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:889: - External link http://mouapp.com/ failed with something very wrong. + External link https://www.draftin.com failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:889: - External link http://www.authorea.com failed: Forbidden (status code 403) - -* At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:889: - - External link http://www.draftin.com failed: Service Unavailable (status code 503) + External link https://mouapp.com/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. 
(status code 0) * At _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html:919: @@ -20906,6 +12732,16 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/407 failed (status code 429) +* At _site/es/lecciones/exhibicion-con-collection-builder.html:584: + + External link https://dna.nust.na/heritage_week/ failed: got a time out (response code 0) (status code 0) + +* At _site/es/lecciones/exhibicion-con-collection-builder.html:592: + + External link https://www.gimp.org.es/descargar-gimp.html failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/es/lecciones/exhibicion-con-collection-builder.html:611: External link https://docs.google.com/spreadsheets/d/1Uv9ytll0hysMOH1j-VL1lZx6PWvc1zf3L35sK_4IuzI/edit#gid=0 failed: https://docs.google.com/spreadsheets/d/1Uv9ytll0hysMOH1j-VL1lZx6PWvc1zf3L35sK_4IuzI/edit exists, but the hash 'gid=0' does not (status code 200) @@ -20922,21 +12758,11 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/509 failed (status code 429) -* At _site/es/lecciones/generadores-aventura.html:488: - - External link https://github.com/srsergiorodriguez/aventura/blob/master/README_es.md failed (status code 429) - -* At _site/es/lecciones/generadores-aventura.html:586: - - External link https://github.com/srsergiorodriguez/aventura/blob/master/README_es.md failed (status code 429) - -* At _site/es/lecciones/generadores-aventura.html:608: - - External link https://github.com/srsergiorodriguez/aventura/blob/master/README.md failed (status code 429) - -* At _site/es/lecciones/generadores-aventura.html:608: +* At _site/es/lecciones/generadores-aventura.html:534: - External link https://github.com/srsergiorodriguez/aventura/blob/master/README_es.md failed (status code 429) + External link https://www.spoonbill.org/n+7/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/es/lecciones/generadores-aventura.html:1572: @@ -20972,9 +12798,15 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/es/lecciones/georreferenciar-qgis.html:617: - External link http://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP failed with something very wrong. -It's possible libcurl couldn't connect to the server, or perhaps the request timed out. -Sometimes, making too many requests at once also breaks things. 
(status code 0) + External link https://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP failed: Found (status code 302) + +* At _site/es/lecciones/georreferenciar-qgis.html:732: + + External link https://web.archive.org/web/20180922004858/https://www.islandimagined.ca:80/fedora/repository/imagined:208687 failed (status code 404) + +* At _site/es/lecciones/georreferenciar-qgis.html:909: + + External link https://web.archive.org/web/20180922004858/https://www.islandimagined.ca:80/fedora/repository/imagined:208687 failed (status code 404) * At _site/es/lecciones/georreferenciar-qgis.html:2474: @@ -21034,7 +12866,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/es/lecciones/instalar-modulos-python-pip.html:587: - External link http://stackoverflow.com/questions/4750806/how-to-install-pip-on-windows failed (status code 403) + External link https://stackoverflow.com/questions/4750806/how-to-install-pip-on-windows failed (status code 403) * At _site/es/lecciones/instalar-modulos-python-pip.html:1141: @@ -21064,6 +12896,12 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/62 failed (status code 429) +* At _site/es/lecciones/introduccion-a-bash.html:779: + + External link https://www.viemu.com/a-why-vi-vim.html failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/es/lecciones/introduccion-a-bash.html:2054: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -21084,10 +12922,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://training.ashleyblewer.com/presentations/ffmpeg.html#10 failed: https://training.ashleyblewer.com/presentations/ffmpeg.html exists, but the hash '10' does not (status code 200) -* At _site/es/lecciones/introduccion-a-ffmpeg.html:1171: - - External link https://github.com/privatezero/NDSR/blob/master/Demystifying_FFmpeg_Slides.pdf failed (status code 429) - * At _site/es/lecciones/introduccion-a-ffmpeg.html:1731: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -21124,10 +12958,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/introduccion-a-markdown.md failed (status code 429) -* At _site/es/lecciones/introduccion-a-powershell.html:337: - - External link https://github.com/programminghistorian/ph-submissions/issues/146 failed (status code 429) - * At _site/es/lecciones/introduccion-a-powershell.html:1769: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -21176,13 +13006,39 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/142 failed (status code 429) +* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:556: + + External link https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. 
(status code 0) + +* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:701: + + External link https://www.history.ac.uk/projects/digital/tobias failed (status code 403) + * At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:733: - External link http://semanticweb.org/wiki/Main_Page.html failed: got a time out (response code 0) (status code 0) + External link https://semanticweb.org/wiki/Main_Page.html failed: got a time out (response code 0) (status code 0) + +* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:887: + + External link https://www.history.ac.uk/projects/digital/tobias failed (status code 403) * At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:1077: - External link http://linkeddata.org/guides-and-tutorials failed: Internal Server Error (status code 500) + External link https://linkeddata.org/guides-and-tutorials failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:1083: + + External link https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:1098: + + External link https://www.history.ac.uk/projects/digital/tobias failed (status code 403) * At _site/es/lecciones/introduccion-datos-abiertos-enlazados.html:1635: @@ -21240,6 +13096,12 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://powerhouse.com.au/ failed (status code 429) +* At _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html:591: + + External link https://vis.stanford.edu/papers/wrangler/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html:597: External link https://powerhouse.com.au/ failed (status code 429) @@ -21252,6 +13114,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/limpieza-de-datos-con-OpenRefine.md failed (status code 429) +* At _site/es/lecciones/manipular-cadenas-de-caracteres-en-python.html:339: + + External link https://github.com/programminghistorian/ph-submissions/issues/43 failed (status code 429) + * At _site/es/lecciones/manipular-cadenas-de-caracteres-en-python.html:1722: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -21266,28 +13132,24 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) * At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:570: - External link http://archive.org/stream/lettertowilliaml00doug/39999066767938#page/n0/mode/2up failed: http://archive.org/stream/lettertowilliaml00doug/39999066767938 exists, but the hash 'page/n0/mode/2up' does not (status code 200) + External link https://archive.org/stream/lettertowilliaml00doug/39999066767938#page/n0/mode/2up failed: https://archive.org/stream/lettertowilliaml00doug/39999066767938 exists, but the hash 'page/n0/mode/2up' does not (status code 200) * At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:595: - External link http://internetarchive.readthedocs.io/en/latest/quickstart.html#searching failed (status code 404) + External link https://internetarchive.readthedocs.io/en/latest/quickstart.html#searching failed (status code 404) * At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:613: - External link http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading failed (status code 404) + External link https://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading failed (status code 404) * At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:698: - External link http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading failed (status code 404) + External link https://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading failed (status code 404) * At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:705: External link https://archive.org/about/faqs.php#140 failed: https://archive.org/about/faqs.php exists, but the hash '140' does not (status code 200) -* At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:798: - - External link https://github.com/edsu/pymarc/blob/master/pymarc/marcxml.py failed (status code 429) - * At _site/es/lecciones/mineria-de-datos-en-internet-archive.html:1410: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -21308,10 +13170,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/normalizar-datos.md failed (status code 429) -* At _site/es/lecciones/palabras-clave-en-contexto-n-grams.html:339: - - External link https://github.com/programminghistorian/ph-submissions/issues/50 failed (status code 429) - * At _site/es/lecciones/palabras-clave-en-contexto-n-grams.html:1661: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -21406,25 +13264,25 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:545: - External link http://labs.europeana.eu/api/linked-open-data-introduction failed with something very wrong. + External link https://labs.europeana.eu/api/linked-open-data-introduction failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. 
(status code 0) * At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:545: - External link http://collection.britishmuseum.org/ failed: got a time out (response code 0) (status code 0) + External link https://collection.britishmuseum.org/ failed: got a time out (response code 0) (status code 0) * At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:754: - External link http://collection.britishmuseum.org/sparql failed: got a time out (response code 0) (status code 0) + External link https://collection.britishmuseum.org/sparql failed: got a time out (response code 0) (status code 0) * At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:772: - External link http://collection.britishmuseum.org/resource?uri=http://collection.britishmuseum.org/id/object/PPA82633 failed: got a time out (response code 0) (status code 0) + External link https://collection.britishmuseum.org/resource?uri=https://collection.britishmuseum.org/id/object/PPA82633 failed: got a time out (response code 0) (status code 0) * At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:782: - External link http://collection.britishmuseum.org/sparql?query=SELECT+*%0D%0AWHERE+%7B%0D%0A++%3Chttp://collection.britishmuseum.org/id/object/PPA82633%3E+?p+?o+.%0D%0A++%7D&_implicit=false&_equivalent=false&_form=/sparql failed: got a time out (response code 0) (status code 0) + External link https://collection.britishmuseum.org/sparql?query=SELECT+*%0D%0AWHERE+%7B%0D%0A++%3Chttp://collection.britishmuseum.org/id/object/PPA82633%3E+?p+?o+.%0D%0A++%7D&_implicit=false&_equivalent=false&_form=/sparql failed: got a time out (response code 0) (status code 0) * At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:859: @@ -21438,23 +13296,19 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://collection.britishmuseum.org/sparql#query=PREFIX+bmo:+%3Chttp://www.researchspace.org/ontology/%3E%0APREFIX+skos:+%3Chttp://www.w3.org/2004/02/skos/core%23%3E%0APREFIX+ecrm:+%3Chttp://www.cidoc-crm.org/cidoc-crm/%3E%0APREFIX+xsd:+%3Chttp://www.w3.org/2001/XMLSchema%23%3E%0A%0ASELECT+?type+(COUNT(?type)+as+?n)%0AWHERE+%7B%0A++%23+We+still+need+to+indicate+the+?object_type+variable,%0A++%23+however+we+will+not+require+it+to+match+%22print%22+this+time%0A%0A++?object+bmo:PX_object_type+?object_type+.%0A++?object_type+skos:prefLabel+?type+.%0A%0A++%23+Once+again,+we+will+also+filter+by+date%0A++?object+ecrm:P108i_was_produced_by+?production+.%0A++?production+ecrm:P9_consists_of+?date_node+.%0A++?date_node+ecrm:P4_has_time-span+?timespan+.%0A++?timespan+ecrm:P82a_begin_of_the_begin+?date+.%0A++FILTER(?date+%3E=+%221580-01-01%22%5E%5Exsd:date+&&%0A+++++++++?date+%3C=+%221600-01-01%22%5E%5Exsd:date)%0A%7D%0A%23+The+GROUP+BY+command+designates+the+variable+to+tally+by,%0A%23+and+the+ORDER+BY+DESC()+command+sorts+the+results+by%0A%23+descending+number.%0AGROUP+BY+?type%0AORDER+BY+DESC(?n) failed: got a time out (response code 0) (status code 0) -* At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:1064: - - External link http://www.getty.edu/research/ failed: Permanent Redirect (status code 308) - * At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:1099: External link https://collection.britishmuseum.org/sparql?query=%23+Return+object+links+and+creation+date%0D%0APREFIX+bmo:+%3Chttp://collection.britishmuseum.org/id/ontology/%3E%0D%0APREFIX+skos:+%3Chttp://www.w3.org/2004/02/skos/core%23%3E%0D%0APREFIX+ecrm:+%3Chttp://erlangen-crm.org/current/%3E%0D%0APREFIX+xsd:+%3Chttp://www.w3.org/2001/XMLSchema%23%3E%0D%0ASELECT+DISTINCT+?object+?date+?image%0D%0AWHERE+%7B%0D%0A%0D%0A++%23+We'll+use+our+previous+command+to+search+only+for+objects+of+type+%22print%22%0D%0A++?object+bmo:PX_object_type+?object_type+.%0D%0A++?object_type+skos:prefLabel+%22print%22+.%0D%0A%0D%0A++%23+We+need+to+link+though+several+nodes+to+find+the+creation+date+associated%0D%0A++%23+with+an+object%0D%0A++?object+ecrm:P108i_was_produced_by+?production+.%0D%0A++?production+ecrm:P9_consists_of+?date_node+.%0D%0A++?date_node+ecrm:P4_has_time-span+?timespan+.%0D%0A++?timespan+ecrm:P82a_begin_of_the_begin+?date+.%0D%0A%0D%0A++%23+Yes,+we+need+to+connect+quite+a+few+dots+to+get+to+the+date+node!+Now+that%0D%0A++%23+we+have+it,+we+can+filter+our+results.+Because+we+are+filtering+a+date,+we%0D%0A++%23+must+attach+the+xsd:date+tag+to+our+date+strings+so+that+SPARQL+knows+how+to%0D%0A++%23+parse+them.%0D%0A%0D%0A++FILTER(?date+%3E=+%221580-01-01%22%5E%5Exsd:date+%26%26+?date+%3C=+%221600-01-01%22%5E%5Exsd:date)%0D%0A++%0D%0A++?object+bmo:PX_has_main_representation+?image+.%0D%0A%7D%0D%0ALIMIT+100#query=%23+Return+object+links+and+creation+date%0APREFIX+bmo:+%3Chttp://www.researchspace.org/ontology/%3E%0APREFIX+skos:+%3Chttp://www.w3.org/2004/02/skos/core%23%3E%0APREFIX+xsd:+%3Chttp://www.w3.org/2001/XMLSchema%23%3E%0APREFIX+ecrm:+%3Chttp://www.cidoc-crm.org/cidoc-crm/%3E%0ASELECT+DISTINCT+?object+?date+?image%0AWHERE+%7B%0A++%0A++%23+We'll+use+our+previous+command+to+search+only+for+objects+of+type+%22print%22%0A++?object+bmo:PX_object_type+?object_type+.%0A++?object_type+skos:prefLabel+%22print%22+.%0A%0A++%23+We+need+to+link+though+several+nodes+to+find+the+creation+date+associated%0A++%23+with+an+object%0A++?object+ecrm:P108i_was_produced_by+?production+.%0A++?producti
on+ecrm:P9_consists_of+?date_node+.%0A++?date_node+ecrm:P4_has_time-span+?timespan+.%0A++?timespan+ecrm:P82a_begin_of_the_begin+?date+.%0A%0A++%0A++%23+Yes,+we+need+to+connect+quite+a+few+dots+to+get+to+the+date+node!+Now+that%0A++%23+we+have+it,+we+can+filter+our+results.+Because+we+are+filtering+a+date,+we%0A++%23+must+attach+the+xsd:date+tag+to+our+date+strings+so+that+SPARQL+knows+how+to%0A++%23+parse+them.%0A%0A++FILTER(?date+%3E=+%221580-01-01%22%5E%5Exsd:date+&&+?date+%3C=+%221600-01-01%22%5E%5Exsd:date)%0A++%0A++?object+bmo:PX_has_main_representation+?image+.%0A%7D%0ALIMIT+100 failed: got a time out (response code 0) (status code 0) * At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:1133: - External link http://labs.europeana.eu/api/linked-open-data-SPARQL-endpoint failed with something very wrong. + External link https://labs.europeana.eu/api/linked-open-data-SPARQL-endpoint failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:1134: - External link http://vocab.getty.edu/queries#Finding_Subjects failed: http://vocab.getty.edu/queries exists, but the hash 'Finding_Subjects' does not (status code 200) + External link https://vocab.getty.edu/queries#Finding_Subjects failed: https://vocab.getty.edu/queries exists, but the hash 'Finding_Subjects' does not (status code 200) * At _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html:1672: @@ -21488,7 +13342,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html:497: - External link http://labs.bl.uk failed with something very wrong. + External link https://labs.bl.uk failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) @@ -21558,9 +13412,15 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/303 failed (status code 429) +* At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1690: + + External link https://jekyllthemes.org/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html:1755: - External link http://jekyll-windows.juthilo.com/ failed with something very wrong. + External link https://jekyll-windows.juthilo.com/ failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) @@ -21576,13 +13436,13 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/191#issuecomment-432826840 failed (status code 429) -* At _site/es/lecciones/topic-modeling-y-mallet.html:571: +* At _site/es/lecciones/topic-modeling-y-mallet.html:1009: - External link http://www.worldcat.org/title/reading-machines-toward-an-algorithmic-criticism/oclc/708761605&referer=brief_results failed: Forbidden (status code 403) + External link https://web.archive.org/web/20160704150726/https://www.lisarhody.com:80/some-assembly-required/ failed (status code 404) * At _site/es/lecciones/topic-modeling-y-mallet.html:1011: - External link http://dl.acm.org/citation.cfm?id=944937 failed (status code 403) + External link https://dl.acm.org/citation.cfm?id=944937 failed (status code 403) * At _site/es/lecciones/topic-modeling-y-mallet.html:2469: @@ -21630,9 +13490,11 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://stackoverflow.com/questions/16811332/cannot-run-java-from-the-windows-powershell-command-prompt failed (status code 403) -* At _site/es/lecciones/transformacion-datos-xml-xsl.html:1338: +* At _site/es/lecciones/transformacion-datos-xml-xsl.html:777: - External link https://www.computerhope.com/issues/ch000549.htm failed (status code 403) + External link https://scissors-and-paste.net failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/es/lecciones/transformacion-datos-xml-xsl.html:1338: @@ -21686,10 +13548,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/visualizacion-y-animacion-de-tablas-historicas-con-R.md failed (status code 429) -* At _site/es/pia.html:271: - - External link https://www.oecd.org/en/topics/sub-issues/oda-eligibility-and-conditions/dac-list-of-oda-recipients.html failed (status code 403) - * At _site/es/pia.html:422: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -21738,6 +13596,24 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://fr.wikipedia.org/wiki/Libre_acc%C3%A8s_(%C3%A9dition_scientifique)#La_voie_diamant failed: https://fr.wikipedia.org/wiki/Libre_acc%C3%A8s_(%C3%A9dition_scientifique) exists, but the hash 'La_voie_diamant' does not (status code 200) +* At _site/fr/apropos.html:266: + + External link https://dhawards.org/dhawards2017/results/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/fr/apropos.html:266: + + External link https://dhawards.org/dhawards2016/results/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/fr/apropos.html:266: + + External link https://dhawards.org/dhawards2022/results/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. 
(status code 0) + * At _site/fr/apropos.html:266: External link https://openpublishingawards.org/results/2021/index.html failed with something very wrong. @@ -21804,6 +13680,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/consignes-traducteurs.md failed (status code 429) +* At _site/fr/contribuer.html:298: + + External link https://www.worldcat.org/title/programming-historian-en-espanol/oclc/1061292935&referer=brief_results failed (status code 403) + * At _site/fr/contribuer.html:353: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -21826,11 +13706,11 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/fr/equipe-projet.html:310: - External link http://twitter.com/maxcarlons failed (status code 400) + External link https://twitter.com/maxcarlons failed (status code 400) * At _site/fr/equipe-projet.html:512: - External link http://twitter.com/cosovschi failed (status code 400) + External link https://twitter.com/cosovschi failed (status code 400) * At _site/fr/equipe-projet.html:1272: @@ -21840,99 +13720,101 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/fr/equipe-projet.html:1278: - External link http://twitter.com/nabsiddiqui failed (status code 400) + External link https://twitter.com/nabsiddiqui failed (status code 400) * At _site/fr/equipe-projet.html:1631: - External link http://twitter.com/giulia_taurino failed (status code 400) + External link https://twitter.com/giulia_taurino failed (status code 400) * At _site/fr/equipe-projet.html:1810: - External link http://twitter.com/alexwermercolan failed (status code 400) + External link https://twitter.com/alexwermercolan failed (status code 400) * At _site/fr/equipe-projet.html:2059: - External link http://www.mariajoseafanador.com failed: Moved Permanently (status code 301) + External link https://www.mariajoseafanador.com failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. 
(status code 0) * At _site/fr/equipe-projet.html:2065: - External link http://twitter.com/mariajoafana failed (status code 400) + External link https://twitter.com/mariajoafana failed (status code 400) * At _site/fr/equipe-projet.html:2534: - External link http://twitter.com/IsaGribomont failed (status code 400) + External link https://twitter.com/IsaGribomont failed (status code 400) * At _site/fr/equipe-projet.html:2745: - External link http://twitter.com/espejolento failed (status code 400) + External link https://twitter.com/espejolento failed (status code 400) * At _site/fr/equipe-projet.html:3036: - External link http://twitter.com/jenniferisve failed (status code 400) + External link https://twitter.com/jenniferisve failed (status code 400) * At _site/fr/equipe-projet.html:3361: - External link http://twitter.com/enetreseles failed (status code 400) + External link https://twitter.com/enetreseles failed (status code 400) * At _site/fr/equipe-projet.html:3568: - External link http://twitter.com/jgob failed (status code 400) + External link https://twitter.com/jgob failed (status code 400) * At _site/fr/equipe-projet.html:3863: - External link http://twitter.com/rivaquiroga failed (status code 400) + External link https://twitter.com/rivaquiroga failed (status code 400) * At _site/fr/equipe-projet.html:4804: - External link http://twitter.com/superHH failed (status code 400) + External link https://twitter.com/superHH failed (status code 400) * At _site/fr/equipe-projet.html:5190: - External link http://twitter.com/emilienschultz failed (status code 400) + External link https://twitter.com/emilienschultz failed (status code 400) * At _site/fr/equipe-projet.html:5317: - External link http://twitter.com/davvalent failed (status code 400) + External link https://twitter.com/davvalent failed (status code 400) * At _site/fr/equipe-projet.html:5842: - External link http://twitter.com/danielalvesfcsh failed (status code 400) + External link https://twitter.com/danielalvesfcsh failed (status code 400) * At _site/fr/equipe-projet.html:6107: - External link http://twitter.com/ericbrasiln failed (status code 400) + External link https://twitter.com/ericbrasiln failed (status code 400) * At _site/fr/equipe-projet.html:6543: - External link http://twitter.com/jimmy_medeiros failed (status code 400) + External link https://twitter.com/jimmy_medeiros failed (status code 400) * At _site/fr/equipe-projet.html:7027: - External link http://twitter.com/araceletorres failed (status code 400) + External link https://twitter.com/araceletorres failed (status code 400) * At _site/fr/equipe-projet.html:7286: - External link http://twitter.com/j_w_baker failed (status code 400) + External link https://twitter.com/j_w_baker failed (status code 400) * At _site/fr/equipe-projet.html:7727: - External link http://twitter.com/Adam_Crymble failed (status code 400) + External link https://twitter.com/Adam_Crymble failed (status code 400) * At _site/fr/equipe-projet.html:8258: - External link http://twitter.com/jenniferisve failed (status code 400) + External link https://twitter.com/jenniferisve failed (status code 400) * At _site/fr/equipe-projet.html:8589: - External link http://twitter.com/rivaquiroga failed (status code 400) + External link https://twitter.com/rivaquiroga failed (status code 400) * At _site/fr/equipe-projet.html:8878: - External link http://twitter.com/amsichani failed (status code 400) + External link https://twitter.com/amsichani failed (status code 400) * At _site/fr/equipe-projet.html:9221: - 
External link http://twitter.com/AnisaHawes failed (status code 400) + External link https://twitter.com/AnisaHawes failed (status code 400) * At _site/fr/equipe-projet.html:10041: @@ -21974,13 +13856,47 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://academic.oup.com/dsh/article-abstract/8/4/243/928942 failed (status code 403) +* At _site/fr/lecons/analyse-corpus-antconc.html:1092: + + External link https://explorationdecorpus.corpusecrits.huma-num.fr/antconc/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/fr/lecons/analyse-corpus-antconc.html:1093: + + External link https://cid.ens-lyon.fr/ac_article.asp?fic=antconc.asp failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/fr/lecons/analyse-corpus-antconc.html:1095: + + External link https://ancilla.unice.fr/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/fr/lecons/analyse-corpus-antconc.html:1095: + + External link https://textometrie.ens-lyon.fr/?lang=fr failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/fr/lecons/analyse-corpus-antconc.html:1095: - External link http://www.lexi-co.com/ failed: got a time out (response code 301) (status code 301) + External link https://iramuteq.org/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/fr/lecons/analyse-corpus-antconc.html:1095: - External link http://ancilla.unice.fr/ failed: got a time out (response code 0) (status code 0) + External link https://www.lexi-co.com/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/fr/lecons/analyse-corpus-antconc.html:1099: + + External link https://lexicometrica.univ-paris3.fr/livre/st94/st94-tdm.html failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/fr/lecons/analyse-corpus-antconc.html:1639: @@ -22034,6 +13950,10 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/329 failed (status code 429) +* At _site/fr/lecons/calibration-radiocarbone-avec-r.html:709: + + External link https://calib.org failed: got a time out (response code 0) (status code 0) + * At _site/fr/lecons/calibration-radiocarbone-avec-r.html:715: External link https://fr.wikipedia.org/wiki/Suaire_de_Turin#La_datation_par_le_carbone_14_(1988-1989) failed: https://fr.wikipedia.org/wiki/Suaire_de_Turin#La_datation_par_le_carbone_14_(1988-1989) exists, but the hash 'La_datation_par_le_carbone_14_(1988-1989)' does not (status code 200) @@ -22052,7 +13972,13 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/fr/lecons/comprendre-les-expressions-regulieres.html:668: - External link http://archive.org/stream/jstor-4560629/4560629#page/n0/mode/2up failed: http://archive.org/stream/jstor-4560629/4560629 exists, but the hash 'page/n0/mode/2up' does not (status code 200) + External link https://archive.org/stream/jstor-4560629/4560629#page/n0/mode/2up failed: https://archive.org/stream/jstor-4560629/4560629 exists, but the hash 'page/n0/mode/2up' does not (status code 200) + +* At _site/fr/lecons/comprendre-les-expressions-regulieres.html:1466: + + External link https://dh.obdurodon.org/regex.html failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/fr/lecons/comprendre-les-expressions-regulieres.html:2005: @@ -22134,29 +14060,25 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://www.java.com/fr/download/ failed (status code 403) -* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:1269: - - External link https://github.com/impresso/PH-Passim-tutorial/blob/master/eebo/code/main.py failed (status code 429) - -* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:1312: - - External link https://github.com/impresso/impresso-pycommons/blob/master/impresso_commons/text/rebuilder.py failed (status code 429) - -* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:1454: +* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:1530: - External link https://github.com/impresso/PH-passim-tutorial/blob/master/explore-passim-output.ipynb failed (status code 429) + External link https://dx.doi.org/10.1093/alh/ajv029 failed (status code 403) * At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:1531: - External link http://dx.doi.org/10.1093/alh/ajv028 failed (status code 403) + External link https://dx.doi.org/10.1093/alh/ajv028 failed (status code 403) * At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:1532: - External link http://dx.doi.org/10.1080/1461670x.2020.1761865 failed (status code 403) + External link https://dx.doi.org/10.1080/1461670x.2020.1761865 failed (status code 403) * At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:1538: - External link http://dx.doi.org/10.1145/2682571.2797068 failed (status code 403) + External link https://dx.doi.org/10.1145/2682571.2797068 failed (status code 403) + +* At _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:1539: + + External link https://doi.org/10.18653/v1/D17-1290 failed (status code 409) * At 
_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html:2515: @@ -22202,10 +14124,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/generer-jeu-donnees-texte-ocr.md failed (status code 429) -* At _site/fr/lecons/gestion-manipulation-donnees-r.html:337: - - External link https://github.com/programminghistorian/ph-submissions/issues/625 failed (status code 429) - * At _site/fr/lecons/gestion-manipulation-donnees-r.html:1593: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -22276,7 +14194,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/fr/lecons/intro-aux-bots-twitter.html:564: - External link http://www.sciencedirect.com/science/article/pii/S0747563213003129 failed: Forbidden (status code 403) + External link https://www.sciencedirect.com/science/article/pii/S0747563213003129 failed (status code 403) * At _site/fr/lecons/intro-aux-bots-twitter.html:587: @@ -22284,15 +14202,15 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/fr/lecons/intro-aux-bots-twitter.html:655: - External link http://twitter.com/tinyarchae failed (status code 400) + External link https://twitter.com/archaeoglitch failed (status code 400) * At _site/fr/lecons/intro-aux-bots-twitter.html:655: - External link https://twitter.com/archaeoglitch failed (status code 400) + External link https://twitter.com/botarchaeo failed (status code 400) * At _site/fr/lecons/intro-aux-bots-twitter.html:655: - External link https://twitter.com/botarchaeo failed (status code 400) + External link https://twitter.com/tinyarchae failed (status code 400) * At _site/fr/lecons/intro-aux-bots-twitter.html:665: @@ -22300,7 +14218,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/fr/lecons/intro-aux-bots-twitter.html:907: - External link http://unicode.org/emoji/charts/full-emoji-list.html failed with something very wrong. + External link https://unicode.org/emoji/charts/full-emoji-list.html failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) @@ -22356,6 +14274,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/introduction-a-heurist.md failed (status code 429) +* At _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html:337: + + External link https://github.com/programminghistorian/ph-submissions/issues/231 failed (status code 429) + * At _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html:1910: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -22404,14 +14326,16 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/manipuler-chaines-caracteres-python.md failed (status code 429) -* At _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html:341: - - External link https://github.com/programminghistorian/ph-submissions/issues/223 failed (status code 429) - * At _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html:577: External link https://powerhouse.com.au/ failed (status code 429) +* At _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html:587: + + External link https://vis.stanford.edu/papers/wrangler/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html:592: External link https://api.maas.museum/docs failed with something very wrong. @@ -22442,10 +14366,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/616 failed (status code 429) -* At _site/fr/lecons/preserver-logiciels-recherche.html:789: - - External link https://github.com/torvalds/linux/blob/master/fs/ext4/resize.c failed (status code 429) - * At _site/fr/lecons/preserver-logiciels-recherche.html:967: External link https://gitlab.com/users/sign_in failed (status code 403) @@ -22486,25 +14406,19 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/307 failed (status code 429) -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:628: - - External link https://github.com/dhcolumbia/pandoc-workflow/blob/master/pandoctut.bib failed (status code 429) - * At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:894: - External link http://stackoverflow.com/questions/tagged/pandoc failed (status code 403) - -* At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:896: - - External link http://www.draftin.com failed: Service Unavailable (status code 503) + External link https://stackoverflow.com/questions/tagged/pandoc failed (status code 403) * At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:896: - External link http://www.authorea.com failed: Forbidden (status code 403) + External link https://mouapp.com/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html:896: - External link http://mouapp.com/ failed with something very wrong. + External link https://www.draftin.com failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) @@ -22572,17 +14486,21 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/travailler-avec-des-fichiers-texte.md failed (status code 429) +* At _site/fr/nos-soutiens.html:282: + + External link https://www.sas.ac.uk/ failed (status code 403) + * At _site/fr/nos-soutiens.html:294: External link https://www.tilburguniversity.edu/ failed (status code 403) -* At _site/fr/nos-soutiens.html:337: +* At _site/fr/nos-soutiens.html:306: - External link https://www.sussex.ac.uk/collaborate/business/public-funds#:~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies failed: https://www.sussex.ac.uk/collaborate/business/public-funds exists, but the hash ':~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies' does not (status code 200) + External link https://www.history.ac.uk/library-digital failed (status code 403) -* At _site/fr/nos-soutiens.html:338: +* At _site/fr/nos-soutiens.html:337: - External link https://www.thebritishacademy.ac.uk/projects/writing-workshops-2018-digital-humanities/ failed (status code 403) + External link https://www.sussex.ac.uk/collaborate/business/public-funds#:~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies failed: https://www.sussex.ac.uk/collaborate/business/public-funds exists, but the hash ':~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies' does not (status code 200) * At _site/fr/nos-soutiens.html:438: @@ -22592,10 +14510,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/nos-soutiens.md failed (status code 429) -* At _site/fr/pi.html:271: - - External link https://www.oecd.org/fr/topics/sub-issues/oda-eligibility-and-conditions/dac-list-of-oda-recipients.html failed (status code 403) - * At _site/fr/pi.html:417: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -22644,17 +14558,9 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://academic.oup.com/jah/article-abstract/103/1/299/1751315 failed (status code 403) -* At _site/fr/recherche.html:267: - - External link http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ failed: got a time out (response code 301) (status code 301) - -* At _site/fr/recherche.html:282: - - External link https://novaresearch.unl.pt/files/32228034/Ensinar_Humanidades_Digitais.pdf failed (status code 403) - * At _site/fr/recherche.html:285: - External link http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) + External link https://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) * At _site/fr/recherche.html:328: @@ -22688,6 +14594,12 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://msuglobaldh.org/abstracts/#programming-historian failed: https://msuglobaldh.org/abstracts/ exists, but the hash 'programming-historian' does not (status code 200) +* At _site/fr/recherche.html:396: + + External link https://ixa2.si.ehu.eus/intele/?q=webinars failed with something very wrong. 
+It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/fr/recherche.html:482: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -22716,6 +14628,12 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2022-12-16-2022-in-review.md failed (status code 429) +* At _site/posts/DH-Award-2017.html:281: + + External link https://dhawards.org/dhawards2017/results/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/posts/DH-Award-2017.html:783: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -22726,7 +14644,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/FR-team.html:285: - External link http://www.iperionch.eu/ failed with something very wrong. + External link https://www.iperionch.eu/ failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) @@ -22754,10 +14672,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2022-04-12-PH-TNA-JISC-event-2-annoucement.md failed (status code 429) -* At _site/posts/PH-commitment-to-diversity.html:283: - - External link http://www.aauw.org/research/why-so-few/ failed: Not Found (status code 404) - * At _site/posts/PH-commitment-to-diversity.html:283: External link https://www.surveymonkey.co.uk/r/SFSRHHD failed: Not Found (status code 404) @@ -22768,7 +14682,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/PH-commitment-to-diversity.html:377: - External link http://femtechnet.org/ failed with something very wrong. + External link https://femtechnet.org/ failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) @@ -22802,7 +14716,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/PH-espanol-in-DH2018.html:297: - External link https://github.com/programminghistorian/jekyll/issues?q=french+label:french failed (status code 429) + External link https://github.com/programminghistorian/jekyll/issues?q=french+label:french failed (status code 404) * At _site/posts/PH-espanol-in-DH2018.html:1232: @@ -22814,19 +14728,21 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) * At _site/posts/Uses-Of-The-Programming-Historian.html:293: - External link http://lj.libraryjournal.com/2014/09/opinion/not-dead-yet/connecting-researchers-to-new-digital-tools-not-dead-yet/#_ failed: Bad Gateway (status code 502) + External link https://lj.libraryjournal.com/2014/09/opinion/not-dead-yet/connecting-researchers-to-new-digital-tools-not-dead-yet/#_ failed (status code 502) * At _site/posts/Uses-Of-The-Programming-Historian.html:295: - External link http://journalofdigitalhumanities.org/2-1/dh-contribution-to-topic-modeling/ failed: Moved Permanently (status code 301) + External link https://journalofdigitalhumanities.org/2-1/dh-contribution-to-topic-modeling/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/Uses-Of-The-Programming-Historian.html:342: - External link http://digitalhumanities.rice.edu/fall-2013-syllabus/ failed (status code 404) + External link https://digitalhumanities.rice.edu/fall-2013-syllabus/ failed (status code 404) * At _site/posts/Uses-Of-The-Programming-Historian.html:344: - External link http://dh.chadblack.net/info/syllabus/ failed with something very wrong. + External link https://dh.chadblack.net/info/syllabus/ failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) @@ -22868,11 +14784,11 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/adding-to-library-catalogue.html:281: - External link http://www.twitter.com/Literature_Geek failed (status code 400) + External link https://twitter.com/proghist failed (status code 400) * At _site/posts/adding-to-library-catalogue.html:281: - External link https://twitter.com/proghist failed (status code 400) + External link https://www.twitter.com/Literature_Geek failed (status code 400) * At _site/posts/adding-to-library-catalogue.html:790: @@ -22900,11 +14816,11 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/announcing-new-team-spanish-language-editors.html:287: - External link https://en.neogranadina.org failed: got a time out (response code 0) (status code 0) + External link https://twitter.com/mariajoafana failed (status code 400) * At _site/posts/announcing-new-team-spanish-language-editors.html:287: - External link https://twitter.com/mariajoafana failed (status code 400) + External link https://en.neogranadina.org failed: got a time out (response code 0) (status code 0) * At _site/posts/announcing-new-team-spanish-language-editors.html:289: @@ -22916,19 +14832,19 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) * At _site/posts/announcing-new-team-spanish-language-editors.html:293: - External link https://twitter.com/proghist failed (status code 400) + External link https://twitter.com/mariajoafana failed (status code 400) * At _site/posts/announcing-new-team-spanish-language-editors.html:293: - External link https://twitter.com/mariajoafana failed (status code 400) + External link https://twitter.com/proghist failed (status code 400) * At _site/posts/announcing-new-team-spanish-language-editors.html:293: - External link https://twitter.com/victor_gayol failed (status code 400) + External link https://twitter.com/RojasCastroA failed (status code 400) * At _site/posts/announcing-new-team-spanish-language-editors.html:293: - External link https://twitter.com/RojasCastroA failed (status code 400) + External link https://twitter.com/victor_gayol failed (status code 400) * At _site/posts/announcing-new-team-spanish-language-editors.html:788: @@ -22964,11 +14880,23 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/bienvenue-ph-fr.html:283: - External link https://github.com/programminghistorian/jekyll/issues/850 failed (status code 429) + External link https://github.com/programminghistorian/ph-submissions/issues?q=is:issue+is:open+label:French failed (status code 404) * At _site/posts/bienvenue-ph-fr.html:283: - External link https://github.com/programminghistorian/ph-submissions/issues?q=is:issue+is:open+label:French failed (status code 429) + External link https://github.com/programminghistorian/jekyll/issues/850 failed (status code 429) + +* At _site/posts/bienvenue-ph-fr.html:285: + + External link https://dsharp.library.cmu.edu/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/posts/bienvenue-ph-fr.html:289: + + External link https://archives.mundaneum.org/fr/versions-digitalisees/schema-de-paul-otlet-documentation-et-telecommunication failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/bienvenue-ph-fr.html:783: @@ -23102,14 +15030,20 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2019-12-30-bulletin-de-information.md failed (status code 429) -* At _site/posts/bulletin-issue-01.html:377: +* At _site/posts/bulletin-issue-01.html:287: - External link https://www.linkedin.com/company/prog-hist/ failed (status code 999) + External link https://dhawards.org/dhawards2022/results/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/bulletin-issue-01.html:377: External link https://twitter.com/ProgHist failed (status code 400) +* At _site/posts/bulletin-issue-01.html:377: + + External link https://www.linkedin.com/company/prog-hist/ failed (status code 999) + * At _site/posts/bulletin-issue-01.html:1310: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -23158,6 +15092,12 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://campus.dariah.eu/source/programming-historian/page/1 failed (status code 404) +* At _site/posts/bulletin-issue-04.html:325: + + External link https://dhawards.org/dhawards2022/results/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/posts/bulletin-issue-04.html:351: External link https://twitter.com/ProgHist failed (status code 400) @@ -23206,10 +15146,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://academic.oup.com/jah/article-abstract/103/1/299/1751315 failed (status code 403) -* At _site/posts/call-for-editors-en.html:288: - - External link http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ failed: got a time out (response code 301) (status code 301) - * At _site/posts/call-for-editors-en.html:817: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -23222,10 +15158,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://academic.oup.com/jah/article-abstract/103/1/299/1751315 failed (status code 403) -* At _site/posts/call-for-editors.html:288: - - External link http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ failed: got a time out (response code 301) (status code 301) - * At _site/posts/call-for-editors.html:817: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -23338,7 +15270,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/corpus-linguistics-in-action.html:430: - External link http://www.euppublishing.com/doi/full/10.3366/cor.2016.0102 failed (status code 403) + External link https://www.euppublishing.com/doi/full/10.3366/cor.2016.0102 failed (status code 403) * At _site/posts/corpus-linguistics-in-action.html:1798: @@ -23358,7 +15290,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/december-newsletter.html:304: - External link http://ahlist.org/wp-content/uploads/2021/11/AHLIST-2021-PROGRAM_Virtual_FINAL.pdf failed: Not Found (status code 404) + External link https://ahlist.org/wp-content/uploads/2021/11/AHLIST-2021-PROGRAM_Virtual_FINAL.pdf failed (status code 404) * At _site/posts/december-newsletter.html:320: @@ -23374,6 +15306,12 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2021-12-22-december-newsletter.md failed (status code 429) +* At _site/posts/dh-award-2016.html:281: + + External link https://dhawards.org/dhawards2016/results/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/posts/dh-award-2016.html:795: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -23410,10 +15348,6 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/jekyll/issues/1682 failed (status code 429) -* At _site/posts/dois-for-ph.html:294: - - External link https://github.com/programminghistorian/jekyll/blob/4c5201ceb456deab677866886255bbd54500a9de/_layouts/crossref.xml failed (status code 429) - * At _site/posts/dois-for-ph.html:853: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -23454,10 +15388,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2024-11-15-en-call-for-proposals.md failed (status code 429) -* At _site/posts/es-buscamos-revisores.html:321: - - External link https://github.com/programminghistorian/jekyll/blob/gh-pages/CODE_OF_CONDUCT.md failed (status code 429) - * At _site/posts/es-buscamos-revisores.html:1270: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -23500,19 +15430,19 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/history-of-protest.html:285: - External link http://politicalmeetingsmapper.co.uk failed with something very wrong. + External link https://labs.bl.uk/British+Library+Labs+Competition failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/history-of-protest.html:285: - External link http://labs.bl.uk/British+Library+Labs+Competition failed with something very wrong. + External link https://politicalmeetingsmapper.co.uk failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/history-of-protest.html:289: - External link http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) + External link https://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) * At _site/posts/history-of-protest.html:783: @@ -23520,19 +15450,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/history-of-protest.html:793: - External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2017-03-31-history-of-protest.md failed (status code 429) - -* At _site/posts/how-we-moved-to-github.html:305: - - External link https://github.com/programminghistorian/oldsite/blob/master/original_html/data-mining-the-internet-archive.html failed (status code 429) - -* At _site/posts/how-we-moved-to-github.html:338: - - External link https://github.com/programminghistorian/oldsite/blob/master/prep_for_pandoc.py failed (status code 429) - -* At _site/posts/how-we-moved-to-github.html:347: - - External link https://github.com/programminghistorian/oldsite/blob/master/modified_html/data-mining-the-internet-archive.html failed (status code 429) + External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2017-03-31-history-of-protest.md failed (status code 429) * At _site/posts/how-we-moved-to-github.html:544: @@ -23540,7 +15458,7 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) * At _site/posts/how-we-moved-to-github.html:551: - External link https://github.com/programminghistorian/jekyll/blob/master/modified_html/data-mining-the-internet-archive.html#L50 failed (status code 429) + External link https://github.com/programminghistorian/jekyll/blob/master/modified_html/data-mining-the-internet-archive.html#L50 failed (status code 404) * At _site/posts/how-we-moved-to-github.html:564: @@ -23548,11 +15466,11 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/how-we-moved-to-github.html:647: - External link https://github.com/programminghistorian/jekyll/blob/master/process_with_pandoc.sh failed (status code 429) + External link https://github.com/programminghistorian/jekyll/commits/master/lessons failed (status code 429) * At _site/posts/how-we-moved-to-github.html:647: - External link https://github.com/programminghistorian/jekyll/commits/master/lessons failed (status code 429) + External link https://github.com/programminghistorian/jekyll/blob/master/process_with_pandoc.sh failed (status code 429) * At _site/posts/how-we-moved-to-github.html:649: @@ -23580,11 +15498,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/infrastructure-at-ph.html:304: - External link https://github.com/programminghistorian/jekyll/blob/gh-pages/_build/build.sh#L15-L40 failed (status code 429) - -* At _site/posts/infrastructure-at-ph.html:325: - - External link https://github.com/programminghistorian/jekyll/blob/gh-pages/_plugins/validate_yaml.rb failed (status code 429) + External link https://github.com/programminghistorian/jekyll/blob/gh-pages/_build/build.sh#L15-L40 failed: https://github.com/programminghistorian/jekyll/blob/gh-pages/_build/build.sh exists, but the hash 'L15-L40' does not (status code 200) * At _site/posts/infrastructure-at-ph.html:341: @@ -23766,10 +15680,22 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://twitter.com/UCLDH failed (status code 400) +* At _site/posts/newsletter-april-21.html:297: + + External link https://ixa2.si.ehu.eus/intele/?q=webinars failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/posts/newsletter-april-21.html:328: External link https://programminghistorian.org/en/support-us#institutional-partner-programme failed: https://programminghistorian.org/en/support-us exists, but the hash 'institutional-partner-programme' does not (status code 200) +* At _site/posts/newsletter-april-21.html:367: + + External link https://dhawards.org/dhawards2020/results/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/posts/newsletter-april-21.html:870: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -23792,11 +15718,11 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) * At _site/posts/newsletter-march20.html:314: - External link https://twitter.com/KU_Leuven failed (status code 400) + External link https://programminghistorian.org/en/support-us#institutional-partner-programme failed: https://programminghistorian.org/en/support-us exists, but the hash 'institutional-partner-programme' does not (status code 200) * At _site/posts/newsletter-march20.html:314: - External link https://programminghistorian.org/en/support-us#institutional-partner-programme failed: https://programminghistorian.org/en/support-us exists, but the hash 'institutional-partner-programme' does not (status code 200) + External link https://twitter.com/KU_Leuven failed (status code 400) * At _site/posts/newsletter-march20.html:844: @@ -23822,6 +15748,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://programminghistorian.org/en/support-us#institutional-partner-programme failed: https://programminghistorian.org/en/support-us exists, but the hash 'institutional-partner-programme' does not (status code 200) +* At _site/posts/newsletter-year20.html:325: + + External link https://www.history.ac.uk/library failed (status code 403) + * At _site/posts/newsletter-year20.html:341: External link https://twitter.com/ProgHist failed (status code 400) @@ -23900,7 +15830,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html:293: - External link https://twitter.com/proghist failed (status code 400) + External link https://twitter.com/victor_gayol failed (status code 400) * At _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html:293: @@ -23908,11 +15838,11 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html:293: - External link https://twitter.com/victor_gayol failed (status code 400) + External link https://twitter.com/mariajoafana failed (status code 400) * At _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html:293: - External link https://twitter.com/mariajoafana failed (status code 400) + External link https://twitter.com/proghist failed (status code 400) * At _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html:791: @@ -23958,17 +15888,13 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2015-06-02-programming-historian-live-london.md failed (status code 429) -* At _site/posts/promoting-digital-archives.html:279: - - External link https://dp.la/info/2016/02/01/color-our-collections/ failed (status code 404) - * At _site/posts/promoting-digital-archives.html:279: External link https://twitter.com/search?q=%23colorourcollections&src=typd failed (status code 400) -* At _site/posts/promoting-digital-archives.html:302: +* At _site/posts/promoting-digital-archives.html:279: - External link https://www.canva.com/ failed (status code 403) + External link https://dp.la/info/2016/02/01/color-our-collections/ failed (status code 404) * At _site/posts/promoting-digital-archives.html:309: @@ -24030,21 +15956,17 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2017-06-12-roundup2017a.md failed (status code 429) -* At _site/posts/september-newsletter.html:301: - - External link https://novaresearch.unl.pt/files/32228034/Ensinar_Humanidades_Digitais.pdf failed (status code 403) - * At _site/posts/september-newsletter.html:303: External link https://ach.org/blog/2021/09/13/fall-2021-programming-historian-book-club/ failed (status code 404) * At _site/posts/september-newsletter.html:321: - External link https://www.fct.pt/apoios/veraocomciencia/index.phtml.pt failed (status code 404) + External link https://www.fct.pt/fct.phtml.en failed (status code 404) * At _site/posts/september-newsletter.html:321: - External link https://www.fct.pt/fct.phtml.en failed (status code 404) + External link https://www.fct.pt/apoios/veraocomciencia/index.phtml.pt failed (status code 404) * At _site/posts/september-newsletter.html:1254: @@ -24062,13 +15984,13 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2017-06-18-sonic-word-clouds.md failed (status code 429) -* At _site/posts/spanish-editor.html:287: +* At _site/posts/spanish-editor.html:280: - External link https://academic.oup.com/jah/article/103/1/299/1751315 failed (status code 403) + External link https://github.com/programminghistorian/jekyll/issues/246 failed (status code 429) -* At _site/posts/spanish-editor.html:288: +* At _site/posts/spanish-editor.html:287: - External link http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ failed: got a time out (response code 301) (status code 301) + External link https://academic.oup.com/jah/article/103/1/299/1751315 failed (status code 403) * At _site/posts/spanish-editor.html:803: @@ -24110,6 +16032,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://twitter.com/BlondeHistorian failed (status code 400) +* At _site/posts/ucl-placement-2021.html:285: + + External link https://github.com/programminghistorian/jekyll/issues/2072 failed (status code 429) + * At _site/posts/ucl-placement-2021.html:797: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -24144,11 +16070,23 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/welcome-ph-fr.html:283: - External link https://github.com/programminghistorian/ph-submissions/issues?q=is:issue+is:open+label:French failed (status code 429) + External link https://github.com/programminghistorian/jekyll/issues/850 failed (status code 429) * At _site/posts/welcome-ph-fr.html:283: - External link https://github.com/programminghistorian/jekyll/issues/850 failed (status code 429) + External link https://github.com/programminghistorian/ph-submissions/issues?q=is:issue+is:open+label:French failed (status code 404) + +* At _site/posts/welcome-ph-fr.html:285: + + External link https://dsharp.library.cmu.edu/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/posts/welcome-ph-fr.html:289: + + External link https://archives.mundaneum.org/fr/versions-digitalisees/schema-de-paul-otlet-documentation-et-telecommunication failed with something very wrong. 
+It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/posts/welcome-ph-fr.html:783: @@ -24174,17 +16112,21 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2018-11-09-welcome-zoe-leblanc.md failed (status code 429) +* At _site/pt/apoiadores.html:282: + + External link https://www.sas.ac.uk/ failed (status code 403) + * At _site/pt/apoiadores.html:294: External link https://www.tilburguniversity.edu/ failed (status code 403) -* At _site/pt/apoiadores.html:336: +* At _site/pt/apoiadores.html:306: - External link https://www.sussex.ac.uk/collaborate/business/public-funds#:~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies failed: https://www.sussex.ac.uk/collaborate/business/public-funds exists, but the hash ':~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies' does not (status code 200) + External link https://www.history.ac.uk/library-digital failed (status code 403) -* At _site/pt/apoiadores.html:337: +* At _site/pt/apoiadores.html:336: - External link https://www.thebritishacademy.ac.uk/projects/writing-workshops-2018-digital-humanities/ failed (status code 403) + External link https://www.sussex.ac.uk/collaborate/business/public-funds#:~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies failed: https://www.sussex.ac.uk/collaborate/business/public-funds exists, but the hash ':~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies' does not (status code 200) * At _site/pt/apoiadores.html:437: @@ -24194,6 +16136,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/apoiadores.md failed (status code 429) +* At _site/pt/contribua.html:292: + + External link https://www.worldcat.org/title/programming-historian-en-espanol/oclc/1061292935&referer=brief_results failed (status code 403) + * At _site/pt/contribua.html:347: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -24264,11 +16210,11 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/pt/equipe.html:310: - External link http://twitter.com/maxcarlons failed (status code 400) + External link https://twitter.com/maxcarlons failed (status code 400) * At _site/pt/equipe.html:512: - External link http://twitter.com/cosovschi failed (status code 400) + External link https://twitter.com/cosovschi failed (status code 400) * At _site/pt/equipe.html:1272: @@ -24278,99 +16224,101 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) * At _site/pt/equipe.html:1278: - External link http://twitter.com/nabsiddiqui failed (status code 400) + External link https://twitter.com/nabsiddiqui failed (status code 400) * At _site/pt/equipe.html:1631: - External link http://twitter.com/giulia_taurino failed (status code 400) + External link https://twitter.com/giulia_taurino failed (status code 400) * At _site/pt/equipe.html:1810: - External link http://twitter.com/alexwermercolan failed (status code 400) + External link https://twitter.com/alexwermercolan failed (status code 400) * At _site/pt/equipe.html:2059: - External link http://www.mariajoseafanador.com failed: Moved Permanently (status code 301) + External link https://www.mariajoseafanador.com failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/pt/equipe.html:2065: - External link http://twitter.com/mariajoafana failed (status code 400) + External link https://twitter.com/mariajoafana failed (status code 400) * At _site/pt/equipe.html:2534: - External link http://twitter.com/IsaGribomont failed (status code 400) + External link https://twitter.com/IsaGribomont failed (status code 400) * At _site/pt/equipe.html:2745: - External link http://twitter.com/espejolento failed (status code 400) + External link https://twitter.com/espejolento failed (status code 400) * At _site/pt/equipe.html:3036: - External link http://twitter.com/jenniferisve failed (status code 400) + External link https://twitter.com/jenniferisve failed (status code 400) * At _site/pt/equipe.html:3361: - External link http://twitter.com/enetreseles failed (status code 400) + External link https://twitter.com/enetreseles failed (status code 400) * At _site/pt/equipe.html:3568: - External link http://twitter.com/jgob failed (status code 400) + External link https://twitter.com/jgob failed (status code 400) * At _site/pt/equipe.html:3863: - External link http://twitter.com/rivaquiroga failed (status code 400) + External link https://twitter.com/rivaquiroga failed (status code 400) * At _site/pt/equipe.html:4804: - External link http://twitter.com/superHH failed (status code 400) + External link https://twitter.com/superHH failed (status code 400) * At _site/pt/equipe.html:5190: - External link http://twitter.com/emilienschultz failed (status code 400) + External link https://twitter.com/emilienschultz failed (status code 400) * At _site/pt/equipe.html:5317: - External link http://twitter.com/davvalent failed (status code 400) + External link https://twitter.com/davvalent failed (status code 400) * At _site/pt/equipe.html:5842: - External link http://twitter.com/danielalvesfcsh failed (status code 400) + External link https://twitter.com/danielalvesfcsh failed (status code 400) * At _site/pt/equipe.html:6107: - External link http://twitter.com/ericbrasiln failed (status code 400) + External link https://twitter.com/ericbrasiln failed (status code 400) * At _site/pt/equipe.html:6543: - External link http://twitter.com/jimmy_medeiros failed (status code 400) + External link https://twitter.com/jimmy_medeiros failed (status code 400) * At _site/pt/equipe.html:7027: - External link http://twitter.com/araceletorres failed (status code 400) + External link https://twitter.com/araceletorres failed (status code 400) * At _site/pt/equipe.html:7286: - External link http://twitter.com/j_w_baker failed (status code 400) + External link 
https://twitter.com/j_w_baker failed (status code 400) * At _site/pt/equipe.html:7727: - External link http://twitter.com/Adam_Crymble failed (status code 400) + External link https://twitter.com/Adam_Crymble failed (status code 400) * At _site/pt/equipe.html:8258: - External link http://twitter.com/jenniferisve failed (status code 400) + External link https://twitter.com/jenniferisve failed (status code 400) * At _site/pt/equipe.html:8589: - External link http://twitter.com/rivaquiroga failed (status code 400) + External link https://twitter.com/rivaquiroga failed (status code 400) * At _site/pt/equipe.html:8878: - External link http://twitter.com/amsichani failed (status code 400) + External link https://twitter.com/amsichani failed (status code 400) * At _site/pt/equipe.html:9221: - External link http://twitter.com/AnisaHawes failed (status code 400) + External link https://twitter.com/AnisaHawes failed (status code 400) * At _site/pt/equipe.html:10041: @@ -24412,6 +16360,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes-politica-remocao.md failed (status code 429) +* At _site/pt/licoes/HTML-lista-palavras-1.html:339: + + External link https://github.com/programminghistorian/ph-submissions/issues/442 failed (status code 429) + * At _site/pt/licoes/HTML-lista-palavras-1.html:1648: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -24420,6 +16372,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/HTML-lista-palavras-1.md failed (status code 429) +* At _site/pt/licoes/HTML-lista-palavras-2.html:339: + + External link https://github.com/programminghistorian/ph-submissions/issues/443 failed (status code 429) + * At _site/pt/licoes/HTML-lista-palavras-2.html:1752: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -24428,6 +16384,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/HTML-lista-palavras-2.md failed (status code 429) +* At _site/pt/licoes/algoritmos-agrupamento-scikit-learn-python.html:353: + + External link https://github.com/programminghistorian/ph-submissions/issues/578 failed (status code 429) + * At _site/pt/licoes/algoritmos-agrupamento-scikit-learn-python.html:2693: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -24436,6 +16396,14 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/algoritmos-agrupamento-scikit-learn-python.md failed (status code 429) +* At _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html:353: + + External link https://github.com/programminghistorian/ph-submissions/issues/422 failed (status code 429) + +* At _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html:703: + + External link https://factominer.free.fr/ failed: got a time out (response code 0) (status code 0) + * At _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html:1709: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -24444,6 +16412,14 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/analise-correspondencia-pesquisa-historica-R.md failed (status code 429) +* At _site/pt/licoes/analise-sentimento-R-syuzhet.html:337: + + External link https://github.com/programminghistorian/ph-submissions/issues/467 failed (status code 429) + +* At _site/pt/licoes/analise-sentimento-R-syuzhet.html:572: + + External link https://myrabr.com/blog/analise-de-sentimento/ failed: got a time out (response code 0) (status code 0) + * At _site/pt/licoes/analise-sentimento-R-syuzhet.html:1598: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -24452,9 +16428,9 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/analise-sentimento-R-syuzhet.md failed (status code 429) -* At _site/pt/licoes/analise-sentimento-exploracao-dados.html:659: +* At _site/pt/licoes/analise-sentimento-exploracao-dados.html:337: - External link https://github.com/cjhutto/vaderSentiment/blob/master/vaderSentiment/vaderSentiment.py failed (status code 429) + External link https://github.com/programminghistorian/ph-submissions/issues/375 failed (status code 429) * At _site/pt/licoes/analise-sentimento-exploracao-dados.html:1478: @@ -24464,6 +16440,10 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/analise-sentimento-exploracao-dados.md failed (status code 429) +* At _site/pt/licoes/aplicacao-web-interativa-r-shiny-leaflet.html:337: + + External link https://github.com/programminghistorian/ph-submissions/issues/513 failed (status code 429) + * At _site/pt/licoes/aplicacao-web-interativa-r-shiny-leaflet.html:1680: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -24474,19 +16454,21 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) * At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:866: - External link https://stackoverflow.com/questions/tagged/pandoc failed (status code 403) + External link https://groups.google.com/forum/#!forum/pandoc-discuss failed: https://groups.google.com/forum/ exists, but the hash '!forum/pandoc-discuss' does not (status code 200) * At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:866: - External link https://groups.google.com/forum/#!forum/pandoc-discuss failed: https://groups.google.com/forum/ exists, but the hash '!forum/pandoc-discuss' does not (status code 200) + External link https://stackoverflow.com/questions/tagged/pandoc failed (status code 403) * At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:868: - External link https://www.authorea.com/ failed (status code 403) + External link https://www.draftin.com/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:868: - External link http://www.draftin.com/ failed: Service Unavailable (status code 503) + External link https://www.authorea.com/ failed (status code 403) * At _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html:896: @@ -24516,10 +16498,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/440 failed (status code 429) -* At _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html:558: - - External link https://www.worldcat.org/title/unix-and-linux/oclc/308171076&referer=brief_results failed (status code 403) - * At _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html:1621: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -24582,7 +16560,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/pt/licoes/download-multiplos-registros-query-strings.html:1294: - External link http://stackoverflow.com/questions/273192/python-best-way-to-create-directory-if-it-doesnt-exist-for-file-write failed (status code 403) + External link https://stackoverflow.com/questions/273192/python-best-way-to-create-directory-if-it-doesnt-exist-for-file-write failed (status code 403) * At _site/pt/licoes/download-multiplos-registros-query-strings.html:1832: @@ -24608,6 +16586,18 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/446 failed (status code 429) +* At _site/pt/licoes/explorar-analisar-dados-rede-python.html:621: + + External link https://www.sixdegreesoffrancisbacon.com failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/pt/licoes/explorar-analisar-dados-rede-python.html:897: + + External link https://sixdegreesoffrancisbacon.com/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. 
(status code 0) + * At _site/pt/licoes/explorar-analisar-dados-rede-python.html:3009: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -24662,9 +16652,7 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/pt/licoes/georreferenciamento-qgis.html:612: - External link http://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP failed with something very wrong. -It's possible libcurl couldn't connect to the server, or perhaps the request timed out. -Sometimes, making too many requests at once also breaks things. (status code 0) + External link https://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP failed: Found (status code 302) * At _site/pt/licoes/georreferenciamento-qgis.html:725: @@ -24682,18 +16670,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/577 failed (status code 429) -* At _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html:625: - - External link https://www.canva.com/ failed (status code 403) - -* At _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html:987: - - External link https://www.canva.com/ failed (status code 403) - -* At _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html:1121: - - External link https://www.canva.com/ failed (status code 403) - * At _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html:1448: External link https://doi.org/10.1080/00031305.2017.1399928 failed (status code 403) @@ -24782,10 +16758,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/470 failed (status code 429) -* At _site/pt/licoes/introducao-codificacao-textos-tei-1.html:667: - - External link https://github.com/rogalmic/vscode-xml-complete failed (status code 429) - * At _site/pt/licoes/introducao-codificacao-textos-tei-1.html:1577: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -24798,21 +16770,39 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/428 failed (status code 429) +* At _site/pt/licoes/introducao-dados-abertos-conectados.html:575: + + External link https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. 
(status code 0) + * At _site/pt/licoes/introducao-dados-abertos-conectados.html:717: External link https://gtr.ukri.org/projects?ref=AH/N003446/1#/tabOverview failed: https://gtr.ukri.org/projects?ref=AH/N003446/1 exists, but the hash '/tabOverview' does not (status code 200) * At _site/pt/licoes/introducao-dados-abertos-conectados.html:750: - External link http://semanticweb.org/wiki/Main_Page.html failed: got a time out (response code 0) (status code 0) + External link https://semanticweb.org/wiki/Main_Page.html failed: got a time out (response code 0) (status code 0) * At _site/pt/licoes/introducao-dados-abertos-conectados.html:905: External link https://pt.wikipedia.org/wiki/%C3%81rvore_(estrutura_de_dados)#Terminologia failed: https://pt.wikipedia.org/wiki/%C3%81rvore_(estrutura_de_dados) exists, but the hash 'Terminologia' does not (status code 200) +* At _site/pt/licoes/introducao-dados-abertos-conectados.html:905: + + External link https://www.history.ac.uk/research/digital-history failed (status code 403) + * At _site/pt/licoes/introducao-dados-abertos-conectados.html:1081: - External link http://linkeddata.org/guides-and-tutorials failed: Internal Server Error (status code 500) + External link https://linkeddata.org/guides-and-tutorials failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/pt/licoes/introducao-dados-abertos-conectados.html:1083: + + External link https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/pt/licoes/introducao-dados-abertos-conectados.html:1089: @@ -24836,11 +16826,11 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/pt/licoes/introducao-estilometria-python.html:1440: - External link https://doi.org/10.1093/llc/fqx017 failed (status code 403) + External link https://doi.org/10.1093/llc/fqv023 failed (status code 403) * At _site/pt/licoes/introducao-estilometria-python.html:1440: - External link https://doi.org/10.1093/llc/fqv023 failed (status code 403) + External link https://doi.org/10.1093/llc/fqx017 failed (status code 403) * At _site/pt/licoes/introducao-estilometria-python.html:1479: @@ -24962,6 +16952,12 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/397 failed (status code 429) +* At _site/pt/licoes/manipulacao-transformacao-dados-r.html:1018: + + External link https://www.ggplot2.org failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/pt/licoes/manipulacao-transformacao-dados-r.html:1565: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -25046,10 +17042,6 @@ Sometimes, making too many requests at once also breaks things. 
(status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/preservar-os-seus-dados-de-investigacao.md failed (status code 429) -* At _site/pt/licoes/processamento-basico-texto-r.html:339: - - External link https://github.com/programminghistorian/ph-submissions/issues/381 failed (status code 429) - * At _site/pt/licoes/processamento-basico-texto-r.html:562: External link https://www.rstudio.com/products/rstudio/#Desktop failed: https://www.rstudio.com/products/rstudio/ exists, but the hash 'Desktop' does not (status code 200) @@ -25066,12 +17058,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/ph-submissions/issues/566 failed (status code 429) -* At _site/pt/licoes/qgis-camadas.html:607: - - External link http://www.gov.pe.ca/gis/download.php3?name=coastline&file_format=SHP failed with something very wrong. -It's possible libcurl couldn't connect to the server, or perhaps the request timed out. -Sometimes, making too many requests at once also breaks things. (status code 0) - * At _site/pt/licoes/qgis-camadas.html:2719: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -25116,51 +17102,57 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/pt/licoes/som-dados-sonificacao-historiadores.html:564: - External link http://blog.taracopplestone.co.uk/making-things-photobashing-as-archaeological-remediation/ failed with something very wrong. + External link https://blog.taracopplestone.co.uk/making-things-photobashing-as-archaeological-remediation/ failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/pt/licoes/som-dados-sonificacao-historiadores.html:591: - External link http://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. + External link https://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/pt/licoes/som-dados-sonificacao-historiadores.html:606: - External link http://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. + External link https://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/pt/licoes/som-dados-sonificacao-historiadores.html:672: - External link http://www.lynda.com/GarageBand-tutorials/Importing-audio-tracks/156620/164050-4.html failed (status code 404) + External link https://www.lynda.com/GarageBand-tutorials/Importing-audio-tracks/156620/164050-4.html failed (status code 404) * At _site/pt/licoes/som-dados-sonificacao-historiadores.html:882: - External link http://www.electronics.dit.ie/staff/tscarff/Music_technology/midi/midi_note_numbers_for_octaves.html failed with something very wrong. + External link https://www.electronics.dit.ie/staff/tscarff/Music_technology/midi/midi_note_numbers_for_octaves.html failed with something very wrong. 
It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1045: - External link http://puffin.creighton.edu/jesuit/relations/ failed with something very wrong. + External link https://puffin.creighton.edu/jesuit/relations/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1109: + + External link https://www.lilypond.org/ failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1132: - External link http://www.jstor.org/stable/734136 failed (status code 403) + External link https://www.jstor.org/stable/734136 failed (status code 403) * At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1134: - External link http://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. + External link https://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/pt/licoes/som-dados-sonificacao-historiadores.html:1136: - External link http://motherboard.vice.com/read/the-strange-acoustic-phenomenon-behind-these-wacked-out-versions-of-pop-songs failed with something very wrong. + External link https://motherboard.vice.com/read/the-strange-acoustic-phenomenon-behind-these-wacked-out-versions-of-pop-songs failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) @@ -25180,37 +17172,23 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://dl.acm.org/doi/10.1145/1145581.1145623 failed (status code 403) -* At _site/pt/licoes/sumarizacao-narrativas-web-python.html:543: - - External link https://github.com/arquivo/pwa-technologies/wiki/Arquivo.pt-API failed (status code 429) - -* At _site/pt/licoes/sumarizacao-narrativas-web-python.html:544: - - External link https://github.com/LIAAD/TemporalSummarizationFramework failed (status code 429) - * At _site/pt/licoes/sumarizacao-narrativas-web-python.html:549: External link https://www.arquivo.pt failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. (status code 0) -* At _site/pt/licoes/sumarizacao-narrativas-web-python.html:555: - - External link https://arquivo.pt/api failed (status code 429) - -* At _site/pt/licoes/sumarizacao-narrativas-web-python.html:559: - - External link https://github.com/arquivo/ failed (status code 429) - * At _site/pt/licoes/sumarizacao-narrativas-web-python.html:570: External link https://www.arquivo.pt failed with something very wrong. It's possible libcurl couldn't connect to the server, or perhaps the request timed out. Sometimes, making too many requests at once also breaks things. 
(status code 0) -* At _site/pt/licoes/sumarizacao-narrativas-web-python.html:597: +* At _site/pt/licoes/sumarizacao-narrativas-web-python.html:822: - External link https://github.com/arquivo/pwa-technologies/wiki/Arquivo.pt-API failed (status code 429) + External link https://yake.inesctec.pt failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/pt/licoes/sumarizacao-narrativas-web-python.html:1987: @@ -25274,19 +17252,11 @@ Sometimes, making too many requests at once also breaks things. (status code 0) * At _site/pt/pesquisa.html:263: - External link http://jah.oxfordjournals.org/content/103/1/299.2.full failed (status code 403) - -* At _site/pt/pesquisa.html:264: - - External link http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ failed: got a time out (response code 301) (status code 301) - -* At _site/pt/pesquisa.html:279: - - External link https://novaresearch.unl.pt/files/32228034/Ensinar_Humanidades_Digitais.pdf failed (status code 403) + External link https://jah.oxfordjournals.org/content/103/1/299.2.full failed (status code 403) * At _site/pt/pesquisa.html:282: - External link http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) + External link https://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) * At _site/pt/pesquisa.html:293: @@ -25324,6 +17294,12 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://msuglobaldh.org/abstracts/#programming-historian failed: https://msuglobaldh.org/abstracts/ exists, but the hash 'programming-historian' does not (status code 200) +* At _site/pt/pesquisa.html:390: + + External link https://ixa2.si.ehu.eus/intele/?q=webinars failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/pt/pesquisa.html:477: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -25340,10 +17316,6 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/politica-de-privacidade.md failed (status code 429) -* At _site/pt/ppi.html:271: - - External link https://www.oecd.org/en/topics/sub-issues/oda-eligibility-and-conditions/dac-list-of-oda-recipients.html failed (status code 403) - * At _site/pt/ppi.html:420: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -25364,6 +17336,18 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/reportar-um-erro.md failed (status code 429) +* At _site/pt/sobre.html:266: + + External link https://dhawards.org/dhawards2022/results/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. +Sometimes, making too many requests at once also breaks things. (status code 0) + +* At _site/pt/sobre.html:266: + + External link https://dhawards.org/dhawards2016/results/ failed with something very wrong. +It's possible libcurl couldn't connect to the server, or perhaps the request timed out. 
+Sometimes, making too many requests at once also breaks things. (status code 0) + * At _site/pt/sobre.html:324: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -25380,17 +17364,17 @@ Sometimes, making too many requests at once also breaks things. (status code 0) External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/vagas.md failed (status code 429) -* At _site/translation-concordance.html:3569: +* At _site/translation-concordance.html:5641: External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) -* At _site/translation-concordance.html:3579: +* At _site/translation-concordance.html:5651: External link https://github.com/programminghistorian/jekyll/commits/gh-pages/translation-concordance.md failed (status code 429) * At _site/troubleshooting.html:392: - External link http://www.diveintopython.net failed (status code 403) + External link https://www.diveintopython.net failed (status code 403) * At _site/troubleshooting.html:452: @@ -25440,19 +17424,19 @@ For the Links > Internal check, the following failures were found: * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:81: - internally linking to images.jsp?doc=178006280087, which does not exist + internally linking to static/Punishment.jsp#death, which does not exist * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:81: - internally linking to static/Verdicts.jsp#guilty, which does not exist + internally linking to images.jsp?doc=178006280090, which does not exist * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:81: - internally linking to images.jsp?doc=178006280089, which does not exist + internally linking to images.jsp?doc=178006280090, which does not exist * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:81: - internally linking to static/Punishment.jsp#death, which does not exist + internally linking to images.jsp?doc=178006280089, which does not exist * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:81: @@ -25460,7 +17444,7 @@ For the Links > Internal check, the following failures were found: * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:81: - internally linking to browse.jsp?div=t17800628-32, which does not exist + internally linking to images.jsp?doc=178006280088, which does not exist * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:81: @@ -25468,15 +17452,15 @@ For the Links > Internal check, the following failures were found: * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:81: - internally linking to browse.jsp?div=t17800628-34, which does not exist + internally linking to images.jsp?doc=178006280087, which does not exist * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:81: - internally linking to images.jsp?doc=178006280088, which does not exist + internally linking to images.jsp?doc=178006280087, which does not exist * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:81: - internally linking to images.jsp?doc=178006280084, which does not exist + internally linking to static/Verdicts.jsp#guilty, which does not exist * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:81: @@ -25484,23 +17468,23 @@ For the Links > Internal check, the following failures were found: * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:81: - internally linking to 
images.jsp?doc=178006280087, which does not exist + internally linking to images.jsp?doc=178006280084, which does not exist * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:81: - internally linking to static/Crimes.jsp#breakingpeace, which does not exist + internally linking to browse.jsp?div=t17800628-34, which does not exist * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:81: - internally linking to images.jsp?doc=178006280090, which does not exist + internally linking to static/Crimes.jsp#breakingpeace, which does not exist * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:81: - internally linking to static/Crimes.jsp#riot, which does not exist + internally linking to browse.jsp?div=t17800628-32, which does not exist * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:81: - internally linking to images.jsp?doc=178006280090, which does not exist + internally linking to static/Crimes.jsp#riot, which does not exist * At _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html:83: @@ -25538,6 +17522,10 @@ For the Links > Internal check, the following failures were found: internally linking to css/screen.css, which does not exist +* At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:21: + + internally linking to a.css, which does not exist + * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:25: internally linking to css/print.css, which does not exist @@ -25568,67 +17556,67 @@ For the Links > Internal check, the following failures were found: * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:81: - internally linking to browse.jsp?div=t17800628-34, which does not exist + internally linking to images.jsp?doc=178006280090, which does not exist * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:81: - internally linking to browse.jsp?div=t17800628-32, which does not exist + internally linking to browse.jsp?div=t17800628-34, which does not exist * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:81: - internally linking to images.jsp?doc=178006280088, which does not exist + internally linking to static/Crimes.jsp#riot, which does not exist * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:81: - internally linking to images.jsp?doc=178006280088, which does not exist + internally linking to static/Punishment.jsp#death, which does not exist * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:81: - internally linking to static/Punishment.jsp#death, which does not exist + internally linking to static/Crimes.jsp#breakingpeace, which does not exist * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:81: - internally linking to static/Verdicts.jsp#guilty, which does not exist + internally linking to browse.jsp?div=t17800628-32, which does not exist * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:81: - internally linking to images.jsp?doc=178006280089, which does not exist + internally linking to static/Verdicts.jsp#guilty, which does not exist * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:81: - internally linking to images.jsp?doc=178006280089, which does not exist + internally linking to images.jsp?doc=178006280084, which does not exist * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:81: - internally linking to 
static/Crimes.jsp#riot, which does not exist + internally linking to images.jsp?doc=178006280084, which does not exist * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:81: - internally linking to static/Crimes.jsp#breakingpeace, which does not exist + internally linking to images.jsp?doc=178006280087, which does not exist * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:81: - internally linking to images.jsp?doc=178006280090, which does not exist + internally linking to images.jsp?doc=178006280087, which does not exist * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:81: - internally linking to images.jsp?doc=178006280090, which does not exist + internally linking to images.jsp?doc=178006280088, which does not exist * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:81: - internally linking to images.jsp?doc=178006280084, which does not exist + internally linking to images.jsp?doc=178006280088, which does not exist * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:81: - internally linking to images.jsp?doc=178006280084, which does not exist + internally linking to images.jsp?doc=178006280089, which does not exist * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:81: - internally linking to images.jsp?doc=178006280087, which does not exist + internally linking to images.jsp?doc=178006280089, which does not exist * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:81: - internally linking to images.jsp?doc=178006280087, which does not exist + internally linking to images.jsp?doc=178006280090, which does not exist * At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:83: @@ -26040,5 +18028,9 @@ For the Scripts check, the following failures were found: script is empty and has no src attribute +* At _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html:26: + + internal script reference a.js does not exist + -HTML-Proofer found 6449 failures! +HTML-Proofer found 4405 failures! 
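Nearly all of the remaining external-link failures above fall into four buckets: 429s from GitHub's rate limiting, 400s from twitter.com rejecting unauthenticated requests, 403s from bot-blocking hosts such as Stack Overflow, doi.org and JSTOR, and status code 0 where libcurl could not connect or timed out. A minimal sketch of how these classes could be silenced with html-proofer 5's options follows; it assumes a Rakefile-style invocation rather than this repository's GitHub Action, and the ignore lists are illustrative examples drawn from the report, not this project's actual configuration:

    require 'html-proofer'

    # Sketch only: ignore lists below are examples of the failure classes
    # seen in the report above, not this repository's configuration.
    HTMLProofer.check_directory(
      './_site',
      ignore_status_codes: [0, 403, 429],   # libcurl timeouts, bot-blocked hosts, rate limits
      ignore_urls: [%r{twitter\.com}],      # returns 400 to unauthenticated checkers
      hydra: { max_concurrency: 10 }        # fewer parallel requests eases 429s
    ).run

The trade-off is that ignoring 403s and timeouts wholesale also hides genuinely dead links, so a per-host ignore_urls list is usually safer than a blanket ignore_status_codes entry.
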
diff --git a/htmlproofer-report.csv b/htmlproofer-report.csv index 9ba9a833ee..9748501bc2 100644 --- a/htmlproofer-report.csv +++ b/htmlproofer-report.csv @@ -49,16 +49,6 @@ _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,'a' tag is mi _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,'a' tag is missing a reference _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,'a' tag is missing a reference _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,'a' tag is missing a reference -_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,198,http://markhadley.com is not an HTTPS link -_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,199,http://www.sheffield.ac.uk/hri/ is not an HTTPS link -_site/assets/mapping-with-python-leaflet/exercises/exercise00 - original/mymap.html,3,http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css is not an HTTPS link -_site/assets/mapping-with-python-leaflet/exercises/exercise01/mymap.html,3,http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css is not an HTTPS link -_site/assets/mapping-with-python-leaflet/exercises/exercise02/mymap.html,3,http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css is not an HTTPS link -_site/assets/mapping-with-python-leaflet/exercises/exercise03/mymap.html,3,http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css is not an HTTPS link -_site/assets/mapping-with-python-leaflet/exercises/exercise04/mymap.html,3,http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css is not an HTTPS link -_site/assets/mapping-with-python-leaflet/exercises/exercise05/mymap.html,3,http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css is not an HTTPS link -_site/assets/mapping-with-python-leaflet/map/mymap-onepage.html,3,http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css is not an HTTPS link -_site/assets/mapping-with-python-leaflet/map/mymap.html,3,http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css is not an HTTPS link _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,'a' tag is missing a reference _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,'a' tag is missing a reference _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,'a' tag is missing a reference @@ -71,8 +61,6 @@ _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,'a' t _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,'a' tag is missing a reference _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,'a' tag is missing a reference _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,'a' tag is missing a reference -_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,198,http://markhadley.com is not an HTTPS link -_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,199,http://www.sheffield.ac.uk/hri/ is not an HTTPS link _site/blog/index.html,86,'a' tag is missing a reference _site/blog/index.html,105,'a' tag is missing a reference _site/blog/index.html,142,'a' tag is missing a reference @@ -110,8 +98,6 @@ _site/blog/page17/index.html,86,'a' tag is missing a reference _site/blog/page17/index.html,105,'a' tag is missing a reference _site/blog/page17/index.html,142,'a' tag is missing a reference _site/blog/page17/index.html,168,'a' tag is missing a reference -_site/blog/page17/index.html,346,http://en.wikipedia.org/wiki/Static_web_page is not an HTTPS link -_site/blog/page17/index.html,346,http://pages.github.com is not an HTTPS 
link _site/blog/page2/index.html,86,'a' tag is missing a reference _site/blog/page2/index.html,105,'a' tag is missing a reference _site/blog/page2/index.html,142,'a' tag is missing a reference @@ -148,11 +134,6 @@ _site/en/about.html,86,'a' tag is missing a reference _site/en/about.html,105,'a' tag is missing a reference _site/en/about.html,142,'a' tag is missing a reference _site/en/about.html,168,'a' tag is missing a reference -_site/en/about.html,264,http://dhawards.org/dhawards2016/results/ is not an HTTPS link -_site/en/about.html,264,http://dhawards.org/dhawards2017/results/ is not an HTTPS link -_site/en/about.html,264,http://humanidadesdigitaleshispanicas.es/ is not an HTTPS link -_site/en/about.html,264,http://dhawards.org/dhawards2022/results/ is not an HTTPS link -_site/en/about.html,278,http://digitalhistoryhacks.blogspot.com/2008/01/programming-historian.html is not an HTTPS link _site/en/author-guidelines.html,86,'a' tag is missing a reference _site/en/author-guidelines.html,105,'a' tag is missing a reference _site/en/author-guidelines.html,142,'a' tag is missing a reference @@ -161,13 +142,10 @@ _site/en/contribute.html,86,'a' tag is missing a reference _site/en/contribute.html,105,'a' tag is missing a reference _site/en/contribute.html,142,'a' tag is missing a reference _site/en/contribute.html,168,'a' tag is missing a reference -_site/en/contribute.html,298,http://www.worldcat.org/title/programming-historian/oclc/951537099 is not an HTTPS link -_site/en/contribute.html,300,http://purdue-primo-prod.hosted.exlibrisgroup.com/primo_library/libweb/action/dlDisplay.do?vid=PURDUE&search_scope=everything&docId=PURDUE_ALMA51671812890001081&fn=permalink is not an HTTPS link _site/en/editor-guidelines.html,86,'a' tag is missing a reference _site/en/editor-guidelines.html,105,'a' tag is missing a reference _site/en/editor-guidelines.html,142,'a' tag is missing a reference _site/en/editor-guidelines.html,168,'a' tag is missing a reference -_site/en/editor-guidelines.html,587,http://www.loc.gov/maps/collections is not an HTTPS link _site/en/events.html,86,'a' tag is missing a reference _site/en/events.html,105,'a' tag is missing a reference _site/en/events.html,142,'a' tag is missing a reference @@ -200,11 +178,6 @@ _site/en/lessons/analyzing-documents-with-tfidf.html,133,'a' tag is missing a re _site/en/lessons/analyzing-documents-with-tfidf.html,152,'a' tag is missing a reference _site/en/lessons/analyzing-documents-with-tfidf.html,189,'a' tag is missing a reference _site/en/lessons/analyzing-documents-with-tfidf.html,215,'a' tag is missing a reference -_site/en/lessons/analyzing-documents-with-tfidf.html,1478,http://jonathanstray.com/a-full-text-visualization-of-the-iraq-war-logs is not an HTTPS link -_site/en/lessons/analyzing-documents-with-tfidf.html,1612,http://scikit-learn.org/stable/install.html is not an HTTPS link -_site/en/lessons/analyzing-documents-with-tfidf.html,1642,http://dhdebates.gc.cuny.edu/debates/text/99 is not an HTTPS link -_site/en/lessons/analyzing-documents-with-tfidf.html,1651,http://jonathanstray.com/a-full-text-visualization-of-the-iraq-war-logs is not an HTTPS link -_site/en/lessons/analyzing-documents-with-tfidf.html,1666,http://journalofdigitalhumanities.org/2-1/words-alone-by-benjamin-m-schmidt/ is not an HTTPS link _site/en/lessons/analyzing-multilingual-text-nltk-spacy-stanza.html,117,'a' tag is missing a reference _site/en/lessons/analyzing-multilingual-text-nltk-spacy-stanza.html,136,'a' tag is missing a reference 
_site/en/lessons/analyzing-multilingual-text-nltk-spacy-stanza.html,173,'a' tag is missing a reference @@ -213,55 +186,22 @@ _site/en/lessons/applied-archival-downloading-with-wget.html,117,'a' tag is miss _site/en/lessons/applied-archival-downloading-with-wget.html,136,'a' tag is missing a reference _site/en/lessons/applied-archival-downloading-with-wget.html,173,'a' tag is missing a reference _site/en/lessons/applied-archival-downloading-with-wget.html,199,'a' tag is missing a reference -_site/en/lessons/applied-archival-downloading-with-wget.html,471,http://www.activehistory.ca is not an HTTPS link -_site/en/lessons/applied-archival-downloading-with-wget.html,480,http://chronicle.com/blogs/profhacker/download-a-sequential-range-of-urls-with-curl/41055 is not an HTTPS link -_site/en/lessons/applied-archival-downloading-with-wget.html,511,http://data2.archives.ca/e/e061/e001518029.jpg is not an HTTPS link -_site/en/lessons/applied-archival-downloading-with-wget.html,512,http://data2.archives.ca/e/e061/e001518109.jpg is not an HTTPS link -_site/en/lessons/applied-archival-downloading-with-wget.html,526,http://en.wikipedia.org/wiki/Leading_zero is not an HTTPS link -_site/en/lessons/applied-archival-downloading-with-wget.html,627,http://www.nla.gov.au/apps/cdview/?pi=nla.ms-ms5393-1 is not an HTTPS link -_site/en/lessons/applied-archival-downloading-with-wget.html,631,http://nla.gov.au/nla.ms-ms5393-1-s1-v.jpg is not an HTTPS link -_site/en/lessons/applied-archival-downloading-with-wget.html,633,http://nla.gov.au/nla.ms-ms5393-1-s127-v.jpg is not an HTTPS link -_site/en/lessons/applied-archival-downloading-with-wget.html,680,http://memory.loc.gov/cgi-bin/ampage?collId=mtj1&fileName=mtj1page001.db&recNum=1&itemLink=/ammem/collections/jefferson_papers/mtjser1.html&linkText=6 is not an HTTPS link -_site/en/lessons/applied-archival-downloading-with-wget.html,689,http://memory.loc.gov/master/mss/mtj/mtj1/001/0000/ is not an HTTPS link -_site/en/lessons/applied-archival-downloading-with-wget.html,701,http://memory.loc.gov/master/mss/mtj/mtj1/001/0000/ is not an HTTPS link -_site/en/lessons/applied-archival-downloading-with-wget.html,703,http://memory.loc.gov/master/mss/mtj/mtj1/001/0100/ is not an HTTPS link -_site/en/lessons/applied-archival-downloading-with-wget.html,705,http://memory.loc.gov/master/mss/mtj/mtj1/001/0200/ is not an HTTPS link -_site/en/lessons/applied-archival-downloading-with-wget.html,709,http://memory.loc.gov/master/mss/mtj/mtj1/001/1400 is not an HTTPS link -_site/en/lessons/applied-archival-downloading-with-wget.html,735,http://cushing.med.yale.edu/gsdl/collect/mdposter/ is not an HTTPS link -_site/en/lessons/applied-archival-downloading-with-wget.html,748,http://cushing.med.yale.edu/images/mdposter/full/poster0001.jpg is not an HTTPS link -_site/en/lessons/applied-archival-downloading-with-wget.html,753,http://cushing.med.yale.edu/images/mdposter/full/poster0637.jpg is not an HTTPS link _site/en/lessons/automated-downloading-with-wget.html,117,'a' tag is missing a reference _site/en/lessons/automated-downloading-with-wget.html,136,'a' tag is missing a reference _site/en/lessons/automated-downloading-with-wget.html,173,'a' tag is missing a reference _site/en/lessons/automated-downloading-with-wget.html,199,'a' tag is missing a reference -_site/en/lessons/automated-downloading-with-wget.html,679,http://www.gnu.org/software/wget/ is not an HTTPS link -_site/en/lessons/automated-downloading-with-wget.html,680,http://ftp.gnu.org/gnu/wget/ is not an HTTPS link 
-_site/en/lessons/automated-downloading-with-wget.html,743,http://www.gnu.org/software/wget/manual/wget.html is not an HTTPS link -_site/en/lessons/automated-downloading-with-wget.html,748,http://activehistory.ca/papers/ is not an HTTPS link -_site/en/lessons/automated-downloading-with-wget.html,751,http://activehistory.ca/papers/historypaper-9/ is not an HTTPS link -_site/en/lessons/automated-downloading-with-wget.html,798,http://activehistory.ca/papers/ is not an HTTPS link -_site/en/lessons/automated-downloading-with-wget.html,824,http://activehistory.ca/papers/ is not an HTTPS link -_site/en/lessons/automated-downloading-with-wget.html,825,http://activehistory.ca/papers/historypaper-9/ is not an HTTPS link -_site/en/lessons/automated-downloading-with-wget.html,827,http://uwo.ca is not an HTTPS link -_site/en/lessons/automated-downloading-with-wget.html,966,http://www.gnu.org/software/wget/manual/wget.html is not an HTTPS link _site/en/lessons/basic-text-processing-in-r.html,123,'a' tag is missing a reference _site/en/lessons/basic-text-processing-in-r.html,142,'a' tag is missing a reference _site/en/lessons/basic-text-processing-in-r.html,179,'a' tag is missing a reference _site/en/lessons/basic-text-processing-in-r.html,205,'a' tag is missing a reference -_site/en/lessons/basic-text-processing-in-r.html,1154,http://www.presidency.ucsb.edu/sou.php is not an HTTPS link -_site/en/lessons/basic-text-processing-in-r.html,1157,http://norvig.com/ngrams/ is not an HTTPS link _site/en/lessons/beginners-guide-to-twitter-data.html,124,'a' tag is missing a reference _site/en/lessons/beginners-guide-to-twitter-data.html,143,'a' tag is missing a reference _site/en/lessons/beginners-guide-to-twitter-data.html,180,'a' tag is missing a reference _site/en/lessons/beginners-guide-to-twitter-data.html,206,'a' tag is missing a reference -_site/en/lessons/beginners-guide-to-twitter-data.html,341,http://programminghistorian.github.io/ph-submissions/lessons/beginners-guide-to-twitter-data is not an HTTPS link -_site/en/lessons/beginners-guide-to-twitter-data.html,868,http://journalofdigitalhumanities.org/1-1/demystifying-networks-by-scott-weingart/ is not an HTTPS link -_site/en/lessons/beginners-guide-to-twitter-data.html,1145,http://hdlab.stanford.edu/palladio/ is not an HTTPS link _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,117,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,136,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,173,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,199,'a' tag is missing a reference -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,489,http://amandavisconti.github.io/JekyllDemo/ is not an HTTPS link _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,496,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,497,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,498,'a' tag is missing a reference @@ -301,9 +241,7 @@ _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,547,'a' tag _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,548,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,549,'a' tag is missing a reference 
_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,554,'a' tag is missing a reference -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,556,http://jekyllrb.com/docs/home/ is not an HTTPS link _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,558,'a' tag is missing a reference -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,570,http://jekyllrb.com/docs/home/ is not an HTTPS link _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,572,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,578,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,607,'a' tag is missing a reference @@ -311,14 +249,11 @@ _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,615,'a' tag _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,621,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,629,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,641,'a' tag is missing a reference -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,643,http://www.barebones.com/products/textwrangler/download.html is not an HTTPS link _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,647,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,702,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,706,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,716,'a' tag is missing a reference -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,718,http://brew.sh/ is not an HTTPS link _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,778,'a' tag is missing a reference -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,780,http://brew.sh/ is not an HTTPS link _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,803,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,823,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,829,'a' tag is missing a reference @@ -331,72 +266,30 @@ _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1040,'a' ta _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1163,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1197,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1203,'a' tag is missing a reference -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1207,http://kramdown.gettalong.org/quickref.html is not an HTTPS link -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1209,http://kramdown.gettalong.org/quickref.html is not an HTTPS link -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1211,http://www.typora.io/ is not an HTTPS link _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1213,'a' tag is missing a reference -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1247,http://amandavisconti.github.io/JekyllDemo/resume/ is not an HTTPS link _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1249,'a' tag is 
missing a reference -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1325,http://raw.githubusercontent.com/amandavisconti/JekyllDemo/gh-pages/_posts/2016-02-29-a-post-about-my-research.markdown is not an HTTPS link _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1327,'a' tag is missing a reference -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1448,http://amandavisconti.github.io/JekyllDemo/ is not an HTTPS link _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1450,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1459,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1463,'a' tag is missing a reference -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1474,http://jekyllthemes.org/ is not an HTTPS link -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1475,http://jekyllthemes.io/ is not an HTTPS link _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1478,'a' tag is missing a reference -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1482,http://jekyllrb.com/docs/plugins/ is not an HTTPS link -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1489,http://jekyllrb.com/docs/plugins/ is not an HTTPS link -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1489,http://jekyllrb.com/docs/plugins/ is not an HTTPS link _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1506,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1527,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1529,'a' tag is missing a reference -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1531,http://stackexchange.com/ is not an HTTPS link _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1533,'a' tag is missing a reference _site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1539,'a' tag is missing a reference -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1544,http://jekyllrb.com/docs/home/ is not an HTTPS link -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1545,http://jekyll-windows.juthilo.com/ is not an HTTPS link -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1547,http://literaturegeek.com/2015/12/08/WhyJekyllGitHub is not an HTTPS link -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1548,http://chronicle.com/blogs/profhacker/jekyll1/60913 is not an HTTPS link -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1550,http://ben.balter.com/jekyll-style-guide/ is not an HTTPS link -_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1551,http://prose.io/ is not an HTTPS link _site/en/lessons/calibrating-radiocarbon-dates-r.html,137,'a' tag is missing a reference _site/en/lessons/calibrating-radiocarbon-dates-r.html,156,'a' tag is missing a reference _site/en/lessons/calibrating-radiocarbon-dates-r.html,193,'a' tag is missing a reference _site/en/lessons/calibrating-radiocarbon-dates-r.html,219,'a' tag is missing a reference -_site/en/lessons/calibrating-radiocarbon-dates-r.html,785,http://calib.org is not an HTTPS link _site/en/lessons/cleaning-data-with-openrefine.html,121,'a' tag is missing a reference 
 _site/en/lessons/cleaning-data-with-openrefine.html,140,'a' tag is missing a reference
 _site/en/lessons/cleaning-data-with-openrefine.html,177,'a' tag is missing a reference
 _site/en/lessons/cleaning-data-with-openrefine.html,203,'a' tag is missing a reference
-_site/en/lessons/cleaning-data-with-openrefine.html,528,http://openrefine.org is not an HTTPS link
-_site/en/lessons/cleaning-data-with-openrefine.html,579,http://vis.stanford.edu/papers/wrangler/ is not an HTTPS link
-_site/en/lessons/cleaning-data-with-openrefine.html,579,http://openrefine.org is not an HTTPS link
-_site/en/lessons/cleaning-data-with-openrefine.html,584,http://en.wikipedia.org/wiki/Data_profiling is not an HTTPS link
-_site/en/lessons/cleaning-data-with-openrefine.html,586,http://en.wikipedia.org/wiki/Named-entity_recognition is not an HTTPS link
-_site/en/lessons/cleaning-data-with-openrefine.html,590,http://www.loc.gov/index.html is not an HTTPS link
-_site/en/lessons/cleaning-data-with-openrefine.html,590,http://www.oclc.org/home.en.html is not an HTTPS link
-_site/en/lessons/cleaning-data-with-openrefine.html,608,http://creativecommons.org/licenses/by-nc/2.5/au/ is not an HTTPS link
-_site/en/lessons/cleaning-data-with-openrefine.html,622,http://en.wikipedia.org/wiki/Controlled_vocabulary is not an HTTPS link
-_site/en/lessons/cleaning-data-with-openrefine.html,630,http://en.wikipedia.org/wiki/Linked_data is not an HTTPS link
-_site/en/lessons/cleaning-data-with-openrefine.html,678,http://en.wikipedia.org/wiki/Faceted_search is not an HTTPS link
-_site/en/lessons/cleaning-data-with-openrefine.html,886,http://en.wikipedia.org/wiki/Comma-separated_values is not an HTTPS link
 _site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html,117,'a' tag is missing a reference
 _site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html,136,'a' tag is missing a reference
 _site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html,173,'a' tag is missing a reference
 _site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html,199,'a' tag is missing a reference
-_site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html,462,http://home.heinonline.org/ is not an HTTPS link
-_site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html,502,http://www.unixuser.org/~euske/python/pdfminer/index.html is not an HTTPS link
-_site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html,617,http://krillapps.com/patterns/ is not an HTTPS link
-_site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html,627,http://docs.python.org/2/library/re.html is not an HTTPS link
-_site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html,628,http://docs.python.org/2/howto/regex.html#regex-howto is not an HTTPS link
-_site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html,636,http://www.gnu.org/software/sed/ is not an HTTPS link
-_site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html,636,http://www.gnu.org/software/grep/ is not an HTTPS link
-_site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html,719,http://docs.python.org/2/library/re.html#re.search is not an HTTPS link
-_site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html,727,http://docs.python.org/2/library/re.html#re.sub is not an HTTPS link
-_site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html,754,http://docs.python.org/2/library/re.html#re.VERBOSE is not an HTTPS link
-_site/en/lessons/cleaning-ocrd-text-with-regular-expressions.html,789,http://docs.python.org/2/library/re.html#re.compile is not an HTTPS link
 _site/en/lessons/clustering-visualizing-word-embeddings.html,135,'a' tag is missing a reference
 _site/en/lessons/clustering-visualizing-word-embeddings.html,154,'a' tag is missing a reference
 _site/en/lessons/clustering-visualizing-word-embeddings.html,191,'a' tag is missing a reference
@@ -414,17 +307,8 @@ _site/en/lessons/collaborative-blog-with-jekyll-github.html,140,'a' tag is missi
 _site/en/lessons/collaborative-blog-with-jekyll-github.html,177,'a' tag is missing a reference
 _site/en/lessons/collaborative-blog-with-jekyll-github.html,203,'a' tag is missing a reference
 _site/en/lessons/collaborative-blog-with-jekyll-github.html,588,'a' tag is missing a reference
-_site/en/lessons/collaborative-blog-with-jekyll-github.html,873,http://github.com/join is not an HTTPS link
 _site/en/lessons/collaborative-blog-with-jekyll-github.html,901,'a' tag is missing a reference
 _site/en/lessons/collaborative-blog-with-jekyll-github.html,1394,'a' tag is missing a reference
-_site/en/lessons/collaborative-blog-with-jekyll-github.html,1512,http://stackexchange.com/ is not an HTTPS link
-_site/en/lessons/collaborative-blog-with-jekyll-github.html,1512,http://tinyurl.com/DHslack is not an HTTPS link
-_site/en/lessons/collaborative-blog-with-jekyll-github.html,1519,http://literaturegeek.com/2015/12/08/WhyJekyllGitHub is not an HTTPS link
-_site/en/lessons/collaborative-blog-with-jekyll-github.html,1521,http://chronicle.com/blogs/profhacker/jekyll1/60913 is not an HTTPS link
-_site/en/lessons/collaborative-blog-with-jekyll-github.html,1527,http://jekyllrb.com/docs/home/ is not an HTTPS link
-_site/en/lessons/collaborative-blog-with-jekyll-github.html,1528,http://jekyll-windows.juthilo.com/ is not an HTTPS link
-_site/en/lessons/collaborative-blog-with-jekyll-github.html,1530,http://ben.balter.com/jekyll-style-guide/ is not an HTTPS link
-_site/en/lessons/collaborative-blog-with-jekyll-github.html,1557,http://prose.io/ is not an HTTPS link
 _site/en/lessons/common-similarity-measures.html,133,'a' tag is missing a reference
 _site/en/lessons/common-similarity-measures.html,152,'a' tag is missing a reference
 _site/en/lessons/common-similarity-measures.html,189,'a' tag is missing a reference
@@ -441,22 +325,6 @@ _site/en/lessons/corpus-analysis-with-antconc.html,117,'a' tag is missing a refe
 _site/en/lessons/corpus-analysis-with-antconc.html,136,'a' tag is missing a reference
 _site/en/lessons/corpus-analysis-with-antconc.html,173,'a' tag is missing a reference
 _site/en/lessons/corpus-analysis-with-antconc.html,199,'a' tag is missing a reference
-_site/en/lessons/corpus-analysis-with-antconc.html,527,http://voyant-tools.org/ is not an HTTPS link
-_site/en/lessons/corpus-analysis-with-antconc.html,531,http://voyant-tools.org/ is not an HTTPS link
-_site/en/lessons/corpus-analysis-with-antconc.html,531,http://www.laurenceanthony.net/software/antconc/ is not an HTTPS link
-_site/en/lessons/corpus-analysis-with-antconc.html,531,http://www.laurenceanthony.net/ is not an HTTPS link
-_site/en/lessons/corpus-analysis-with-antconc.html,531,http://hfroehli.ch/2014/05/11/intro-bibliography-corpus-linguistics/ is not an HTTPS link
-_site/en/lessons/corpus-analysis-with-antconc.html,538,http://www.laurenceanthony.net/software/antconc/ is not an HTTPS link
-_site/en/lessons/corpus-analysis-with-antconc.html,539,http://www.laurenceanthony.net/software/antconc/releases/AntConc324/ is not an HTTPS link
-_site/en/lessons/corpus-analysis-with-antconc.html,569,http://notepad-plus-plus.org/ is not an HTTPS link
-_site/en/lessons/corpus-analysis-with-antconc.html,569,http://www.barebones.com/products/textwrangler/ is not an HTTPS link
-_site/en/lessons/corpus-analysis-with-antconc.html,578,http://www.nltk.org/ is not an HTTPS link
-_site/en/lessons/corpus-analysis-with-antconc.html,581,http://www.amazon.com/Developing-Linguistic-Corpora-Practice-Guides/dp/1842172050/ref=sr_1_1 is not an HTTPS link
-_site/en/lessons/corpus-analysis-with-antconc.html,911,http://www.wordfrequency.info/free.asp is not an HTTPS link
-_site/en/lessons/corpus-analysis-with-antconc.html,961,http://www.lexically.net/downloads/version6/HTML/index.html?keyness_definition.htm is not an HTTPS link
-_site/en/lessons/corpus-analysis-with-antconc.html,961,http://www.laurenceanthony.net/software/antconc/releases/AntConc335/help.pdf is not an HTTPS link
-_site/en/lessons/corpus-analysis-with-antconc.html,1024,http://hfroehlich.wordpress.com/2014/05/11/intro-bibliography-corpus-linguistics/ is not an HTTPS link
-_site/en/lessons/corpus-analysis-with-antconc.html,1025,http://hfroehli.ch/workshops/getting-started-with-antconc/ is not an HTTPS link
 _site/en/lessons/corpus-analysis-with-spacy.html,117,'a' tag is missing a reference
 _site/en/lessons/corpus-analysis-with-spacy.html,136,'a' tag is missing a reference
 _site/en/lessons/corpus-analysis-with-spacy.html,173,'a' tag is missing a reference
@@ -465,17 +333,10 @@ _site/en/lessons/correspondence-analysis-in-R.html,137,'a' tag is missing a refe
 _site/en/lessons/correspondence-analysis-in-R.html,156,'a' tag is missing a reference
 _site/en/lessons/correspondence-analysis-in-R.html,193,'a' tag is missing a reference
 _site/en/lessons/correspondence-analysis-in-R.html,219,'a' tag is missing a reference
-_site/en/lessons/correspondence-analysis-in-R.html,570,http://www.ourcommons.ca/Committees/en/Home is not an HTTPS link
-_site/en/lessons/correspondence-analysis-in-R.html,665,http://factominer.free.fr/ is not an HTTPS link
-_site/en/lessons/correspondence-analysis-in-R.html,988,http://www.sthda.com/english/wiki/multiple-correspondence-analysis-essentials-interpretation-and-application-to-investigate-the-associations-between-categories-of-multiple-qualitative-variables-r-software-and-data-mining is not an HTTPS link
-_site/en/lessons/correspondence-analysis-in-R.html,1034,http://davetang.org/file/Singular_Value_Decomposition_Tutorial.pdf is not an HTTPS link
-_site/en/lessons/correspondence-analysis-in-R.html,1125,http://www.cbc.ca/news/indigenous/mmiw-inquiry-not-reaching-out-to-families-says-advocates-1.4053694 is not an HTTPS link
 _site/en/lessons/counting-frequencies.html,119,'a' tag is missing a reference
 _site/en/lessons/counting-frequencies.html,138,'a' tag is missing a reference
 _site/en/lessons/counting-frequencies.html,175,'a' tag is missing a reference
 _site/en/lessons/counting-frequencies.html,201,'a' tag is missing a reference
-_site/en/lessons/counting-frequencies.html,592,http://docs.python.org/tutorial/datastructures.html#list-comprehensions is not an HTTPS link
-_site/en/lessons/counting-frequencies.html,765,http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words is not an HTTPS link
 _site/en/lessons/creating-an-omeka-exhibit.html,119,'a' tag is missing a reference
 _site/en/lessons/creating-an-omeka-exhibit.html,138,'a' tag is missing a reference
 _site/en/lessons/creating-an-omeka-exhibit.html,175,'a' tag is missing a reference
@@ -484,35 +345,10 @@ _site/en/lessons/creating-and-viewing-html-files-with-python.html,119,'a' tag is
 _site/en/lessons/creating-and-viewing-html-files-with-python.html,138,'a' tag is missing a reference
 _site/en/lessons/creating-and-viewing-html-files-with-python.html,175,'a' tag is missing a reference
 _site/en/lessons/creating-and-viewing-html-files-with-python.html,201,'a' tag is missing a reference
-_site/en/lessons/creating-and-viewing-html-files-with-python.html,529,http://zotero.org is not an HTTPS link
-_site/en/lessons/creating-and-viewing-html-files-with-python.html,533,http://www.w3schools.com/html/default.asp is not an HTTPS link
-_site/en/lessons/creating-and-viewing-html-files-with-python.html,549,http://www.w3schools.com/tags/tag_doctype.asp is not an HTTPS link
 _site/en/lessons/creating-apis-with-python-and-flask.html,120,'a' tag is missing a reference
 _site/en/lessons/creating-apis-with-python-and-flask.html,139,'a' tag is missing a reference
 _site/en/lessons/creating-apis-with-python-and-flask.html,176,'a' tag is missing a reference
 _site/en/lessons/creating-apis-with-python-and-flask.html,202,'a' tag is missing a reference
-_site/en/lessons/creating-apis-with-python-and-flask.html,605,http://chroniclingamerica.loc.gov/about/api/ is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,607,http://chroniclingamerica.loc.gov/about/api/ is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,626,http://chroniclingamerica.loc.gov/search/pages/results/ is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,635,http://chroniclingamerica.loc.gov/search/pages/results/?format=json&proxtext=fire is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,667,http://flask.pocoo.org/ is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,725,http://127.0.0.1:5000/ is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,749,http://127.0.0.1:5000/ is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,844,http://127.0.0.1:5000/api/v1/resources/books/all is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,921,http://127.0.0.1:5000/api/v1/resources/books?id=0 is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,922,http://127.0.0.1:5000/api/v1/resources/books?id=1 is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,923,http://127.0.0.1:5000/api/v1/resources/books?id=2 is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,924,http://127.0.0.1:5000/api/v1/resources/books?id=3 is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,930,http://127.0.0.1:5000/api/v1/resources/books is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,1002,http://www.doxygen.org/ is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,1002,http://www.sphinx-doc.org/en/stable/ is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,1004,http://api.repo.nypl.org/ is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,1092,http://127.0.0.1:5000/api/v1/resources/books/all is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,1093,http://127.0.0.1:5000/api/v1/resources/books?author=Connie+Willis is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,1094,http://127.0.0.1:5000/api/v1/resources/books?author=Connie+Willis&published=1993 is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,1095,http://127.0.0.1:5000/api/v1/resources/books?published=2010 is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,1228,http://chroniclingamerica.loc.gov/ is not an HTTPS link
-_site/en/lessons/creating-apis-with-python-and-flask.html,1234,http://hds.essex.ac.uk/ is not an HTTPS link
 _site/en/lessons/creating-guis-in-python-for-digital-humanities-projects.html,117,'a' tag is missing a reference
 _site/en/lessons/creating-guis-in-python-for-digital-humanities-projects.html,136,'a' tag is missing a reference
 _site/en/lessons/creating-guis-in-python-for-digital-humanities-projects.html,173,'a' tag is missing a reference
@@ -521,88 +357,26 @@ _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html,117
 _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html,136,'a' tag is missing a reference
 _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html,173,'a' tag is missing a reference
 _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html,199,'a' tag is missing a reference
-_site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html,536,http://www.gizmag.com/ikea-augmented-reality-catalog-app/28703/ is not an HTTPS link
-_site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html,556,http://web.archive.org/web/20180421163517/http://english.ufl.edu/trace_arcs/ is not an HTTPS link
-_site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html,616,http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html is not an HTTPS link
-_site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html,858,http://www.gimp.org/ is not an HTTPS link
-_site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html,1205,http://developer.Android.com/tools/device.html is not an HTTPS link
 _site/en/lessons/creating-network-diagrams-from-historical-sources.html,117,'a' tag is missing a reference
 _site/en/lessons/creating-network-diagrams-from-historical-sources.html,136,'a' tag is missing a reference
 _site/en/lessons/creating-network-diagrams-from-historical-sources.html,173,'a' tag is missing a reference
 _site/en/lessons/creating-network-diagrams-from-historical-sources.html,199,'a' tag is missing a reference
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,486,http://hdlab.stanford.edu/palladio/ is not an HTTPS link
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,508,http://hal.archives-ouvertes.fr/docs/00/64/93/16/PDF/lemercier_A_zg.pdf is not an HTTPS link
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,508,http://hal.archives-ouvertes.fr/docs/00/64/93/16/PDF/lemercier_A_zg.pdf is not an HTTPS link
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,508,http://historicalnetworkresearch.org/ is not an HTTPS link
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,518,http://web.archive.org/web/20180422010025/http://www.gdw-berlin.de/fileadmin/bilder/publ/publikationen_in_englischer_sprache/2006_Neuman_eng.pdf is not an HTTPS link
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,518,http://martenduering.com/research/covert-networks-during-the-holocaust/ is not an HTTPS link
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,650,http://hdlab.stanford.edu/palladio/ is not an HTTPS link
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,726,http://hdlab.stanford.edu/doc/scenario-simple-map.pdf is not an HTTPS link
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,802,http://en.wikipedia.org/wiki/Bipartite_graph#Examples is not an HTTPS link
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,892,http://en.wikipedia.org/wiki/Directed_graph#Indegree_and_outdegree is not an HTTPS link
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,940,http://nodegoat.net/ is not an HTTPS link
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,940,http://nodegoat.net/cms/UPLOAD/AsmallguidebyYanan11082014.pdf is not an HTTPS link
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,942,http://www.youtube.com/watch?v=xKhYGRpbwOc is not an HTTPS link
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,944,http://www.clementlevallois.net/training.html is not an HTTPS link
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,944,http://www.youtube.com/watch?v=L6hHv6y5GsQ is not an HTTPS link
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,948,http://pajek.imfm.si/doku.php is not an HTTPS link
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,948,http://www.cambridge.org/us/academic/subjects/sociology/research-methods-sociology-and-criminology/exploratory-social-network-analysis-pajek-2nd-edition is not an HTTPS link
-_site/en/lessons/creating-network-diagrams-from-historical-sources.html,1035,http://historicalnetworkresearch.org is not an HTTPS link
 _site/en/lessons/crowdsourced-data-normalization-with-pandas.html,117,'a' tag is missing a reference
 _site/en/lessons/crowdsourced-data-normalization-with-pandas.html,136,'a' tag is missing a reference
 _site/en/lessons/crowdsourced-data-normalization-with-pandas.html,173,'a' tag is missing a reference
 _site/en/lessons/crowdsourced-data-normalization-with-pandas.html,199,'a' tag is missing a reference
-_site/en/lessons/crowdsourced-data-normalization-with-pandas.html,507,http://transcribe-bentham.ucl.ac.uk/td/Transcribe_Bentham is not an HTTPS link
-_site/en/lessons/crowdsourced-data-normalization-with-pandas.html,562,http://menus.nypl.org/ is not an HTTPS link
-_site/en/lessons/crowdsourced-data-normalization-with-pandas.html,883,http://pandas-docs.github.io/pandas-docs-travis/user_guide/timeseries.html#timestamp-limitations is not an HTTPS link
-_site/en/lessons/crowdsourced-data-normalization-with-pandas.html,990,http://curatingmenus.org/articles/against-cleaning/ is not an HTTPS link
 _site/en/lessons/data-mining-the-internet-archive.html,117,'a' tag is missing a reference
 _site/en/lessons/data-mining-the-internet-archive.html,136,'a' tag is missing a reference
 _site/en/lessons/data-mining-the-internet-archive.html,173,'a' tag is missing a reference
 _site/en/lessons/data-mining-the-internet-archive.html,199,'a' tag is missing a reference
-_site/en/lessons/data-mining-the-internet-archive.html,490,http://archive.org/ is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,494,http://activehistory.ca/2013/09/the-internet-archive-rocks-or-two-million-plus-free-sources-to-explore/ is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,503,http://archive.org/details/bplscas is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,563,http://archive.org/ is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,568,http://archive.org/details/lettertowilliaml00doug is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,569,http://archive.org/stream/lettertowilliaml00doug/39999066767938#page/n0/mode/2up is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,572,http://archive.org/download/lettertowilliaml00doug is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,573,http://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_dc.xml is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,573,http://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_marc.xml is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,574,http://www.loc.gov/marc/bibliographic/ is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,580,http://archive.org/search.php?query=collection%3Abplscas&sort=-publicdate is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,612,http://blog.archive.org/2011/03/31/how-archive-org-items-are-structured/ is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,622,http://internetarchive.readthedocs.io/en/latest/quickstart.html#searching is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,647,http://archive.org/search.php?query=collection%3Abplscas is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,653,http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,794,http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,872,http://docs.python.org/2/tutorial/errors.html#handling-exceptions is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,945,http://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_marc.xml is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,952,http://www.loc.gov/marc/bibliographic/bd260.html is not an HTTPS link
-_site/en/lessons/data-mining-the-internet-archive.html,952,http://www.loc.gov/marc/ is not an HTTPS link
 _site/en/lessons/data-wrangling-and-management-in-r.html,117,'a' tag is missing a reference
 _site/en/lessons/data-wrangling-and-management-in-r.html,136,'a' tag is missing a reference
 _site/en/lessons/data-wrangling-and-management-in-r.html,173,'a' tag is missing a reference
 _site/en/lessons/data-wrangling-and-management-in-r.html,199,'a' tag is missing a reference
-_site/en/lessons/data-wrangling-and-management-in-r.html,539,http://hadley.nz/ is not an HTTPS link
-_site/en/lessons/data-wrangling-and-management-in-r.html,563,http://tidyverse.org/ is not an HTTPS link
-_site/en/lessons/data-wrangling-and-management-in-r.html,574,http://magrittr.tidyverse.org is not an HTTPS link
-_site/en/lessons/data-wrangling-and-management-in-r.html,576,http://ggplot2.tidyverse.org/ is not an HTTPS link
-_site/en/lessons/data-wrangling-and-management-in-r.html,577,http://www.springer.com/us/book/9780387245447 is not an HTTPS link
-_site/en/lessons/data-wrangling-and-management-in-r.html,579,http://readr.tidyverse.org is not an HTTPS link
-_site/en/lessons/data-wrangling-and-management-in-r.html,582,http://tibble.tidyverse.org/ is not an HTTPS link
-_site/en/lessons/data-wrangling-and-management-in-r.html,709,http://stefanbache.dk/ is not an HTTPS link
-_site/en/lessons/data-wrangling-and-management-in-r.html,710,http://hadley.nz/ is not an HTTPS link
-_site/en/lessons/data-wrangling-and-management-in-r.html,797,http://lincolnmullen.com/ is not an HTTPS link
 _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html,117,'a' tag is missing a reference
 _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html,136,'a' tag is missing a reference
 _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html,173,'a' tag is missing a reference
 _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html,199,'a' tag is missing a reference
-_site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html,528,http://faculty.ucr.edu/~hanneman/nettext/ is not an HTTPS link
-_site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html,529,http://www.insna.org is not an HTTPS link
-_site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html,985,http://localhost:7474/browser/ is not an HTTPS link
-_site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html,1048,http://localhost:7474 is not an HTTPS link
-_site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html,1097,http://jgmackay.com/ is not an HTTPS link
 _site/en/lessons/designing-a-timeline-tabletop-simulator.html,117,'a' tag is missing a reference
 _site/en/lessons/designing-a-timeline-tabletop-simulator.html,136,'a' tag is missing a reference
 _site/en/lessons/designing-a-timeline-tabletop-simulator.html,173,'a' tag is missing a reference
@@ -611,77 +385,30 @@ _site/en/lessons/detecting-text-reuse-with-passim.html,119,'a' tag is missing a
 _site/en/lessons/detecting-text-reuse-with-passim.html,138,'a' tag is missing a reference
 _site/en/lessons/detecting-text-reuse-with-passim.html,175,'a' tag is missing a reference
 _site/en/lessons/detecting-text-reuse-with-passim.html,201,'a' tag is missing a reference
-_site/en/lessons/detecting-text-reuse-with-passim.html,542,http://www.ccs.neu.edu/home/dasmith/ is not an HTTPS link
-_site/en/lessons/detecting-text-reuse-with-passim.html,705,http://spark.apache.org/downloads is not an HTTPS link
-_site/en/lessons/detecting-text-reuse-with-passim.html,910,http://jsonlines.org/ is not an HTTPS link
-_site/en/lessons/detecting-text-reuse-with-passim.html,1486,http://ceur-ws.org/Vol-2253/paper22.pdf is not an HTTPS link
-_site/en/lessons/detecting-text-reuse-with-passim.html,1487,http://dx.doi.org/10.1093/alh/ajv029 is not an HTTPS link
-_site/en/lessons/detecting-text-reuse-with-passim.html,1488,http://dx.doi.org/10.1093/alh/ajv028 is not an HTTPS link
-_site/en/lessons/detecting-text-reuse-with-passim.html,1489,http://dx.doi.org/10.1080/1461670x.2020.1761865 is not an HTTPS link
-_site/en/lessons/detecting-text-reuse-with-passim.html,1491,http://dx.doi.org/10.1007/978-3-319-12655-5_11 is not an HTTPS link
-_site/en/lessons/detecting-text-reuse-with-passim.html,1492,http://dx.doi.org/10.22148/16.034 is not an HTTPS link
-_site/en/lessons/detecting-text-reuse-with-passim.html,1495,http://dx.doi.org/10.1145/2682571.2797068 is not an HTTPS link
 _site/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.html,119,'a' tag is missing a reference
 _site/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.html,138,'a' tag is missing a reference
 _site/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.html,175,'a' tag is missing a reference
 _site/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.html,201,'a' tag is missing a reference
-_site/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.html,546,http://gmail.com is not an HTTPS link
 _site/en/lessons/downloading-multiple-records-using-query-strings.html,117,'a' tag is missing a reference
 _site/en/lessons/downloading-multiple-records-using-query-strings.html,136,'a' tag is missing a reference
 _site/en/lessons/downloading-multiple-records-using-query-strings.html,173,'a' tag is missing a reference
 _site/en/lessons/downloading-multiple-records-using-query-strings.html,199,'a' tag is missing a reference
-_site/en/lessons/downloading-multiple-records-using-query-strings.html,517,http://www.oldbaileyonline.org/ is not an HTTPS link
-_site/en/lessons/downloading-multiple-records-using-query-strings.html,551,http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 is not an HTTPS link
-_site/en/lessons/downloading-multiple-records-using-query-strings.html,612,http://www.oldbaileyonline.org/forms/formMain.jsp is not an HTTPS link
-_site/en/lessons/downloading-multiple-records-using-query-strings.html,1303,http://www.checkupdown.com/status/E408.html is not an HTTPS link
-_site/en/lessons/downloading-multiple-records-using-query-strings.html,1472,http://docs.python.org/tutorial/errors.html is not an HTTPS link
-_site/en/lessons/downloading-multiple-records-using-query-strings.html,1659,http://www.oldbaileyonline.org/static/DocAPI.jsp is not an HTTPS link
-_site/en/lessons/downloading-multiple-records-using-query-strings.html,1660,http://stackoverflow.com/questions/273192/python-best-way-to-create-directory-if-it-doesnt-exist-for-file-write is not an HTTPS link
 _site/en/lessons/editing-audio-with-audacity.html,117,'a' tag is missing a reference
 _site/en/lessons/editing-audio-with-audacity.html,136,'a' tag is missing a reference
 _site/en/lessons/editing-audio-with-audacity.html,173,'a' tag is missing a reference
 _site/en/lessons/editing-audio-with-audacity.html,199,'a' tag is missing a reference
-_site/en/lessons/editing-audio-with-audacity.html,487,http://audacityteam.org/ is not an HTTPS link
-_site/en/lessons/editing-audio-with-audacity.html,499,http://audacityteam.org/ is not an HTTPS link
-_site/en/lessons/editing-audio-with-audacity.html,525,http://web.archive.org/web/20161119231053/http://www.indiana.edu:80/~emusic/acoustics/amplitude.htm is not an HTTPS link
-_site/en/lessons/editing-audio-with-audacity.html,621,http://www.diffen.com/difference/Mono_vs_Stereo is not an HTTPS link
-_site/en/lessons/editing-audio-with-audacity.html,791,http://manual.audacityteam.org/man/crossfade_tracks.html is not an HTTPS link
-_site/en/lessons/editing-audio-with-audacity.html,821,http://www.nch.com.au/acm/formats.html is not an HTTPS link
-_site/en/lessons/editing-audio-with-audacity.html,821,http://www.w3schools.com/html/html5_audio.asp is not an HTTPS link
 _site/en/lessons/exploring-and-analyzing-network-data-with-python.html,126,'a' tag is missing a reference
 _site/en/lessons/exploring-and-analyzing-network-data-with-python.html,145,'a' tag is missing a reference
 _site/en/lessons/exploring-and-analyzing-network-data-with-python.html,182,'a' tag is missing a reference
 _site/en/lessons/exploring-and-analyzing-network-data-with-python.html,208,'a' tag is missing a reference
-_site/en/lessons/exploring-and-analyzing-network-data-with-python.html,559,http://docs.python-guide.org/en/latest/starting/installation/ is not an HTTPS link
-_site/en/lessons/exploring-and-analyzing-network-data-with-python.html,580,http://www.oxforddnb.com is not an HTTPS link
-_site/en/lessons/exploring-and-analyzing-network-data-with-python.html,580,http://www.sixdegreesoffrancisbacon.com is not an HTTPS link
-_site/en/lessons/exploring-and-analyzing-network-data-with-python.html,836,http://6dfb.tumblr.com/post/159420498411/ut-tensio-sic-vis-introducing-the-hooke-graph is not an HTTPS link
-_site/en/lessons/exploring-and-analyzing-network-data-with-python.html,852,http://sixdegreesoffrancisbacon.com/ is not an HTTPS link
-_site/en/lessons/exploring-and-analyzing-network-data-with-python.html,996,http://perso.crans.org/aynaud/communities/api.html is not an HTTPS link
-_site/en/lessons/exploring-and-analyzing-network-data-with-python.html,1043,http://pandas.pydata.org/ is not an HTTPS link
 _site/en/lessons/extracting-illustrated-pages.html,117,'a' tag is missing a reference
 _site/en/lessons/extracting-illustrated-pages.html,136,'a' tag is missing a reference
 _site/en/lessons/extracting-illustrated-pages.html,173,'a' tag is missing a reference
 _site/en/lessons/extracting-illustrated-pages.html,199,'a' tag is missing a reference
-_site/en/lessons/extracting-illustrated-pages.html,559,http://projectaida.org/ is not an HTTPS link
-_site/en/lessons/extracting-illustrated-pages.html,559,http://www.ccs.neu.edu/home/dasmith/ichneumon-proposal.pdf is not an HTTPS link
-_site/en/lessons/extracting-illustrated-pages.html,638,http://web.archive.org/web/20190115051900/https://conda.io/docs/_downloads/conda-cheatsheet.pdf is not an HTTPS link
 _site/en/lessons/extracting-keywords.html,117,'a' tag is missing a reference
 _site/en/lessons/extracting-keywords.html,136,'a' tag is missing a reference
 _site/en/lessons/extracting-keywords.html,173,'a' tag is missing a reference
 _site/en/lessons/extracting-keywords.html,199,'a' tag is missing a reference
-_site/en/lessons/extracting-keywords.html,502,http://en.wikipedia.org/wiki/Gazetteer is not an HTTPS link
-_site/en/lessons/extracting-keywords.html,502,http://en.wikipedia.org/wiki/Stop_words is not an HTTPS link
-_site/en/lessons/extracting-keywords.html,510,http://www.british-history.ac.uk/alumni-oxon/1500-1714 is not an HTTPS link
-_site/en/lessons/extracting-keywords.html,510,http://en.wikipedia.org/wiki/Comma-separated_values is not an HTTPS link
-_site/en/lessons/extracting-keywords.html,552,http://en.wikipedia.org/wiki/Historic_counties_of_England is not an HTTPS link
-_site/en/lessons/extracting-keywords.html,603,http://stackoverflow.com/questions/3056740/gedit-adds-line-at-end-of-file is not an HTTPS link
-_site/en/lessons/extracting-keywords.html,605,http://en.wikipedia.org/wiki/Word_processor is not an HTTPS link
-_site/en/lessons/extracting-keywords.html,660,http://stackoverflow.com/questions/11497376/new-line-python is not an HTTPS link
-_site/en/lessons/extracting-keywords.html,682,http://en.wikipedia.org/wiki/Carriage_return is not an HTTPS link
-_site/en/lessons/extracting-keywords.html,705,http://en.wikipedia.org/wiki/Regular_expression is not an HTTPS link
-_site/en/lessons/extracting-keywords.html,985,http://stackoverflow.com/questions/17315635/csv-new-line-character-seen-in-unquoted-field-error is not an HTTPS link
-_site/en/lessons/extracting-keywords.html,1108,http://fredgibbs.net/tutorials/extract-geocode-placenames-from-text-file.html is not an HTTPS link
 _site/en/lessons/facial-recognition-ai-python.html,119,'a' tag is missing a reference
 _site/en/lessons/facial-recognition-ai-python.html,138,'a' tag is missing a reference
 _site/en/lessons/facial-recognition-ai-python.html,175,'a' tag is missing a reference
@@ -690,23 +417,6 @@ _site/en/lessons/fetch-and-parse-data-with-openrefine.html,117,'a' tag is missin
 _site/en/lessons/fetch-and-parse-data-with-openrefine.html,136,'a' tag is missing a reference
 _site/en/lessons/fetch-and-parse-data-with-openrefine.html,173,'a' tag is missing a reference
 _site/en/lessons/fetch-and-parse-data-with-openrefine.html,199,'a' tag is missing a reference
-_site/en/lessons/fetch-and-parse-data-with-openrefine.html,524,http://web.archive.org/web/20180129051941/http://data-lessons.github.io/library-openrefine/ is not an HTTPS link
-_site/en/lessons/fetch-and-parse-data-with-openrefine.html,524,http://www.datacarpentry.org/OpenRefine-ecology-lesson/ is not an HTTPS link
-_site/en/lessons/fetch-and-parse-data-with-openrefine.html,566,http://www.gutenberg.org/ebooks/1105 is not an HTTPS link
-_site/en/lessons/fetch-and-parse-data-with-openrefine.html,566,http://www.gutenberg.org/ is not an HTTPS link
-_site/en/lessons/fetch-and-parse-data-with-openrefine.html,570,http://www.gutenberg.org/wiki/Gutenberg:Feeds is not an HTTPS link
-_site/en/lessons/fetch-and-parse-data-with-openrefine.html,1142,http://www.json.org/ is not an HTTPS link
-_site/en/lessons/fetch-and-parse-data-with-openrefine.html,1282,http://www.jython.org/ is not an HTTPS link
-_site/en/lessons/fetch-and-parse-data-with-openrefine.html,1285,http://www.jython.org/ is not an HTTPS link
-_site/en/lessons/fetch-and-parse-data-with-openrefine.html,1362,http://text-processing.com/ is not an HTTPS link
-_site/en/lessons/fetch-and-parse-data-with-openrefine.html,1362,http://www.nltk.org/ is not an HTTPS link
-_site/en/lessons/fetch-and-parse-data-with-openrefine.html,1363,http://text-processing.com/docs/sentiment.html is not an HTTPS link
-_site/en/lessons/fetch-and-parse-data-with-openrefine.html,1440,http://sentiment.vivekn.com/docs/api/ is not an HTTPS link
-_site/en/lessons/fetch-and-parse-data-with-openrefine.html,1465,http://www.nltk.org/book/ch06.html is not an HTTPS link
-_site/en/lessons/fetch-and-parse-data-with-openrefine.html,1465,http://www.nltk.org/book/ is not an HTTPS link
-_site/en/lessons/fetch-and-parse-data-with-openrefine.html,1481,http://web.archive.org/web/20150528125345/http://davidhuynh.net/spaces/nicar2011/tutorial.pdf is not an HTTPS link
-_site/en/lessons/fetch-and-parse-data-with-openrefine.html,1484,http://text-processing.com/docs/index.html is not an HTTPS link
-_site/en/lessons/fetch-and-parse-data-with-openrefine.html,1487,http://text-processing.com/demo/sentiment/ is not an HTTPS link
 _site/en/lessons/finding-places-world-historical-gazetteer.html,119,'a' tag is missing a reference
 _site/en/lessons/finding-places-world-historical-gazetteer.html,138,'a' tag is missing a reference
 _site/en/lessons/finding-places-world-historical-gazetteer.html,175,'a' tag is missing a reference
@@ -715,120 +425,47 @@ _site/en/lessons/from-html-to-list-of-words-1.html,119,'a' tag is missing a refe
 _site/en/lessons/from-html-to-list-of-words-1.html,138,'a' tag is missing a reference
 _site/en/lessons/from-html-to-list-of-words-1.html,175,'a' tag is missing a reference
 _site/en/lessons/from-html-to-list-of-words-1.html,201,'a' tag is missing a reference
-_site/en/lessons/from-html-to-list-of-words-1.html,517,http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 is not an HTTPS link
-_site/en/lessons/from-html-to-list-of-words-1.html,532,http://www.w3schools.com/html/ is not an HTTPS link
 _site/en/lessons/from-html-to-list-of-words-2.html,119,'a' tag is missing a reference
 _site/en/lessons/from-html-to-list-of-words-2.html,138,'a' tag is missing a reference
 _site/en/lessons/from-html-to-list-of-words-2.html,175,'a' tag is missing a reference
 _site/en/lessons/from-html-to-list-of-words-2.html,201,'a' tag is missing a reference
-_site/en/lessons/from-html-to-list-of-words-2.html,725,http://docs.python.org/2.4/lib/typesnumeric.html is not an HTTPS link
-_site/en/lessons/from-html-to-list-of-words-2.html,741,http://docs.python.org/3/library/types.html is not an HTTPS link
 _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html,117,'a' tag is missing a reference
 _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html,136,'a' tag is missing a reference
 _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html,173,'a' tag is missing a reference
 _site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html,199,'a' tag is missing a reference
-_site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html,523,http://en.wikipedia.org/wiki/Regular_language is not an HTTPS link
-_site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html,529,http://www.worldcat.org/oclc/17591390 is not an HTTPS link
-_site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html,685,http://en.wikipedia.org/wiki/Levenshtein_distance is not an HTTPS link
-_site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html,1359,http://www.worldcat.org/oclc/41238508 is not an HTTPS link
-_site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html,1387,http://www.egenix.com/products/python/mxBase/mxDateTime/ is not an HTTPS link
-_site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html,1491,http://lxml.de/ is not an HTTPS link
-_site/en/lessons/generating-an-ordered-data-set-from-an-OCR-text-file.html,1596,http://brat.nlplab.org is not an HTTPS link
 _site/en/lessons/geocoding-qgis.html,118,'a' tag is missing a reference
 _site/en/lessons/geocoding-qgis.html,137,'a' tag is missing a reference
 _site/en/lessons/geocoding-qgis.html,174,'a' tag is missing a reference
 _site/en/lessons/geocoding-qgis.html,200,'a' tag is missing a reference
-_site/en/lessons/geocoding-qgis.html,517,http://www.qgis.org/en/site/ is not an HTTPS link
-_site/en/lessons/geocoding-qgis.html,561,http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html is not an HTTPS link
-_site/en/lessons/geocoding-qgis.html,563,http://www.british-history.ac.uk/alumni-oxon/1500-1714 is not an HTTPS link
-_site/en/lessons/geocoding-qgis.html,640,http://www.qgistutorials.com/en/docs/3/working_with_projections.html is not an HTTPS link
-_site/en/lessons/geocoding-qgis.html,643,http://www.county-borders.co.uk/ is not an HTTPS link
-_site/en/lessons/geocoding-qgis.html,714,http://wiki.gis.com/wiki/index.php/Classification is not an HTTPS link
-_site/en/lessons/geocoding-qgis.html,728,http://www.gazetteer.org.uk/index.php is not an HTTPS link
-_site/en/lessons/geocoding-qgis.html,740,http://www.county-borders.co.uk/ is not an HTTPS link
-_site/en/lessons/geocoding-qgis.html,797,http://michaelminn.com/linux/mmqgis/ is not an HTTPS link
-_site/en/lessons/geocoding-qgis.html,910,http://www.w3schools.com/sql/sql_like.asp is not an HTTPS link
-_site/en/lessons/geocoding-qgis.html,910,http://www.w3schools.com/sql/ is not an HTTPS link
 _site/en/lessons/geoparsing-text-with-edinburgh.html,117,'a' tag is missing a reference
 _site/en/lessons/geoparsing-text-with-edinburgh.html,136,'a' tag is missing a reference
 _site/en/lessons/geoparsing-text-with-edinburgh.html,173,'a' tag is missing a reference
 _site/en/lessons/geoparsing-text-with-edinburgh.html,199,'a' tag is missing a reference
-_site/en/lessons/geoparsing-text-with-edinburgh.html,488,http://jekyll.inf.ed.ac.uk/geoparser.html is not an HTTPS link
-_site/en/lessons/geoparsing-text-with-edinburgh.html,652,http://www.geonames.org/ is not an HTTPS link
-_site/en/lessons/geoparsing-text-with-edinburgh.html,652,http://groups.inf.ed.ac.uk/geoparser/documentation/v1.3/html/gaz.html is not an HTTPS link
-_site/en/lessons/geoparsing-text-with-edinburgh.html,659,http://groups.inf.ed.ac.uk/geoparser/documentation/v1.3/html/pipeline.html is not an HTTPS link
-_site/en/lessons/geoparsing-text-with-edinburgh.html,766,http://boundingbox.klokantech.com is not an HTTPS link
-_site/en/lessons/geoparsing-text-with-edinburgh.html,771,http://boundingbox.klokantech.com is not an HTTPS link
-_site/en/lessons/geoparsing-text-with-edinburgh.html,887,http://groups.inf.ed.ac.uk/geoparser/scripts/run-multiple-files.sh is not an HTTPS link
-_site/en/lessons/geoparsing-text-with-edinburgh.html,887,http://homepages.inf.ed.ac.uk/balex/publications/geoparser-workshop.pdf is not an HTTPS link
-_site/en/lessons/geoparsing-text-with-edinburgh.html,982,http://palimpsest.blogs.edina.ac.uk/ is not an HTTPS link
-_site/en/lessons/geoparsing-text-with-edinburgh.html,982,http://litlong.org/ is not an HTTPS link
-_site/en/lessons/geoparsing-text-with-edinburgh.html,984,http://web.archive.org/web/20170722115758/http://englishplacenames.cerch.kcl.ac.uk/ is not an HTTPS link
-_site/en/lessons/geoparsing-text-with-edinburgh.html,985,http://tradingconsequences.blogs.edina.ac.uk/ is not an HTTPS link
-_site/en/lessons/geoparsing-text-with-edinburgh.html,986,http://www.lancaster.ac.uk/staff/gregoryi/ is not an HTTPS link
-_site/en/lessons/geoparsing-text-with-edinburgh.html,989,http://nrabinowitz.github.io/gapvis/ is not an HTTPS link
-_site/en/lessons/geoparsing-text-with-edinburgh.html,993,http://www.lrec-conf.org/proceedings/lrec2016/pdf/129_Paper.pdf is not an HTTPS link
-_site/en/lessons/geoparsing-text-with-edinburgh.html,995,http://www.euppublishing.com/doi/pdfplus/10.3366/ijhac.2015.0136 is not an HTTPS link
-_site/en/lessons/geoparsing-text-with-edinburgh.html,1003,http://homepages.inf.ed.ac.uk/grover/papers/PTRS-A-2010-Grover-3875-89.pdf is not an HTTPS link
 _site/en/lessons/georeferencing-qgis.html,121,'a' tag is missing a reference
 _site/en/lessons/georeferencing-qgis.html,140,'a' tag is missing a reference
 _site/en/lessons/georeferencing-qgis.html,177,'a' tag is missing a reference
 _site/en/lessons/georeferencing-qgis.html,203,'a' tag is missing a reference
-_site/en/lessons/georeferencing-qgis.html,528,http://en.wikipedia.org/wiki/Rubbersheeting is not an HTTPS link
-_site/en/lessons/georeferencing-qgis.html,578,http://en.wikipedia.org/wiki/Spatial_reference_system is not an HTTPS link
-_site/en/lessons/georeferencing-qgis.html,605,http://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP is not an HTTPS link
-_site/en/lessons/georeferencing-qgis.html,775,http://books.google.ca/books?id=TqCNZYXWXAUC&dq=tilting&source=gbs_navlinks_s is not an HTTPS link
-_site/en/lessons/georeferencing-qgis.html,943,http://en.wikipedia.org/wiki/World_file is not an HTTPS link
-_site/en/lessons/georeferencing-qgis.html,950,http://en.wikipedia.org/wiki/Tagged_Image_File_Format is not an HTTPS link
-_site/en/lessons/georeferencing-qgis.html,1129,http://geospatialhistorian.wordpress.com/ is not an HTTPS link
 _site/en/lessons/geospatial-data-analysis.html,120,'a' tag is missing a reference
 _site/en/lessons/geospatial-data-analysis.html,139,'a' tag is missing a reference
 _site/en/lessons/geospatial-data-analysis.html,176,'a' tag is missing a reference
 _site/en/lessons/geospatial-data-analysis.html,202,'a' tag is missing a reference
-_site/en/lessons/geospatial-data-analysis.html,495,http://www.ats.ucla.edu/stat/r/default.htm is not an HTTPS link
-_site/en/lessons/geospatial-data-analysis.html,570,http://geoservices.tamu.edu/Services/Geocode/ is not an HTTPS link
-_site/en/lessons/geospatial-data-analysis.html,616,http://r4ds.had.co.nz/transform.html is not an HTTPS link
-_site/en/lessons/geospatial-data-analysis.html,632,http://web.archive.org/web/20190922234254/http://strimas.com/r/tidy-sf/ is not an HTTPS link
-_site/en/lessons/geospatial-data-analysis.html,722,http://www.theanalysisfactor.com/regression-models-for-count-data/ is not an HTTPS link
-_site/en/lessons/geospatial-data-analysis.html,766,http://web.archive.org/web/20200225021219/https://www.nceas.ucsb.edu/~frazier/RSpatialGuides/OverviewCoordinateReferenceSystems.pdf is not an HTTPS link
-_site/en/lessons/geospatial-data-analysis.html,772,http://www.statisticshowto.com/probability-and-statistics/normal-distributions/ is not an HTTPS link
-_site/en/lessons/geospatial-data-analysis.html,775,http://www.sciencedirect.com/science/article/pii/S0031405608000073 is not an HTTPS link
-_site/en/lessons/geospatial-data-analysis.html,775,http://www.biostathandbook.com/transformation.html is not an HTTPS link
 _site/en/lessons/getting-started-with-markdown.html,117,'a' tag is missing a reference
 _site/en/lessons/getting-started-with-markdown.html,136,'a' tag is missing a reference
 _site/en/lessons/getting-started-with-markdown.html,173,'a' tag is missing a reference
 _site/en/lessons/getting-started-with-markdown.html,199,'a' tag is missing a reference
-_site/en/lessons/getting-started-with-markdown.html,528,http://daringfireball.net/projects/markdown/ is not an HTTPS link
-_site/en/lessons/getting-started-with-markdown.html,532,http://github.com is not an HTTPS link
 _site/en/lessons/getting-started-with-markdown.html,727,http://programminghistorian.org/ is not an HTTPS link
 _site/en/lessons/getting-started-with-mysql-using-r.html,117,'a' tag is missing a reference
 _site/en/lessons/getting-started-with-mysql-using-r.html,136,'a' tag is missing a reference
 _site/en/lessons/getting-started-with-mysql-using-r.html,173,'a' tag is missing a reference
 _site/en/lessons/getting-started-with-mysql-using-r.html,199,'a' tag is missing a reference
-_site/en/lessons/getting-started-with-mysql-using-r.html,482,http://www.jason-french.com/blog/2014/07/03/using-r-with-mysql-databases/ is not an HTTPS link
-_site/en/lessons/getting-started-with-mysql-using-r.html,595,http://newspapers.library.wales is not an HTTPS link
-_site/en/lessons/getting-started-with-mysql-using-r.html,848,http://dev.mysql.com/downloads/workbench/ is not an HTTPS link
-_site/en/lessons/getting-started-with-mysql-using-r.html,1147,http://newspapers.library.wales/view/4121281/4121288/94/ is not an HTTPS link
-_site/en/lessons/getting-started-with-mysql-using-r.html,1686,http://www.jeffblackadar.ca/graham_fellowship/corpus_entities_equity/ is not an HTTPS link
-_site/en/lessons/getting-started-with-mysql-using-r.html,1689,http://web.archive.org/web/20171228130133/https://www.ntu.edu.sg/home/ehchua/programming/sql/MySQL_Beginner.html is not an HTTPS link
-_site/en/lessons/getting-started-with-mysql-using-r.html,1696,http://grahamresearchfellow.org/ is not an HTTPS link
-_site/en/lessons/getting-started-with-mysql-using-r.html,1710,http://www.jason-french.com/blog/2014/07/03/using-r-with-mysql-databases/ is not an HTTPS link
 _site/en/lessons/googlemaps-googleearth.html,121,'a' tag is missing a reference
 _site/en/lessons/googlemaps-googleearth.html,140,'a' tag is missing a reference
 _site/en/lessons/googlemaps-googleearth.html,177,'a' tag is missing a reference
 _site/en/lessons/googlemaps-googleearth.html,203,'a' tag is missing a reference
-_site/en/lessons/googlemaps-googleearth.html,1181,http://www.davidrumsey.com/ is not an HTTPS link
-_site/en/lessons/googlemaps-googleearth.html,1444,http://niche-canada.org/2011/12/14/mobile-mapping-and-historical-gis-in-the-field/ is not an HTTPS link
-_site/en/lessons/googlemaps-googleearth.html,1593,http://geospatialhistorian.wordpress.com/ is not an HTTPS link
 _site/en/lessons/gravity-model.html,133,'a' tag is missing a reference
 _site/en/lessons/gravity-model.html,152,'a' tag is missing a reference
 _site/en/lessons/gravity-model.html,189,'a' tag is missing a reference
 _site/en/lessons/gravity-model.html,215,'a' tag is missing a reference
-_site/en/lessons/gravity-model.html,637,http://doi.org/10.5334/johd.1 is not an HTTPS link
-_site/en/lessons/gravity-model.html,661,http://www.migrants.adamcrymble.org/the-project/ is not an HTTPS link
-_site/en/lessons/gravity-model.html,664,http://doi.org/10.5334/johd.1 is not an HTTPS link
-_site/en/lessons/gravity-model.html,842,http://www.statisticshowto.com/population-mean/ is not an HTTPS link
-_site/en/lessons/gravity-model.html,1489,http://www.mathematica-journal.com/2013/06/negative-binomial-regression/ is not an HTTPS link
 _site/en/lessons/image-classification-neural-networks.html,117,'a' tag is missing a reference
 _site/en/lessons/image-classification-neural-networks.html,136,'a' tag is missing a reference
 _site/en/lessons/image-classification-neural-networks.html,173,'a' tag is missing a reference
@@ -841,19 +478,10 @@ _site/en/lessons/installing-omeka.html,117,'a' tag is missing a reference
 _site/en/lessons/installing-omeka.html,136,'a' tag is missing a reference
 _site/en/lessons/installing-omeka.html,173,'a' tag is missing a reference
 _site/en/lessons/installing-omeka.html,199,'a' tag is missing a reference
-_site/en/lessons/installing-omeka.html,469,http://omeka.net is not an HTTPS link
-_site/en/lessons/installing-omeka.html,484,http://aws.amazon.com/free/ is not an HTTPS link
-_site/en/lessons/installing-omeka.html,484,http://www.hostgator.com/ is not an HTTPS link
-_site/en/lessons/installing-omeka.html,484,http://www.dreamhost.com is not an HTTPS link
-_site/en/lessons/installing-omeka.html,486,http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-launch-instance_linux.html is not an HTTPS link
-_site/en/lessons/installing-omeka.html,492,http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AccessingInstancesLinux.html is not an HTTPS link
-_site/en/lessons/installing-omeka.html,500,http://support.hostgator.com/articles/hosting-guide/lets-get-started/how-do-i-get-and-use-ssh-access is not an HTTPS link
 _site/en/lessons/installing-python-modules-pip.html,117,'a' tag is missing a reference
 _site/en/lessons/installing-python-modules-pip.html,136,'a' tag is missing a reference
 _site/en/lessons/installing-python-modules-pip.html,173,'a' tag is missing a reference
 _site/en/lessons/installing-python-modules-pip.html,199,'a' tag is missing a reference
-_site/en/lessons/installing-python-modules-pip.html,531,http://www.thegeekstuff.com/2012/04/curl-examples/ is not an HTTPS link
-_site/en/lessons/installing-python-modules-pip.html,578,http://stackoverflow.com/questions/4750806/how-to-install-pip-on-windows is not an HTTPS link
 _site/en/lessons/interactive-data-visualization-dashboard.html,117,'a' tag is missing a reference
 _site/en/lessons/interactive-data-visualization-dashboard.html,136,'a' tag is missing a reference
 _site/en/lessons/interactive-data-visualization-dashboard.html,173,'a' tag is missing a reference
@@ -862,9 +490,6 @@ _site/en/lessons/interactive-text-games-using-twine.html,117,'a' tag is missing
 _site/en/lessons/interactive-text-games-using-twine.html,136,'a' tag is missing a reference
 _site/en/lessons/interactive-text-games-using-twine.html,173,'a' tag is missing a reference
 _site/en/lessons/interactive-text-games-using-twine.html,199,'a' tag is missing a reference
-_site/en/lessons/interactive-text-games-using-twine.html,576,http://www.depressionquest.com/ is not an HTTPS link
-_site/en/lessons/interactive-text-games-using-twine.html,643,http://twinery.org/ is not an HTTPS link
-_site/en/lessons/interactive-text-games-using-twine.html,1076,http://www.depressionquest.com/ is not an HTTPS link
 _site/en/lessons/interactive-visualization-with-plotly.html,117,'a' tag is missing a reference
 _site/en/lessons/interactive-visualization-with-plotly.html,136,'a' tag is missing a reference
 _site/en/lessons/interactive-visualization-with-plotly.html,173,'a' tag is missing a reference
@@ -877,40 +502,12 @@ _site/en/lessons/intro-to-bash.html,119,'a' tag is missing a reference
 _site/en/lessons/intro-to-bash.html,138,'a' tag is missing a reference
 _site/en/lessons/intro-to-bash.html,175,'a' tag is missing a reference
 _site/en/lessons/intro-to-bash.html,201,'a' tag is missing a reference
-_site/en/lessons/intro-to-bash.html,528,http://www.tldp.org/LDP/Bash-Beginners-Guide/html/chap_01.html is not an HTTPS link
-_site/en/lessons/intro-to-bash.html,530,http://en.wikipedia.org/wiki/Syntax is not an HTTPS link
-_site/en/lessons/intro-to-bash.html,532,http://en.wikipedia.org/wiki/Unix_shell is not an HTTPS link
-_site/en/lessons/intro-to-bash.html,532,http://en.wikipedia.org/wiki/Unix is not an HTTPS link
-_site/en/lessons/intro-to-bash.html,578,http://ethanschoonover.com/solarized is not an HTTPS link
-_site/en/lessons/intro-to-bash.html,714,http://en.wikipedia.org/wiki/Tree_structure is not an HTTPS link
-_site/en/lessons/intro-to-bash.html,738,http://www.viemu.com/a-why-vi-vim.html is not an HTTPS link
-_site/en/lessons/intro-to-bash.html,750,http://www.gutenberg.org/ebooks/2600 is not an HTTPS link
-_site/en/lessons/intro-to-bash.html,824,http://en.wikipedia.org/wiki/Vim_%28text_editor%29 is not an HTTPS link
-_site/en/lessons/intro-to-bash.html,848,http://vimdoc.sourceforge.net/htmldoc/quickref.html is not an HTTPS link
 _site/en/lessons/intro-to-linked-data.html,118,'a' tag is missing a reference
 _site/en/lessons/intro-to-linked-data.html,137,'a' tag is missing a reference
 _site/en/lessons/intro-to-linked-data.html,174,'a' tag is missing a reference
 _site/en/lessons/intro-to-linked-data.html,200,'a' tag is missing a reference
-_site/en/lessons/intro-to-linked-data.html,531,http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ is not an HTTPS link
-_site/en/lessons/intro-to-linked-data.html,560,http://www.oxforddnb.com is not an HTTPS link
-_site/en/lessons/intro-to-linked-data.html,572,http://www.geonames.org/ is not an HTTPS link
-_site/en/lessons/intro-to-linked-data.html,673,http://www.history.ac.uk/projects/digital/tobias is not an HTTPS link
-_site/en/lessons/intro-to-linked-data.html,707,http://semanticweb.org/wiki/Main_Page.html is not an HTTPS link
-_site/en/lessons/intro-to-linked-data.html,709,http://web.archive.org/web/20170715094229/http://www.musicontology.com/ is not an HTTPS link
-_site/en/lessons/intro-to-linked-data.html,709,http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html is not an HTTPS link
-_site/en/lessons/intro-to-linked-data.html,827,http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html is not an HTTPS link
-_site/en/lessons/intro-to-linked-data.html,862,http://www.history.ac.uk/projects/digital/tobias is not an HTTPS link
-_site/en/lessons/intro-to-linked-data.html,883,http://www.easyrdf.org/converter is not an HTTPS link
-_site/en/lessons/intro-to-linked-data.html,895,http://dbpedia.org/snorql/ is not an HTTPS link
-_site/en/lessons/intro-to-linked-data.html,995,http://dbpedia.org/class/yago/WikicatBritishHistorians is not an HTTPS link
-_site/en/lessons/intro-to-linked-data.html,995,http://dbpedia.org/class/yago/WikicatWomenHistorians is not an HTTPS link
 _site/en/lessons/intro-to-linked-data.html,1024,'a' tag is missing a reference
 _site/en/lessons/intro-to-linked-data.html,1024,'a' tag is missing a reference
-_site/en/lessons/intro-to-linked-data.html,1034,http://www.snee.com/bobdc.blog/ is not an HTTPS link
-_site/en/lessons/intro-to-linked-data.html,1038,http://linkeddata.org/guides-and-tutorials is not an HTTPS link
-_site/en/lessons/intro-to-linked-data.html,1040,http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ is not an HTTPS link
-_site/en/lessons/intro-to-linked-data.html,1046,http://www.history.ac.uk/projects/digital/tobias is not an HTTPS link
-_site/en/lessons/intro-to-linked-data.html,1046,http://www.ahrc.ac.uk/ is not an HTTPS link
 _site/en/lessons/intro-to-powershell.html,117,'a' tag is missing a reference
 _site/en/lessons/intro-to-powershell.html,136,'a' tag is missing a reference
 _site/en/lessons/intro-to-powershell.html,173,'a' tag is missing a reference
@@ -919,32 +516,10 @@ _site/en/lessons/intro-to-twitterbots.html,121,'a' tag is missing a reference
 _site/en/lessons/intro-to-twitterbots.html,140,'a' tag is missing a reference
 _site/en/lessons/intro-to-twitterbots.html,177,'a' tag is missing a reference
 _site/en/lessons/intro-to-twitterbots.html,203,'a' tag is missing a reference
-_site/en/lessons/intro-to-twitterbots.html,518,http://tracery.io is not an HTTPS link
-_site/en/lessons/intro-to-twitterbots.html,518,http://cheapbotsdonequick.com/ is not an HTTPS link
-_site/en/lessons/intro-to-twitterbots.html,521,http://www.sciencedirect.com/science/article/pii/S0747563213003129 is not an HTTPS link
-_site/en/lessons/intro-to-twitterbots.html,521,http://www.docnow.io/ is not an HTTPS link
-_site/en/lessons/intro-to-twitterbots.html,589,http://json.org/ is not an HTTPS link
-_site/en/lessons/intro-to-twitterbots.html,593,http://twitter.com/tinyarchae is not an HTTPS link
-_site/en/lessons/intro-to-twitterbots.html,593,http://web.archive.org/web/20180131161516/https://publicarchaeologyconference.wordpress.com/ is not an HTTPS link
-_site/en/lessons/intro-to-twitterbots.html,603,http://tracery.io is not an HTTPS link
-_site/en/lessons/intro-to-twitterbots.html,769,http://apps.twitter.com is not an HTTPS link
-_site/en/lessons/intro-to-twitterbots.html,771,http://cheapbotsdonequick.com/ is not an HTTPS link
-_site/en/lessons/intro-to-twitterbots.html,821,http://tinysubversions.com/2013/03/basic-twitter-bot-etiquette/ is not an HTTPS link
-_site/en/lessons/intro-to-twitterbots.html,845,http://unicode.org/emoji/charts/full-emoji-list.html is not an HTTPS link
-_site/en/lessons/intro-to-twitterbots.html,868,http://www.crystalcodepalace.com/traceryTut.html is not an HTTPS link
-_site/en/lessons/intro-to-twitterbots.html,884,http://cheapbotsdonequick.com/ is not an HTTPS link
-_site/en/lessons/intro-to-twitterbots.html,923,http://cheapbotsdonequick.com/source/softlandscapes is not an HTTPS link
-_site/en/lessons/intro-to-twitterbots.html,942,http://www.codingblocks.net/videos/generating-music-in-javascript/ is not an HTTPS link
-_site/en/lessons/intro-to-twitterbots.html,947,http://www.zachwhalen.net/posts/how-to-make-a-twitter-bot-with-google-spreadsheets-version-04/ is not an HTTPS link
 _site/en/lessons/introduction-and-installation.html,119,'a' tag is missing a reference
 _site/en/lessons/introduction-and-installation.html,138,'a' tag is missing a reference
 _site/en/lessons/introduction-and-installation.html,175,'a' tag is missing a reference
 _site/en/lessons/introduction-and-installation.html,201,'a' tag is missing a reference
-_site/en/lessons/introduction-and-installation.html,513,http://www.python.org/ is not an HTTPS link
-_site/en/lessons/introduction-and-installation.html,514,http://www.crummy.com/software/BeautifulSoup/ is not an HTTPS link
-_site/en/lessons/introduction-and-installation.html,515,http://www.activestate.com/komodo-edit is not an HTTPS link
-_site/en/lessons/introduction-and-installation.html,517,http://wiki.python.org/moin/PythonEditors/ is not an HTTPS link
-_site/en/lessons/introduction-and-installation.html,526,http://sebastianraschka.com/Articles/2014_python_2_3_key_diff.html is not an HTTPS link
 _site/en/lessons/introduction-map-warper.html,121,'a' tag is missing a reference
 _site/en/lessons/introduction-map-warper.html,140,'a' tag is missing a reference
 _site/en/lessons/introduction-map-warper.html,177,'a' tag is missing a reference
@@ -953,46 +528,18 @@ _site/en/lessons/introduction-to-ffmpeg.html,117,'a' tag is missing a reference
 _site/en/lessons/introduction-to-ffmpeg.html,136,'a' tag is missing a reference
 _site/en/lessons/introduction-to-ffmpeg.html,173,'a' tag is missing a reference
 _site/en/lessons/introduction-to-ffmpeg.html,199,'a' tag is missing a reference
-_site/en/lessons/introduction-to-ffmpeg.html,574,http://linuxbrew.sh/ is not an HTTPS link
-_site/en/lessons/introduction-to-ffmpeg.html,621,http://linuxbrew.sh/ is not an HTTPS link
-_site/en/lessons/introduction-to-ffmpeg.html,1081,http://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm is not an HTTPS link
-_site/en/lessons/introduction-to-ffmpeg.html,1082,http://tldp.org/HOWTO/Bash-Prog-Intro-HOWTO-5.html is not an HTTPS link
 _site/en/lessons/introduction-to-populating-a-website-with-api-data.html,120,'a' tag is missing a reference
 _site/en/lessons/introduction-to-populating-a-website-with-api-data.html,139,'a' tag is missing a reference
 _site/en/lessons/introduction-to-populating-a-website-with-api-data.html,176,'a' tag is missing a reference
 _site/en/lessons/introduction-to-populating-a-website-with-api-data.html,202,'a' tag is missing a reference
-_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,512,http://php.net/ is not an HTTPS link
-_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,636,http://jsonviewer.stack.hu/ is not an HTTPS link
-_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,661,http://json.org/ is not an HTTPS link
-_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,717,http://uurl.kbr.be/1017835 is not an HTTPS link
-_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,732,http://www.europeana.eu/portal/record/90402/RP_P_OB_84_508.html is not an HTTPS link
-_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,741,http://dublincore.org/documents/dcmi-terms/ is not an HTTPS link
-_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,821,http://localhost/dashboard is not an HTTPS link
-_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,859,http://localhost/helloworld.php is not an HTTPS link
-_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,1245,http://developer.nytimes.com/ is not an HTTPS link
-_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,1248,http://www.geonames.org/export/web-services.html is not an HTTPS link
-_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,1251,http://museum-api.pbworks.com/w/page/21933420/Museum%C2%A0APIs is not an HTTPS link
 _site/en/lessons/introduction-to-stylometry-with-python.html,120,'a' tag is missing a reference
 _site/en/lessons/introduction-to-stylometry-with-python.html,139,'a' tag is missing a reference
 _site/en/lessons/introduction-to-stylometry-with-python.html,176,'a' tag is missing a reference
 _site/en/lessons/introduction-to-stylometry-with-python.html,202,'a' tag is missing a reference
-_site/en/lessons/introduction-to-stylometry-with-python.html,568,http://www.gutenberg.org/cache/epub/1404/pg1404.txt is not an HTTPS link
-_site/en/lessons/introduction-to-stylometry-with-python.html,584,http://www.nltk.org/book/ is not an HTTPS link
-_site/en/lessons/introduction-to-stylometry-with-python.html,588,http://www.nltk.org/book/ is not an HTTPS link
-_site/en/lessons/introduction-to-stylometry-with-python.html,724,http://jupyter.org/ is not an HTTPS link
-_site/en/lessons/introduction-to-stylometry-with-python.html,724,http://jupyterlab.readthedocs.io/en/stable/getting_started/installation.html is not an HTTPS link
-_site/en/lessons/introduction-to-stylometry-with-python.html,935,http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/ is not an HTTPS link
 _site/en/lessons/json-and-jq.html,117,'a' tag is missing a reference
 _site/en/lessons/json-and-jq.html,136,'a' tag is missing a reference
 _site/en/lessons/json-and-jq.html,173,'a' tag is missing a reference
 _site/en/lessons/json-and-jq.html,199,'a' tag is missing a reference
-_site/en/lessons/json-and-jq.html,520,http://www.json.org/ is not an HTTPS link
-_site/en/lessons/json-and-jq.html,731,http://stackoverflow.com/questions/3135325/why-do-vector-indices-in-r-start-with-1-instead-of-0 is not an HTTPS link
-_site/en/lessons/json-and-jq.html,731,http://skillcrush.com/2013/01/17/why-programmers-start-counting-at-zero/ is not an HTTPS link
-_site/en/lessons/json-and-jq.html,1416,http://brew.sh/ is not an HTTPS link
-_site/en/lessons/json-and-jq.html,1467,http://stackoverflow.com/questions/tagged/jq is not an HTTPS link
-_site/en/lessons/json-and-jq.html,1468,http://stackoverflow.com/help/how-to-ask is not an HTTPS link
-_site/en/lessons/json-and-jq.html,1468,http://stackoverflow.com/help/mcve is not an HTTPS link
 _site/en/lessons/jupyter-notebooks.html,121,'a' tag is missing a reference
 _site/en/lessons/jupyter-notebooks.html,140,'a' tag is missing a reference
 _site/en/lessons/jupyter-notebooks.html,177,'a' tag is missing a reference
@@ -1015,14 +562,10 @@ _site/en/lessons/logistic-regression.html,133,'a' tag is missing a reference
 _site/en/lessons/logistic-regression.html,152,'a' tag is missing a reference
 _site/en/lessons/logistic-regression.html,189,'a' tag is missing a reference
 _site/en/lessons/logistic-regression.html,215,'a' tag is missing a reference
-_site/en/lessons/logistic-regression.html,1993,http://scikit-learn.org/stable/install.html is not an HTTPS link
 _site/en/lessons/mac-installation.html,119,'a' tag is missing a reference
 _site/en/lessons/mac-installation.html,138,'a' tag is missing a reference
 _site/en/lessons/mac-installation.html,175,'a' tag is missing a reference
 _site/en/lessons/mac-installation.html,201,'a' tag is missing a reference
-_site/en/lessons/mac-installation.html,498,http://support.apple.com/kb/ht1427 is not an HTTPS link
-_site/en/lessons/mac-installation.html,504,http://www.python.org/ is not an HTTPS link
-_site/en/lessons/mac-installation.html,517,http://wiki.python.org/moin/PythonEditors/ is not an HTTPS link
 _site/en/lessons/manipulating-strings-in-python.html,119,'a' tag is missing a reference
 _site/en/lessons/manipulating-strings-in-python.html,138,'a' tag is missing a reference
 _site/en/lessons/manipulating-strings-in-python.html,175,'a' tag is missing a reference
@@ -1031,51 +574,14 @@ _site/en/lessons/mapping-with-python-leaflet.html,117,'a' tag is missing a refer
 _site/en/lessons/mapping-with-python-leaflet.html,136,'a' tag is missing a reference
 _site/en/lessons/mapping-with-python-leaflet.html,173,'a' tag is missing a reference
 _site/en/lessons/mapping-with-python-leaflet.html,199,'a' tag is missing a reference
-_site/en/lessons/mapping-with-python-leaflet.html,511,http://pip.readthedocs.org/en/stable/ is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,511,http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,512,http://leafletjs.com/ is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,513,http://geojson.io/ is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,522,http://www.barebones.com/products/textwrangler/ is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,522,http://www.sublimetext.com/ is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,524,http://docs.python-guide.org/en/latest/dev/virtualenvs/ is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,535,http://data.london.gov.uk/dataset/historic-census-population is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,555,http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,557,http://pip.readthedocs.org/en/stable/ is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,561,http://pip.readthedocs.org/en/stable/installing/ is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,573,http://pandas.pydata.org/pandas-docs/stable/install.html#dependencies is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,611,http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,661,http://geopy.readthedocs.org/ is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,682,http://geopy.readthedocs.io/en/latest/#module-geopy.geocoders is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,682,http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.apply.html is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,792,http://geojson.io is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,896,http://geojson.io is not an HTTPS link
-_site/en/lessons/mapping-with-python-leaflet.html,1185,http://leafletjs.com/SlavaUkraini/reference-1.2.0.html#geojson-oneachfeature is not an HTTPS link
 _site/en/lessons/naive-bayesian.html,117,'a' tag is missing a reference
 _site/en/lessons/naive-bayesian.html,136,'a' tag is missing a reference
 _site/en/lessons/naive-bayesian.html,173,'a' tag is missing a reference
 _site/en/lessons/naive-bayesian.html,199,'a' tag is missing a reference
-_site/en/lessons/naive-bayesian.html,493,http://digitalhistoryhacks.blogspot.com/2008/05/naive-bayesian-in-old-bailey-part-1.html is not an HTTPS link
-_site/en/lessons/naive-bayesian.html,501,http://www.oldbaileyonline.org/ is not an HTTPS link
-_site/en/lessons/naive-bayesian.html,557,http://www.crummy.com/software/BeautifulSoup/ is not an HTTPS link
-_site/en/lessons/naive-bayesian.html,562,http://www.oldbaileyonline.org/ is not an HTTPS link
-_site/en/lessons/naive-bayesian.html,581,http://www.oldbaileyonline.org/forms/formMain.jsp is not an HTTPS link
-_site/en/lessons/naive-bayesian.html,593,http://en.wikipedia.org/wiki/Statistical_classification is not an HTTPS link
-_site/en/lessons/naive-bayesian.html,593,http://home.deib.polimi.it/matteucc/Clustering/tutorial_html/ is not an HTTPS link
-_site/en/lessons/naive-bayesian.html,616,http://www.paulgraham.com/spam.html is not an HTTPS link
-_site/en/lessons/naive-bayesian.html,696,http://www.yudkowsky.net/rational/bayes is not an HTTPS link
-_site/en/lessons/naive-bayesian.html,897,http://betterexplained.com/articles/using-logs-in-the-real-world/ is not an HTTPS link
-_site/en/lessons/naive-bayesian.html,915,http://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introbayes_sect004.htm is not an HTTPS link
-_site/en/lessons/naive-bayesian.html,1082,http://www.oldbaileyonline.org/static/DocAPI.jsp is not an HTTPS link
-_site/en/lessons/naive-bayesian.html,1098,http://www.oldbaileyonline.org/forms/formMain.jsp is not an HTTPS link
-_site/en/lessons/naive-bayesian.html,1310,http://pypy.org/ is not an HTTPS link -_site/en/lessons/naive-bayesian.html,2014,http://snowball.tartarus.org/ is not an HTTPS link -_site/en/lessons/naive-bayesian.html,2024,http://stevenloria.com/finding-important-words-in-a-document-using-tf-idf/ is not an HTTPS link _site/en/lessons/normalizing-data.html,119,'a' tag is missing a reference _site/en/lessons/normalizing-data.html,138,'a' tag is missing a reference _site/en/lessons/normalizing-data.html,175,'a' tag is missing a reference _site/en/lessons/normalizing-data.html,201,'a' tag is missing a reference -_site/en/lessons/normalizing-data.html,538,http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 is not an HTTPS link -_site/en/lessons/normalizing-data.html,724,http://unicode.org/ is not an HTTPS link _site/en/lessons/ocr-with-google-vision-and-tesseract.html,117,'a' tag is missing a reference _site/en/lessons/ocr-with-google-vision-and-tesseract.html,136,'a' tag is missing a reference _site/en/lessons/ocr-with-google-vision-and-tesseract.html,173,'a' tag is missing a reference @@ -1092,58 +598,22 @@ _site/en/lessons/preserving-your-research-data.html,117,'a' tag is missing a ref _site/en/lessons/preserving-your-research-data.html,136,'a' tag is missing a reference _site/en/lessons/preserving-your-research-data.html,173,'a' tag is missing a reference _site/en/lessons/preserving-your-research-data.html,199,'a' tag is missing a reference -_site/en/lessons/preserving-your-research-data.html,580,http://en.wikipedia.org/wiki/PRINCE2 is not an HTTPS link -_site/en/lessons/preserving-your-research-data.html,621,http://en.wikipedia.org/wiki/Cross-platform is not an HTTPS link -_site/en/lessons/preserving-your-research-data.html,644,http://en.wikipedia.org/wiki/Markdown is not an HTTPS link -_site/en/lessons/preserving-your-research-data.html,648,http://notepad-plus-plus.org/ is not an HTTPS link -_site/en/lessons/preserving-your-research-data.html,650,http://komodoide.com/komodo-edit/ is not an HTTPS link -_site/en/lessons/preserving-your-research-data.html,699,http://cradledincaricature.com/2014/02/06/comic-art-beyond-the-print-shop/ is not an HTTPS link -_site/en/lessons/preserving-your-research-data.html,709,http://www.theguardian.com/uk-news/2014/feb/20/rebekah-brooks-rupert-murdoch-phone-hacking-trial is not an HTTPS link -_site/en/lessons/preserving-your-research-data.html,719,http://www.cartoons.ac.uk/record/SBD0931 is not an HTTPS link -_site/en/lessons/preserving-your-research-data.html,726,http://www.oldbaileyonline.org/browse.jsp?ref=OA16780417 is not an HTTPS link -_site/en/lessons/preserving-your-research-data.html,901,http://historyonics.blogspot.co.uk/2014/01/judging-book-by-its-url.html is not an HTTPS link -_site/en/lessons/preserving-your-research-data.html,905,http://earlymodernnotes.wordpress.com/2013/05/18/unclean-unclean-what-historians-can-do-about-sharing-our-messy-research-data/ is not an HTTPS link -_site/en/lessons/preserving-your-research-data.html,918,http://britishlibrary.typepad.co.uk/collectioncare/2013/09/the-twelve-principles-of-digital-preservation.html is not an HTTPS link -_site/en/lessons/preserving-your-research-data.html,927,http://data-archive.ac.uk/create-manage/document is not an HTTPS link _site/en/lessons/qgis-layers.html,121,'a' tag is missing a reference _site/en/lessons/qgis-layers.html,140,'a' tag is missing a reference _site/en/lessons/qgis-layers.html,177,'a' tag is missing a reference 
_site/en/lessons/qgis-layers.html,203,'a' tag is missing a reference -_site/en/lessons/qgis-layers.html,529,http://qgis.org/en/site/forusers/download.html is not an HTTPS link -_site/en/lessons/qgis-layers.html,539,http://www.kyngchaos.com/software/qgis is not an HTTPS link -_site/en/lessons/qgis-layers.html,541,http://www.kyngchaos.com/software/archive is not an HTTPS link -_site/en/lessons/qgis-layers.html,609,http://www.gov.pe.ca/gis/download.php3?name=coastline&file_format=SHP is not an HTTPS link -_site/en/lessons/qgis-layers.html,610,http://www.gov.pe.ca/gis/download.php3?name=lot_town&file_format=SHP is not an HTTPS link -_site/en/lessons/qgis-layers.html,611,http://www.gov.pe.ca/gis/download.php3?name=hydronetwork&file_format=SHP is not an HTTPS link -_site/en/lessons/qgis-layers.html,612,http://www.gov.pe.ca/gis/download.php3?name=forest_35&file_format=SHP is not an HTTPS link -_site/en/lessons/qgis-layers.html,613,http://www.gov.pe.ca/gis/download.php3?name=nat_parks&file_format=SHP is not an HTTPS link -_site/en/lessons/qgis-layers.html,634,http://en.wikipedia.org/wiki/Spatial_reference_system is not an HTTPS link -_site/en/lessons/qgis-layers.html,642,http://www.gov.pe.ca/gis/index.php3?number=77865&lang=E is not an HTTPS link -_site/en/lessons/qgis-layers.html,737,http://web.archive.org/web/20180807132308/http://qgis.spatialthoughts.com/2012/04/tutorial-working-with-projections-in.html is not an HTTPS link -_site/en/lessons/qgis-layers.html,768,http://www.gislounge.com/geodatabases-explored-vector-and-raster-data/ is not an HTTPS link -_site/en/lessons/qgis-layers.html,1312,http://en.wikipedia.org/wiki/Orthophoto is not an HTTPS link -_site/en/lessons/qgis-layers.html,1463,http://geospatialhistorian.wordpress.com/ is not an HTTPS link _site/en/lessons/r-basics-with-tabular-data.html,117,'a' tag is missing a reference _site/en/lessons/r-basics-with-tabular-data.html,136,'a' tag is missing a reference _site/en/lessons/r-basics-with-tabular-data.html,173,'a' tag is missing a reference _site/en/lessons/r-basics-with-tabular-data.html,199,'a' tag is missing a reference -_site/en/lessons/r-basics-with-tabular-data.html,1016,http://web.archive.org/web/20191015004305/https://www.nceas.ucsb.edu/files/scicomp/Dloads/RProgramming/BestFirstRTutorial.pdf is not an HTTPS link -_site/en/lessons/r-basics-with-tabular-data.html,1020,http://dh-r.lincolnmullen.com/ is not an HTTPS link _site/en/lessons/research-data-with-unix.html,119,'a' tag is missing a reference _site/en/lessons/research-data-with-unix.html,138,'a' tag is missing a reference _site/en/lessons/research-data-with-unix.html,175,'a' tag is missing a reference _site/en/lessons/research-data-with-unix.html,201,'a' tag is missing a reference -_site/en/lessons/research-data-with-unix.html,506,http://msysgit.github.io/ is not an HTTPS link -_site/en/lessons/research-data-with-unix.html,514,http://www.7-zip.org/ is not an HTTPS link -_site/en/lessons/research-data-with-unix.html,536,http://en.wikipedia.org/wiki/Tab-separated_values is not an HTTPS link -_site/en/lessons/research-data-with-unix.html,538,http://en.wikipedia.org/wiki/Comma-separated_values is not an HTTPS link -_site/en/lessons/research-data-with-unix.html,597,http://williamjturkel.net/2013/06/15/basic-text-analysis-with-command-line-tools-in-linux/ is not an HTTPS link -_site/en/lessons/research-data-with-unix.html,598,http://williamjturkel.net/2013/06/20/pattern-matching-and-permuted-term-indexing-with-command-line-tools-in-linux/ is not an HTTPS link 
_site/en/lessons/retired/OCR-and-Machine-Translation.html,87,'a' tag is missing a reference
_site/en/lessons/retired/OCR-and-Machine-Translation.html,106,'a' tag is missing a reference
_site/en/lessons/retired/OCR-and-Machine-Translation.html,143,'a' tag is missing a reference
_site/en/lessons/retired/OCR-and-Machine-Translation.html,169,'a' tag is missing a reference
-_site/en/lessons/retired/OCR-and-Machine-Translation.html,523,http://www.fmwconcepts.com/imagemagick/textcleaner/index.php is not an HTTPS link
_site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html,87,'a' tag is missing a reference
_site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html,106,'a' tag is missing a reference
_site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html,143,'a' tag is missing a reference
@@ -1151,7 +621,6 @@ _site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html,169,'a' tag is m
_site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html,444,'a' tag is missing a reference
_site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html,488,'a' tag is missing a reference
_site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html,662,'a' tag is missing a reference
-_site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html,667,http://www.7-zip.org/ is not an HTTPS link
_site/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor.html,788,'a' tag is missing a reference
_site/en/lessons/retired/counting-frequencies-from-zotero-items.html,87,'a' tag is missing a reference
_site/en/lessons/retired/counting-frequencies-from-zotero-items.html,106,'a' tag is missing a reference
_site/en/lessons/retired/counting-frequencies-from-zotero-items.html,143,'a' tag is missing a reference
_site/en/lessons/retired/counting-frequencies-from-zotero-items.html,169,'a' tag is missing a reference
@@ -1165,60 +634,22 @@ _site/en/lessons/retired/getting-started-with-github-desktop.html,87,'a' tag is
_site/en/lessons/retired/getting-started-with-github-desktop.html,106,'a' tag is missing a reference
_site/en/lessons/retired/getting-started-with-github-desktop.html,143,'a' tag is missing a reference
_site/en/lessons/retired/getting-started-with-github-desktop.html,169,'a' tag is missing a reference
-_site/en/lessons/retired/getting-started-with-github-desktop.html,449,http://swcarpentry.github.io/git-novice/ is not an HTTPS link
-_site/en/lessons/retired/getting-started-with-github-desktop.html,611,http://flight-manual.atom.io/ is not an HTTPS link
-_site/en/lessons/retired/getting-started-with-github-desktop.html,728,http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html is not an HTTPS link
_site/en/lessons/retired/graph-databases-and-SPARQL.html,87,'a' tag is missing a reference
_site/en/lessons/retired/graph-databases-and-SPARQL.html,106,'a' tag is missing a reference
_site/en/lessons/retired/graph-databases-and-SPARQL.html,143,'a' tag is missing a reference
_site/en/lessons/retired/graph-databases-and-SPARQL.html,169,'a' tag is missing a reference
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,514,http://collection.britishmuseum.org is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,514,http://labs.europeana.eu/api/linked-open-data-introduction is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,515,http://americanart.si.edu is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,515,http://britishart.yale.edu/collections/using-collections/technology/linked-open-data is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,516,http://vocab.getty.edu is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,589,http://plot.ly is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,590,http://palladio.designhumanities.org/ is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,794,http://collection.britishmuseum.org/sparql is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,816,http://collection.britishmuseum.org/id/object/PPA82633 is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,830,http://collection.britishmuseum.org/sparql?query=SELECT+*%0D%0AWHERE+%7B%0D%0A++%3Chttp%3A%2F%2Fcollection.britishmuseum.org%2Fid%2Fobject%2FPPA82633%3E+%3Fp+%3Fo+.%0D%0A++%7D&_implicit=false&_equivalent=false&_form=%2Fsparql is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,1104,http://sparql.europeana.eu/ is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,1106,http://wiki.dbpedia.org/ is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,1107,http://sws.geonames.org/ is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,1111,http://sparql.europeana.eu/ is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,1186,http://openrefine.org/ is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,1189,http://stedolan.github.io/jq/download/ is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,1200,http://palladio.designhumanities.org/ is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,1272,http://en.wikibooks.org/wiki/XQuery/SPARQL_Tutorial is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,1281,http://labs.europeana.eu/api/linked-open-data-SPARQL-endpoint is not an HTTPS link
-_site/en/lessons/retired/graph-databases-and-SPARQL.html,1282,http://vocab.getty.edu/queries is not an HTTPS link
_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,87,'a' tag is missing a reference
_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,106,'a' tag is missing a reference
_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,143,'a' tag is missing a reference
_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,169,'a' tag is missing a reference
-_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,490,http://www.gizmag.com/ikea-augmented-reality-catalog-app/28703/ is not an HTTPS link
-_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,490,http://www.digi-capital.com/news/2015/04/augmentedvirtual-reality-to-hit-150-billion-disrupting-mobile-by-2020/#.VbetCU1VhHw is not an HTTPS link
-_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,508,http://www.tamikothiel.com/AR/clouding-green.html is not an HTTPS link
-_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,510,http://web.archive.org/web/20180421163517/http://english.ufl.edu/trace_arcs/ is not an HTTPS link
-_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,516,http://docs.unity3d.com/Manual/LearningtheInterface.html is not an HTTPS link
-_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,649,http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html is not an HTTPS link
-_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,1046,http://www.gimp.org/ is not an HTTPS link
-_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,1335,http://docs.unity3d.com/Manual/Transforms.html is not an HTTPS link -_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,1392,http://developer.android.com/tools/device.html is not an HTTPS link _site/en/lessons/retired/intro-to-beautiful-soup.html,87,'a' tag is missing a reference _site/en/lessons/retired/intro-to-beautiful-soup.html,106,'a' tag is missing a reference _site/en/lessons/retired/intro-to-beautiful-soup.html,143,'a' tag is missing a reference _site/en/lessons/retired/intro-to-beautiful-soup.html,169,'a' tag is missing a reference -_site/en/lessons/retired/intro-to-beautiful-soup.html,456,http://praxis.scholarslab.org/resources/bash/ is not an HTTPS link -_site/en/lessons/retired/intro-to-beautiful-soup.html,463,http://www.crummy.com/software/BeautifulSoup/bs4/doc/ is not an HTTPS link -_site/en/lessons/retired/intro-to-beautiful-soup.html,475,http://www.crummy.com/software/BeautifulSoup/bs4/doc/ is not an HTTPS link -_site/en/lessons/retired/intro-to-beautiful-soup.html,601,http://urllib3.readthedocs.org/en/latest/ is not an HTTPS link -_site/en/lessons/retired/intro-to-beautiful-soup.html,609,http://bioguide.congress.gov/biosearch/biosearch.asp is not an HTTPS link _site/en/lessons/retired/intro-to-the-zotero-api.html,87,'a' tag is missing a reference _site/en/lessons/retired/intro-to-the-zotero-api.html,106,'a' tag is missing a reference _site/en/lessons/retired/intro-to-the-zotero-api.html,143,'a' tag is missing a reference _site/en/lessons/retired/intro-to-the-zotero-api.html,169,'a' tag is missing a reference -_site/en/lessons/retired/intro-to-the-zotero-api.html,449,http://zotero.org is not an HTTPS link _site/en/lessons/scalable-reading-of-structured-data.html,125,'a' tag is missing a reference _site/en/lessons/scalable-reading-of-structured-data.html,144,'a' tag is missing a reference _site/en/lessons/scalable-reading-of-structured-data.html,181,'a' tag is missing a reference @@ -1231,12 +662,6 @@ _site/en/lessons/sentiment-analysis.html,120,'a' tag is missing a reference _site/en/lessons/sentiment-analysis.html,139,'a' tag is missing a reference _site/en/lessons/sentiment-analysis.html,176,'a' tag is missing a reference _site/en/lessons/sentiment-analysis.html,202,'a' tag is missing a reference -_site/en/lessons/sentiment-analysis.html,506,http://www.nltk.org/ is not an HTTPS link -_site/en/lessons/sentiment-analysis.html,549,http://journals.sagepub.com/doi/abs/10.1177/1749975514542486 is not an HTTPS link -_site/en/lessons/sentiment-analysis.html,549,http://www.emeraldinsight.com/doi/abs/10.1108/S0733-558X%282014%290000040001 is not an HTTPS link -_site/en/lessons/sentiment-analysis.html,567,http://www.nltk.org/install.html is not an HTTPS link -_site/en/lessons/sentiment-analysis.html,579,http://www.nltk.org/_modules/nltk/sentiment/vader.html is not an HTTPS link -_site/en/lessons/sentiment-analysis.html,602,http://www.nltk.org/_modules/nltk/sentiment/vader.html is not an HTTPS link _site/en/lessons/shiny-leaflet-newspaper-map-tutorial.html,117,'a' tag is missing a reference _site/en/lessons/shiny-leaflet-newspaper-map-tutorial.html,136,'a' tag is missing a reference _site/en/lessons/shiny-leaflet-newspaper-map-tutorial.html,173,'a' tag is missing a reference @@ -1245,35 +670,10 @@ _site/en/lessons/simulating-historical-communication-networks-python.html,123,'a _site/en/lessons/simulating-historical-communication-networks-python.html,142,'a' tag is missing a reference 
_site/en/lessons/simulating-historical-communication-networks-python.html,179,'a' tag is missing a reference _site/en/lessons/simulating-historical-communication-networks-python.html,205,'a' tag is missing a reference -_site/en/lessons/simulating-historical-communication-networks-python.html,1500,http://arxiv.org/abs/2112.04336 is not an HTTPS link _site/en/lessons/sonification.html,117,'a' tag is missing a reference _site/en/lessons/sonification.html,136,'a' tag is missing a reference _site/en/lessons/sonification.html,173,'a' tag is missing a reference _site/en/lessons/sonification.html,199,'a' tag is missing a reference -_site/en/lessons/sonification.html,517,http://web.archive.org/web/20190203083307/http://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html is not an HTTPS link -_site/en/lessons/sonification.html,521,http://blog.taracopplestone.co.uk/making-things-photobashing-as-archaeological-remediation/ is not an HTTPS link -_site/en/lessons/sonification.html,521,http://www.samplereality.com/2012/05/02/notes-towards-a-deformed-humanities/ is not an HTTPS link -_site/en/lessons/sonification.html,521,http://nowviskie.org/2013/resistance-in-the-materials/ is not an HTTPS link -_site/en/lessons/sonification.html,521,http://nooart.org/post/73353953758/temkin-glitchhumancomputerinteraction is not an HTTPS link -_site/en/lessons/sonification.html,533,http://musicalgorithms.org/ is not an HTTPS link -_site/en/lessons/sonification.html,535,http://sonic-pi.net/ is not an HTTPS link -_site/en/lessons/sonification.html,548,http://www.icad.org/Proceedings/2008/Hermann2008.pdf is not an HTTPS link -_site/en/lessons/sonification.html,559,http://waxy.org/2015/12/if_drake_was_born_a_piano/ is not an HTTPS link -_site/en/lessons/sonification.html,571,http://musicalgorithms.org/ is not an HTTPS link -_site/en/lessons/sonification.html,571,http://musicalgorithms.org/3.0/index.html is not an HTTPS link -_site/en/lessons/sonification.html,625,http://www.lynda.com/GarageBand-tutorials/Importing-audio-tracks/156620/164050-4.html is not an HTTPS link -_site/en/lessons/sonification.html,663,http://musicalgorithms.org/3.0/index.html is not an HTTPS link -_site/en/lessons/sonification.html,706,http://www.ethanhein.com/wp/2010/scales-and-emotions/ is not an HTTPS link -_site/en/lessons/sonification.html,771,http://docs.python-guide.org/en/latest/starting/install/win/ is not an HTTPS link -_site/en/lessons/sonification.html,775,http://www.howtogeek.com/235101/10-ways-to-open-the-command-prompt-in-windows-10/ is not an HTTPS link -_site/en/lessons/sonification.html,836,http://abcnotation.com/wiki/abc:standard:v2.1 is not an HTTPS link -_site/en/lessons/sonification.html,836,http://trillian.mit.edu/~jc/music/abc/ABCcontrib.html is not an HTTPS link -_site/en/lessons/sonification.html,840,http://themacroscope.org is not an HTTPS link -_site/en/lessons/sonification.html,991,http://sonic-pi.net is not an HTTPS link -_site/en/lessons/sonification.html,997,http://puffin.creighton.edu/jesuit/relations/ is not an HTTPS link -_site/en/lessons/sonification.html,1057,http://library.gwu.edu/scholarly-technology-group/posts/sound-library-work is not an HTTPS link -_site/en/lessons/sonification.html,1060,http://www.lilypond.org/ is not an HTTPS link -_site/en/lessons/sonification.html,1064,http://www.trevorowens.org/2012/11/discovery-and-justification-are-different-notes-on-sciencing-the-humanities/ is not an HTTPS link _site/en/lessons/sonification.html,1069,'a' tag is missing a reference 
_site/en/lessons/sonification.html,1070,'a' tag is missing a reference _site/en/lessons/sonification.html,1071,'a' tag is missing a reference @@ -1282,94 +682,35 @@ _site/en/lessons/sonification.html,1073,'a' tag is missing a reference _site/en/lessons/sonification.html,1074,'a' tag is missing a reference _site/en/lessons/sonification.html,1075,'a' tag is missing a reference _site/en/lessons/sonification.html,1079,'a' tag is missing a reference -_site/en/lessons/sonification.html,1079,http://waxy.org/2015/12/if_drake_was_born_a_piano/ is not an HTTPS link _site/en/lessons/sonification.html,1081,'a' tag is missing a reference -_site/en/lessons/sonification.html,1081,http://web.archive.org/web/20190203083307/http://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html is not an HTTPS link _site/en/lessons/sonification.html,1083,'a' tag is missing a reference -_site/en/lessons/sonification.html,1083,http://www.jstor.org/stable/734136 is not an HTTPS link _site/en/lessons/sonification.html,1085,'a' tag is missing a reference -_site/en/lessons/sonification.html,1085,http://www.icad.org/Proceedings/2008/Hermann2008.pdf is not an HTTPS link _site/en/lessons/sonification.html,1087,'a' tag is missing a reference _site/en/lessons/sonification.html,1089,'a' tag is missing a reference _site/en/lessons/space-place-gazetteers.html,119,'a' tag is missing a reference _site/en/lessons/space-place-gazetteers.html,138,'a' tag is missing a reference _site/en/lessons/space-place-gazetteers.html,175,'a' tag is missing a reference _site/en/lessons/space-place-gazetteers.html,201,'a' tag is missing a reference -_site/en/lessons/space-place-gazetteers.html,547,http://bombsight.org/#17/51.50595/-0.10680 is not an HTTPS link _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,119,'a' tag is missing a reference _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,138,'a' tag is missing a reference _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,175,'a' tag is missing a reference _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,201,'a' tag is missing a reference -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,588,http://notepad-plus-plus.org is not an HTTPS link -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,892,http://daringfireball.net/projects/markdown/dingus is not an HTTPS link -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1145,http://editor.citationstyles.org/about/ is not an HTTPS link -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1207,http://stackoverflow.com/questions/tagged/pandoc is not an HTTPS link -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1208,http://web.archive.org/web/20190203062832/http://digitalhumanities.org/answers/ is not an HTTPS link -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1214,http://web.archive.org/web/20140120195538/http://mashable.com/2013/06/24/markdown-tools/ is not an HTTPS link -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1218,http://mouapp.com/ is not an HTTPS link -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1218,http://writemonkey.com is not an HTTPS link 
-_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1219,http://www.sublimetext.com/ is not an HTTPS link -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1222,http://prose.io is not an HTTPS link -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1223,http://www.authorea.com is not an HTTPS link -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1224,http://www.draftin.com is not an HTTPS link -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1227,http://gitit.net/ is not an HTTPS link -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1232,http://github.com/fauno/jekyll-pandoc-multiple-formats is not an HTTPS link -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1233,http://jaspervdj.be/hakyll/ is not an HTTPS link -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1242,http://readthedocs.org is not an HTTPS link -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1254,http://www.antipope.org/charlie/blog-static/2013/10/why-microsoft-word-must-die.html is not an HTTPS link _site/en/lessons/temporal-network-analysis-with-r.html,121,'a' tag is missing a reference _site/en/lessons/temporal-network-analysis-with-r.html,140,'a' tag is missing a reference _site/en/lessons/temporal-network-analysis-with-r.html,177,'a' tag is missing a reference _site/en/lessons/temporal-network-analysis-with-r.html,203,'a' tag is missing a reference -_site/en/lessons/temporal-network-analysis-with-r.html,1193,http://www.epimodel.org/ is not an HTTPS link _site/en/lessons/text-mining-with-extracted-features.html,136,'a' tag is missing a reference _site/en/lessons/text-mining-with-extracted-features.html,155,'a' tag is missing a reference _site/en/lessons/text-mining-with-extracted-features.html,192,'a' tag is missing a reference _site/en/lessons/text-mining-with-extracted-features.html,218,'a' tag is missing a reference -_site/en/lessons/text-mining-with-extracted-features.html,551,http://mimno.infosci.cornell.edu/wordsim/nearest.html is not an HTTPS link -_site/en/lessons/text-mining-with-extracted-features.html,630,http://stackoverflow.com/a/19350234/233577 is not an HTTPS link -_site/en/lessons/text-mining-with-extracted-features.html,630,http://pandas.pydata.org/pandas-docs/version/0.15.2/install.html#recommended-dependencies is not an HTTPS link -_site/en/lessons/text-mining-with-extracted-features.html,930,http://hdl.handle.net/2027/nyp.33433074811310 is not an HTTPS link -_site/en/lessons/text-mining-with-extracted-features.html,1055,http://htrc.github.io/htrc-feature-reader/htrc_features/feature_reader.m.html#htrc_features.feature_reader.Volume.tokenlist is not an HTTPS link -_site/en/lessons/text-mining-with-extracted-features.html,1671,http://pandas.pydata.org/pandas-docs/stable/groupby.html is not an HTTPS link -_site/en/lessons/text-mining-with-extracted-features.html,1800,http://htrc.github.io/htrc-feature-reader/htrc_features/feature_reader.m.html is not an HTTPS link -_site/en/lessons/text-mining-with-extracted-features.html,1860,http://data.analytics.hathitrust.org/genre/fiction_paths.txt is not an HTTPS link -_site/en/lessons/text-mining-with-extracted-features.html,1860,http://data.analytics.hathitrust.org/genre/drama_paths.txt is not an HTTPS link 
-_site/en/lessons/text-mining-with-extracted-features.html,1860,http://data.analytics.hathitrust.org/genre/poetry_paths.txt is not an HTTPS link _site/en/lessons/text-mining-youtube-comments.html,121,'a' tag is missing a reference _site/en/lessons/text-mining-youtube-comments.html,140,'a' tag is missing a reference _site/en/lessons/text-mining-youtube-comments.html,177,'a' tag is missing a reference _site/en/lessons/text-mining-youtube-comments.html,203,'a' tag is missing a reference -_site/en/lessons/text-mining-youtube-comments.html,876,http://www.Wordfish.org/software.html is not an HTTPS link -_site/en/lessons/text-mining-youtube-comments.html,876,http://www.wordfish.org/ is not an HTTPS link -_site/en/lessons/text-mining-youtube-comments.html,906,http://www.wordfish.org/ is not an HTTPS link -_site/en/lessons/text-mining-youtube-comments.html,906,http://www.Wordfish.org/software.html is not an HTTPS link _site/en/lessons/topic-modeling-and-mallet.html,121,'a' tag is missing a reference _site/en/lessons/topic-modeling-and-mallet.html,140,'a' tag is missing a reference _site/en/lessons/topic-modeling-and-mallet.html,177,'a' tag is missing a reference _site/en/lessons/topic-modeling-and-mallet.html,203,'a' tag is missing a reference -_site/en/lessons/topic-modeling-and-mallet.html,529,http://mallet.cs.umass.edu/mailinglist.php is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,542,http://www.cs.umbc.edu/~hillol/NGDM07/abstracts/talks/MKirschenbaum.pdf is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,545,http://www.worldcat.org/title/reading-machines-toward-an-algorithmic-criticism/oclc/708761605&referer=brief_results is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,550,http://voyant-tools.org is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,597,http://arxiv.org/abs/1003.6087/ is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,606,http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,616,http://dsl.richmond.edu/dispatch/ is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,629,http://mallet.cs.umass.edu/index.php is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,630,http://en.wikipedia.org/wiki/Gibbs_sampling is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,657,http://mallet.cs.umass.edu/index.php is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,657,http://mallet.cs.umass.edu/download.php is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,658,http://www.oracle.com/technetwork/java/javase/downloads/index.html is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,836,http://mallet.cs.umass.edu/download.php is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,837,http://www.oracle.com/technetwork/java/javase/downloads/index.html is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,1177,http://dsl.richmond.edu/dispatch/ is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,1205,http://electricarchaeology.ca/2012/07/09/mining-a-day-of-archaeology/ is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,1211,http://electricarchaeology.ca/2012/06/08/mining-the-open-web-with-looted-heritage-draft/ is not an HTTPS link 
-_site/en/lessons/topic-modeling-and-mallet.html,1222,http://tedunderwood.wordpress.com/2012/04/07/topic-modeling-made-just-simple-enough/ is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,1225,http://web.archive.org/web/20160704150726/http://www.lisarhody.com:80/some-assembly-required/ is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,1228,http://dl.acm.org/citation.cfm?id=944937 is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,1230,http://mimno.infosci.cornell.edu/topics.html is not an HTTPS link -_site/en/lessons/topic-modeling-and-mallet.html,1233,http://www.perseus.tufts.edu/publications/02-jocch-mimno.pdf is not an HTTPS link _site/en/lessons/transcribing-handwritten-text-with-python-and-azure.html,117,'a' tag is missing a reference _site/en/lessons/transcribing-handwritten-text-with-python-and-azure.html,136,'a' tag is missing a reference _site/en/lessons/transcribing-handwritten-text-with-python-and-azure.html,173,'a' tag is missing a reference @@ -1378,28 +719,10 @@ _site/en/lessons/transforming-xml-with-xsl.html,117,'a' tag is missing a referen _site/en/lessons/transforming-xml-with-xsl.html,136,'a' tag is missing a reference _site/en/lessons/transforming-xml-with-xsl.html,173,'a' tag is missing a reference _site/en/lessons/transforming-xml-with-xsl.html,199,'a' tag is missing a reference -_site/en/lessons/transforming-xml-with-xsl.html,728,http://scissors-and-paste.net is not an HTTPS link -_site/en/lessons/transforming-xml-with-xsl.html,823,http://www.w3.org/ is not an HTTPS link _site/en/lessons/transliterating.html,117,'a' tag is missing a reference _site/en/lessons/transliterating.html,136,'a' tag is missing a reference _site/en/lessons/transliterating.html,173,'a' tag is missing a reference _site/en/lessons/transliterating.html,199,'a' tag is missing a reference -_site/en/lessons/transliterating.html,476,http://en.wikipedia.org/wiki/Ascii is not an HTTPS link -_site/en/lessons/transliterating.html,481,http://lists.memo.ru is not an HTTPS link -_site/en/lessons/transliterating.html,481,http://en.wikipedia.org/wiki/Cyrillic_script is not an HTTPS link -_site/en/lessons/transliterating.html,481,http://en.wikipedia.org/wiki/Latin_script is not an HTTPS link -_site/en/lessons/transliterating.html,483,http://en.wikipedia.org/wiki/Unicode is not an HTTPS link -_site/en/lessons/transliterating.html,507,http://en.wikipedia.org/wiki/ALA-LC_romanization_for_Russian is not an HTTPS link -_site/en/lessons/transliterating.html,511,http://www.crummy.com/software/BeautifulSoup/ is not an HTTPS link -_site/en/lessons/transliterating.html,519,http://en.wikipedia.org/wiki/Glasnost is not an HTTPS link -_site/en/lessons/transliterating.html,522,http://lists.memo.ru is not an HTTPS link -_site/en/lessons/transliterating.html,530,http://lists.memo.ru/d1/f1.htm is not an HTTPS link -_site/en/lessons/transliterating.html,573,http://www.unicode.org/standard/WhatIsUnicode.html is not an HTTPS link -_site/en/lessons/transliterating.html,592,http://en.wikipedia.org/wiki/Comma-separated_values is not an HTTPS link -_site/en/lessons/transliterating.html,657,http://web.archive.org/web/20170312041508/http://www.lcweb.loc.gov/catdir/cpso/romanization/russian.pdf is not an HTTPS link -_site/en/lessons/transliterating.html,664,http://en.wikipedia.org/wiki/Cyrillic_script_in_Unicode is not an HTTPS link -_site/en/lessons/transliterating.html,665,http://www.unicode.org/charts/ is not an HTTPS link 
-_site/en/lessons/transliterating.html,837,http://www.w3schools.com/css/ is not an HTTPS link _site/en/lessons/understanding-creating-word-embeddings.html,121,'a' tag is missing a reference _site/en/lessons/understanding-creating-word-embeddings.html,140,'a' tag is missing a reference _site/en/lessons/understanding-creating-word-embeddings.html,177,'a' tag is missing a reference @@ -1408,76 +731,34 @@ _site/en/lessons/understanding-regular-expressions.html,117,'a' tag is missing a _site/en/lessons/understanding-regular-expressions.html,136,'a' tag is missing a reference _site/en/lessons/understanding-regular-expressions.html,173,'a' tag is missing a reference _site/en/lessons/understanding-regular-expressions.html,199,'a' tag is missing a reference -_site/en/lessons/understanding-regular-expressions.html,579,http://www.libreoffice.org/download is not an HTTPS link -_site/en/lessons/understanding-regular-expressions.html,626,http://archive.org/details/jstor-4560629/ is not an HTTPS link -_site/en/lessons/understanding-regular-expressions.html,628,http://archive.org/stream/jstor-4560629/4560629#page/n0/mode/2up is not an HTTPS link -_site/en/lessons/understanding-regular-expressions.html,646,http://archive.org/stream/jstor-4560629/4560629_djvu.txt is not an HTTPS link -_site/en/lessons/understanding-regular-expressions.html,1412,http://en.wikipedia.org/wiki/Regular_expressions is not an HTTPS link -_site/en/lessons/understanding-regular-expressions.html,1425,http://rubular.com/ is not an HTTPS link -_site/en/lessons/understanding-regular-expressions.html,1430,http://dh.obdurodon.org/regex.html is not an HTTPS link _site/en/lessons/up-and-running-with-omeka.html,117,'a' tag is missing a reference _site/en/lessons/up-and-running-with-omeka.html,136,'a' tag is missing a reference _site/en/lessons/up-and-running-with-omeka.html,173,'a' tag is missing a reference _site/en/lessons/up-and-running-with-omeka.html,199,'a' tag is missing a reference -_site/en/lessons/up-and-running-with-omeka.html,507,http://www.omeka.net is not an HTTPS link -_site/en/lessons/up-and-running-with-omeka.html,527,http://www.omeka.net is not an HTTPS link -_site/en/lessons/up-and-running-with-omeka.html,906,http://info.omeka.net is not an HTTPS link _site/en/lessons/urban-demographic-data-r-ggplot2.html,119,'a' tag is missing a reference _site/en/lessons/urban-demographic-data-r-ggplot2.html,138,'a' tag is missing a reference _site/en/lessons/urban-demographic-data-r-ggplot2.html,175,'a' tag is missing a reference _site/en/lessons/urban-demographic-data-r-ggplot2.html,201,'a' tag is missing a reference -_site/en/lessons/urban-demographic-data-r-ggplot2.html,488,http://ggplot2.tidyverse.org is not an HTTPS link -_site/en/lessons/urban-demographic-data-r-ggplot2.html,530,http://www.ggplot2-exts.org/ is not an HTTPS link -_site/en/lessons/urban-demographic-data-r-ggplot2.html,946,http://colorbrewer2.org is not an HTTPS link -_site/en/lessons/urban-demographic-data-r-ggplot2.html,1149,http://www.ggplot2-exts.org/ is not an HTTPS link -_site/en/lessons/urban-demographic-data-r-ggplot2.html,1200,http://r4ds.hadley.nz/ is not an HTTPS link -_site/en/lessons/urban-demographic-data-r-ggplot2.html,1218,http://www.ggplot2-exts.org/gallery/ is not an HTTPS link -_site/en/lessons/urban-demographic-data-r-ggplot2.html,1227,http://www.cookbook-r.com/Graphs/ is not an HTTPS link -_site/en/lessons/urban-demographic-data-r-ggplot2.html,1227,http://shop.oreilly.com/product/0636920023135.do is not an HTTPS link 
_site/en/lessons/using-javascript-to-create-maps.html,123,'a' tag is missing a reference
_site/en/lessons/using-javascript-to-create-maps.html,142,'a' tag is missing a reference
_site/en/lessons/using-javascript-to-create-maps.html,179,'a' tag is missing a reference
_site/en/lessons/using-javascript-to-create-maps.html,205,'a' tag is missing a reference
-_site/en/lessons/using-javascript-to-create-maps.html,496,http://wcm1.web.rice.edu/mining-bpl-antislavery.html is not an HTTPS link
-_site/en/lessons/using-javascript-to-create-maps.html,496,http://leafletjs.com/ is not an HTTPS link
-_site/en/lessons/using-javascript-to-create-maps.html,498,http://postgis.net/ is not an HTTPS link
-_site/en/lessons/using-javascript-to-create-maps.html,498,http://postgis.net/ is not an HTTPS link
-_site/en/lessons/using-javascript-to-create-maps.html,498,http://dp.la is not an HTTPS link
-_site/en/lessons/using-javascript-to-create-maps.html,502,http://hdlab.stanford.edu/palladio/ is not an HTTPS link
-_site/en/lessons/using-javascript-to-create-maps.html,518,http://leafletjs.com/ is not an HTTPS link
-_site/en/lessons/using-javascript-to-create-maps.html,518,http://jqueryui.com/ is not an HTTPS link
-_site/en/lessons/using-javascript-to-create-maps.html,534,http://zotero.org is not an HTTPS link
-_site/en/lessons/using-javascript-to-create-maps.html,558,http://www.gpsvisualizer.com/geocoder/ is not an HTTPS link
-_site/en/lessons/using-javascript-to-create-maps.html,633,http:///www.mapbox.com is not an HTTPS link
-_site/en/lessons/using-javascript-to-create-maps.html,744,http://stackoverflow.com/questions/16151018/npm-throws-error-without-sudo/24404451#24404451 is not an HTTPS link
_site/en/lessons/vector-layers-qgis.html,121,'a' tag is missing a reference
_site/en/lessons/vector-layers-qgis.html,140,'a' tag is missing a reference
_site/en/lessons/vector-layers-qgis.html,177,'a' tag is missing a reference
_site/en/lessons/vector-layers-qgis.html,203,'a' tag is missing a reference
-_site/en/lessons/vector-layers-qgis.html,683,http://www.lib.uwaterloo.ca/locations/umd/digital/clump_classes.html is not an HTTPS link
-_site/en/lessons/vector-layers-qgis.html,878,"http://en.wikipedia.org/wiki/Prince_Royalty,_Prince_Edward_Island is not an HTTPS link"
-_site/en/lessons/vector-layers-qgis.html,1233,http://geospatialhistorian.wordpress.com/ is not an HTTPS link
_site/en/lessons/viewing-html-files.html,119,'a' tag is missing a reference
_site/en/lessons/viewing-html-files.html,138,'a' tag is missing a reference
_site/en/lessons/viewing-html-files.html,175,'a' tag is missing a reference
_site/en/lessons/viewing-html-files.html,201,'a' tag is missing a reference
-_site/en/lessons/viewing-html-files.html,557,http://www.w3schools.com/html/default.asp is not an HTTPS link
-_site/en/lessons/viewing-html-files.html,634,http://www.w3schools.com/html/default.asp is not an HTTPS link
-_site/en/lessons/viewing-html-files.html,635,http://www.w3schools.com/html/html5_intro.asp is not an HTTPS link
_site/en/lessons/visualizing-with-bokeh.html,117,'a' tag is missing a reference
_site/en/lessons/visualizing-with-bokeh.html,136,'a' tag is missing a reference
_site/en/lessons/visualizing-with-bokeh.html,173,'a' tag is missing a reference
_site/en/lessons/visualizing-with-bokeh.html,199,'a' tag is missing a reference
-_site/en/lessons/visualizing-with-bokeh.html,626,http://jupyter.org is not an HTTPS link
-_site/en/lessons/visualizing-with-bokeh.html,1101,http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases is not an HTTPS link
-_site/en/lessons/visualizing-with-bokeh.html,1190,http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases is not an HTTPS link
_site/en/lessons/windows-installation.html,119,'a' tag is missing a reference
_site/en/lessons/windows-installation.html,138,'a' tag is missing a reference
_site/en/lessons/windows-installation.html,175,'a' tag is missing a reference
_site/en/lessons/windows-installation.html,201,'a' tag is missing a reference
-_site/en/lessons/windows-installation.html,508,http://www.python.org/ is not an HTTPS link
-_site/en/lessons/windows-installation.html,521,http://wiki.python.org/moin/PythonEditors/ is not an HTTPS link
-_site/en/lessons/windows-installation.html,579,http://en.wikipedia.org/wiki/UTF-8 is not an HTTPS link
_site/en/lessons/working-with-batches-of-pdf-files.html,117,'a' tag is missing a reference
_site/en/lessons/working-with-batches-of-pdf-files.html,136,'a' tag is missing a reference
_site/en/lessons/working-with-batches-of-pdf-files.html,173,'a' tag is missing a reference
@@ -1486,16 +767,10 @@ _site/en/lessons/working-with-text-files.html,119,'a' tag is missing a reference
_site/en/lessons/working-with-text-files.html,138,'a' tag is missing a reference
_site/en/lessons/working-with-text-files.html,175,'a' tag is missing a reference
_site/en/lessons/working-with-text-files.html,201,'a' tag is missing a reference
-_site/en/lessons/working-with-text-files.html,622,http://docs.python.org/release/2.5.4/ref/keywords.html is not an HTTPS link
-_site/en/lessons/working-with-text-files.html,734,"http://en.wikibooks.org/wiki/Non-Programmer%27s_Tutorial_for_Python_2.6/Hello,_World is not an HTTPS link"
_site/en/lessons/working-with-web-pages.html,119,'a' tag is missing a reference
_site/en/lessons/working-with-web-pages.html,138,'a' tag is missing a reference
_site/en/lessons/working-with-web-pages.html,175,'a' tag is missing a reference
_site/en/lessons/working-with-web-pages.html,201,'a' tag is missing a reference
-_site/en/lessons/working-with-web-pages.html,545,http://www.oldbaileyonline.org/ is not an HTTPS link
-_site/en/lessons/working-with-web-pages.html,612,http://en.wikipedia.org/wiki/Gordon_Riots is not an HTTPS link
-_site/en/lessons/working-with-web-pages.html,645,http://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes is not an HTTPS link
-_site/en/lessons/working-with-web-pages.html,647,http://www.oldbaileyonline.org/images.jsp?doc=178006280084 is not an HTTPS link
_site/en/privacy-policy.html,86,'a' tag is missing a reference
_site/en/privacy-policy.html,105,'a' tag is missing a reference
_site/en/privacy-policy.html,142,'a' tag is missing a reference
_site/en/privacy-policy.html,168,'a' tag is missing a reference
_site/en/project-team.html,86,'a' tag is missing a reference
_site/en/project-team.html,105,'a' tag is missing a reference
_site/en/project-team.html,142,'a' tag is missing a reference
_site/en/project-team.html,168,'a' tag is missing a reference
-_site/en/project-team.html,308,http://twitter.com/maxcarlons is not an HTTPS link
-_site/en/project-team.html,311,http://github.com/carlonim is not an HTTPS link
-_site/en/project-team.html,412,http://github.com/lachapot is not an HTTPS link
-_site/en/project-team.html,510,http://twitter.com/cosovschi is not an HTTPS link
-_site/en/project-team.html,513,http://github.com/digitalkosovski is not an HTTPS link
-_site/en/project-team.html,616,http://github.com/caiocmello is not an HTTPS link
-_site/en/project-team.html,1180,http://github.com/semanticnoodles is not an HTTPS link -_site/en/project-team.html,1276,http://twitter.com/nabsiddiqui is not an HTTPS link -_site/en/project-team.html,1279,http://github.com/nabsiddiqui is not an HTTPS link -_site/en/project-team.html,1629,http://twitter.com/giulia_taurino is not an HTTPS link -_site/en/project-team.html,1632,http://github.com/giuliataurino is not an HTTPS link -_site/en/project-team.html,1802,http://www.alexwermercolan.com/ is not an HTTPS link -_site/en/project-team.html,1808,http://twitter.com/alexwermercolan is not an HTTPS link -_site/en/project-team.html,1811,http://github.com/hawc2 is not an HTTPS link -_site/en/project-team.html,2057,http://www.mariajoseafanador.com is not an HTTPS link -_site/en/project-team.html,2063,http://twitter.com/mariajoafana is not an HTTPS link -_site/en/project-team.html,2066,http://github.com/mariajoafana is not an HTTPS link -_site/en/project-team.html,2532,http://twitter.com/IsaGribomont is not an HTTPS link -_site/en/project-team.html,2535,http://github.com/isag91 is not an HTTPS link -_site/en/project-team.html,2743,http://twitter.com/espejolento is not an HTTPS link -_site/en/project-team.html,2746,http://github.com/silviaegt is not an HTTPS link -_site/en/project-team.html,3034,http://twitter.com/jenniferisve is not an HTTPS link -_site/en/project-team.html,3037,http://github.com/jenniferisasi is not an HTTPS link -_site/en/project-team.html,3359,http://twitter.com/enetreseles is not an HTTPS link -_site/en/project-team.html,3362,http://github.com/nllano is not an HTTPS link -_site/en/project-team.html,3566,http://twitter.com/jgob is not an HTTPS link -_site/en/project-team.html,3569,http://github.com/joshuagob is not an HTTPS link -_site/en/project-team.html,3861,http://twitter.com/rivaquiroga is not an HTTPS link -_site/en/project-team.html,3864,http://github.com/rivaquiroga is not an HTTPS link -_site/en/project-team.html,4155,http://github.com/nivaca is not an HTTPS link -_site/en/project-team.html,4368,http://github.com/marie-flesch is not an HTTPS link -_site/en/project-team.html,4511,http://github.com/matgille is not an HTTPS link -_site/en/project-team.html,4744,http://github.com/mhersent is not an HTTPS link -_site/en/project-team.html,4802,http://twitter.com/superHH is not an HTTPS link -_site/en/project-team.html,5054,http://github.com/DMathelier is not an HTTPS link -_site/en/project-team.html,5188,http://twitter.com/emilienschultz is not an HTTPS link -_site/en/project-team.html,5191,http://github.com/emilienschultz is not an HTTPS link -_site/en/project-team.html,5315,http://twitter.com/davvalent is not an HTTPS link -_site/en/project-team.html,5318,http://github.com/davvalent is not an HTTPS link -_site/en/project-team.html,5447,http://github.com/AlexandreWa is not an HTTPS link -_site/en/project-team.html,5582,http://github.com/josircg is not an HTTPS link -_site/en/project-team.html,5840,http://twitter.com/danielalvesfcsh is not an HTTPS link -_site/en/project-team.html,5843,http://github.com/DanielAlvesLABDH is not an HTTPS link -_site/en/project-team.html,6105,http://twitter.com/ericbrasiln is not an HTTPS link -_site/en/project-team.html,6108,http://github.com/ericbrasiln is not an HTTPS link -_site/en/project-team.html,6299,http://github.com/luisferla is not an HTTPS link -_site/en/project-team.html,6541,http://twitter.com/jimmy_medeiros is not an HTTPS link -_site/en/project-team.html,6544,http://github.com/JimmyMedeiros82 is not an HTTPS link 
-_site/en/project-team.html,6779,http://github.com/joanacvp is not an HTTPS link -_site/en/project-team.html,7025,http://twitter.com/araceletorres is not an HTTPS link -_site/en/project-team.html,7028,http://github.com/aracele is not an HTTPS link -_site/en/project-team.html,7284,http://twitter.com/j_w_baker is not an HTTPS link -_site/en/project-team.html,7287,http://github.com/drjwbaker is not an HTTPS link -_site/en/project-team.html,7719,http://adamcrymble.org is not an HTTPS link -_site/en/project-team.html,7725,http://twitter.com/Adam_Crymble is not an HTTPS link -_site/en/project-team.html,7728,http://github.com/acrymble is not an HTTPS link -_site/en/project-team.html,8196,http://github.com/adamfarquhar is not an HTTPS link -_site/en/project-team.html,8256,http://twitter.com/jenniferisve is not an HTTPS link -_site/en/project-team.html,8259,http://github.com/jenniferisasi is not an HTTPS link -_site/en/project-team.html,8587,http://twitter.com/rivaquiroga is not an HTTPS link -_site/en/project-team.html,8590,http://github.com/rivaquiroga is not an HTTPS link -_site/en/project-team.html,8876,http://twitter.com/amsichani is not an HTTPS link -_site/en/project-team.html,8879,http://github.com/amsichani is not an HTTPS link -_site/en/project-team.html,9219,http://twitter.com/AnisaHawes is not an HTTPS link -_site/en/project-team.html,9222,http://github.com/anisa-hawes is not an HTTPS link -_site/en/project-team.html,9431,http://github.com/charlottejmc is not an HTTPS link _site/en/research.html,86,'a' tag is missing a reference _site/en/research.html,105,'a' tag is missing a reference _site/en/research.html,142,'a' tag is missing a reference _site/en/research.html,168,'a' tag is missing a reference -_site/en/research.html,253,http://niche-canada.org/wp-content/uploads/2013/09/programming-historian-1.pdf is not an HTTPS link -_site/en/research.html,260,http://www.diva-portal.org/smash/record.jsf?pid=diva2%3A1508542&dswid=7551 is not an HTTPS link -_site/en/research.html,265,http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ is not an HTTPS link -_site/en/research.html,277,http://www.digitalhumanities.org/dhq/vol/16/2/000585/000585.html is not an HTTPS link -_site/en/research.html,283,http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 is not an HTTPS link -_site/en/research.html,285,http://web.archive.org/web/20180713014622/http://dhcommons.org/journal/issue-1/editorial-sustainability-and-open-peer-review-programming-historian is not an HTTPS link -_site/en/research.html,286,http://www.themacroscope.org/2.0/ is not an HTTPS link -_site/en/research.html,294,http://doi.org/10.5281/zenodo.3813763 is not an HTTPS link -_site/en/research.html,307,http://www.iea.usp.br/eventos/historia-digital-educacao-caminhos-cruzados is not an HTTPS link -_site/en/research.html,395,http://ixa2.si.ehu.eus/intele/?q=webinars is not an HTTPS link -_site/en/research.html,398,http://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/ is not an HTTPS link -_site/en/research.html,411,http://fredgibbs.net/assets/images/ph-poster/final-board.png is not an HTTPS link -_site/en/research.html,425,http://niche-canada.org/2018/03/23/a-decade-of-programming-historians/ is not an HTTPS link -_site/en/research.html,426,http://fredgibbs.net/posts/reflections-former-PH-editor is not an HTTPS link -_site/en/research.html,427,http://clionauta.hypotheses.org/16979 is not an HTTPS link 
-_site/en/research.html,429,http://humanidadesdigitales.net/blog/2017/03/17/the-programming-historian-en-espanol/ is not an HTTPS link
 _site/en/reviewer-guidelines.html,86,'a' tag is missing a reference
 _site/en/reviewer-guidelines.html,105,'a' tag is missing a reference
 _site/en/reviewer-guidelines.html,142,'a' tag is missing a reference
@@ -1598,7 +791,6 @@
 _site/en/supporters.html,86,'a' tag is missing a reference
 _site/en/supporters.html,105,'a' tag is missing a reference
 _site/en/supporters.html,142,'a' tag is missing a reference
 _site/en/supporters.html,168,'a' tag is missing a reference
-_site/en/supporters.html,273,http://cdrh.unl.edu/ is not an HTTPS link
 _site/en/translator-guidelines.html,86,'a' tag is missing a reference
 _site/en/translator-guidelines.html,105,'a' tag is missing a reference
 _site/en/translator-guidelines.html,142,'a' tag is missing a reference
@@ -1611,24 +803,14 @@
 _site/es/acerca-de.html,89,'a' tag is missing a reference
 _site/es/acerca-de.html,108,'a' tag is missing a reference
 _site/es/acerca-de.html,145,'a' tag is missing a reference
 _site/es/acerca-de.html,182,'a' tag is missing a reference
-_site/es/acerca-de.html,269,http://dhawards.org/dhawards2016/results/ is not an HTTPS link
-_site/es/acerca-de.html,269,http://dhawards.org/dhawards2017/results/ is not an HTTPS link
-_site/es/acerca-de.html,269,http://humanidadesdigitaleshispanicas.es/ is not an HTTPS link
-_site/es/acerca-de.html,269,http://dhawards.org/dhawards2022/results/ is not an HTTPS link
-_site/es/acerca-de.html,282,http://digitalhistoryhacks.blogspot.com/2008/01/programming-historian.html is not an HTTPS link
 _site/es/colaboradores.html,88,'a' tag is missing a reference
 _site/es/colaboradores.html,107,'a' tag is missing a reference
 _site/es/colaboradores.html,144,'a' tag is missing a reference
 _site/es/colaboradores.html,181,'a' tag is missing a reference
-_site/es/colaboradores.html,274,http://cdrh.unl.edu/ is not an HTTPS link
 _site/es/contribuciones.html,88,'a' tag is missing a reference
 _site/es/contribuciones.html,107,'a' tag is missing a reference
 _site/es/contribuciones.html,144,'a' tag is missing a reference
 _site/es/contribuciones.html,181,'a' tag is missing a reference
-_site/es/contribuciones.html,258,http://vocabularios.caicyt.gov.ar/portalthes/index.php?v=42 is not an HTTPS link
-_site/es/contribuciones.html,258,http://www.mecd.gob.es/planes-nacionales/dam/jcr:f20a4ba1-0ed2-445d-9be9-b8b0382562ea/mex-glosario-interpares-total0112.pdf is not an HTTPS link
-_site/es/contribuciones.html,303,http://www.worldcat.org/title/programming-historian/oclc/951537099 is not an HTTPS link
-_site/es/contribuciones.html,305,http://purdue-primo-prod.hosted.exlibrisgroup.com/primo_library/libweb/action/dlDisplay.do?vid=PURDUE&search_scope=everything&docId=PURDUE_ALMA51671812890001081&fn=permalink is not an HTTPS link
 _site/es/donaciones.html,88,'a' tag is missing a reference
 _site/es/donaciones.html,107,'a' tag is missing a reference
 _site/es/donaciones.html,144,'a' tag is missing a reference
 _site/es/donaciones.html,181,'a' tag is missing a reference
 _site/es/equipo-de-proyecto.html,88,'a' tag is missing a reference
 _site/es/equipo-de-proyecto.html,107,'a' tag is missing a reference
 _site/es/equipo-de-proyecto.html,144,'a' tag is missing a reference
 _site/es/equipo-de-proyecto.html,181,'a' tag is missing a reference
-_site/es/equipo-de-proyecto.html,306,http://twitter.com/maxcarlons is not an HTTPS link
-_site/es/equipo-de-proyecto.html,309,http://github.com/carlonim is not an HTTPS link
-_site/es/equipo-de-proyecto.html,410,http://github.com/lachapot is not an HTTPS link
-_site/es/equipo-de-proyecto.html,508,http://twitter.com/cosovschi is not an HTTPS link
-_site/es/equipo-de-proyecto.html,511,http://github.com/digitalkosovski is not an HTTPS link
-_site/es/equipo-de-proyecto.html,614,http://github.com/caiocmello is not an HTTPS link
-_site/es/equipo-de-proyecto.html,1178,http://github.com/semanticnoodles is not an HTTPS link
-_site/es/equipo-de-proyecto.html,1274,http://twitter.com/nabsiddiqui is not an HTTPS link
-_site/es/equipo-de-proyecto.html,1277,http://github.com/nabsiddiqui is not an HTTPS link
-_site/es/equipo-de-proyecto.html,1627,http://twitter.com/giulia_taurino is not an HTTPS link
-_site/es/equipo-de-proyecto.html,1630,http://github.com/giuliataurino is not an HTTPS link
-_site/es/equipo-de-proyecto.html,1800,http://www.alexwermercolan.com/ is not an HTTPS link
-_site/es/equipo-de-proyecto.html,1806,http://twitter.com/alexwermercolan is not an HTTPS link
-_site/es/equipo-de-proyecto.html,1809,http://github.com/hawc2 is not an HTTPS link
-_site/es/equipo-de-proyecto.html,2055,http://www.mariajoseafanador.com is not an HTTPS link
-_site/es/equipo-de-proyecto.html,2061,http://twitter.com/mariajoafana is not an HTTPS link
-_site/es/equipo-de-proyecto.html,2064,http://github.com/mariajoafana is not an HTTPS link
-_site/es/equipo-de-proyecto.html,2530,http://twitter.com/IsaGribomont is not an HTTPS link
-_site/es/equipo-de-proyecto.html,2533,http://github.com/isag91 is not an HTTPS link
-_site/es/equipo-de-proyecto.html,2741,http://twitter.com/espejolento is not an HTTPS link
-_site/es/equipo-de-proyecto.html,2744,http://github.com/silviaegt is not an HTTPS link
-_site/es/equipo-de-proyecto.html,3032,http://twitter.com/jenniferisve is not an HTTPS link
-_site/es/equipo-de-proyecto.html,3035,http://github.com/jenniferisasi is not an HTTPS link
-_site/es/equipo-de-proyecto.html,3357,http://twitter.com/enetreseles is not an HTTPS link
-_site/es/equipo-de-proyecto.html,3360,http://github.com/nllano is not an HTTPS link
-_site/es/equipo-de-proyecto.html,3564,http://twitter.com/jgob is not an HTTPS link
-_site/es/equipo-de-proyecto.html,3567,http://github.com/joshuagob is not an HTTPS link
-_site/es/equipo-de-proyecto.html,3859,http://twitter.com/rivaquiroga is not an HTTPS link
-_site/es/equipo-de-proyecto.html,3862,http://github.com/rivaquiroga is not an HTTPS link
-_site/es/equipo-de-proyecto.html,4153,http://github.com/nivaca is not an HTTPS link
-_site/es/equipo-de-proyecto.html,4366,http://github.com/marie-flesch is not an HTTPS link
-_site/es/equipo-de-proyecto.html,4509,http://github.com/matgille is not an HTTPS link
-_site/es/equipo-de-proyecto.html,4742,http://github.com/mhersent is not an HTTPS link
-_site/es/equipo-de-proyecto.html,4800,http://twitter.com/superHH is not an HTTPS link
-_site/es/equipo-de-proyecto.html,5052,http://github.com/DMathelier is not an HTTPS link
-_site/es/equipo-de-proyecto.html,5186,http://twitter.com/emilienschultz is not an HTTPS link
-_site/es/equipo-de-proyecto.html,5189,http://github.com/emilienschultz is not an HTTPS link
-_site/es/equipo-de-proyecto.html,5313,http://twitter.com/davvalent is not an HTTPS link
-_site/es/equipo-de-proyecto.html,5316,http://github.com/davvalent is not an HTTPS link
-_site/es/equipo-de-proyecto.html,5445,http://github.com/AlexandreWa is not an HTTPS link
-_site/es/equipo-de-proyecto.html,5580,http://github.com/josircg is not an HTTPS link
-_site/es/equipo-de-proyecto.html,5838,http://twitter.com/danielalvesfcsh is not an HTTPS link
-_site/es/equipo-de-proyecto.html,5841,http://github.com/DanielAlvesLABDH is not an HTTPS link
-_site/es/equipo-de-proyecto.html,6103,http://twitter.com/ericbrasiln is not an HTTPS link
-_site/es/equipo-de-proyecto.html,6106,http://github.com/ericbrasiln is not an HTTPS link
-_site/es/equipo-de-proyecto.html,6297,http://github.com/luisferla is not an HTTPS link
-_site/es/equipo-de-proyecto.html,6539,http://twitter.com/jimmy_medeiros is not an HTTPS link
-_site/es/equipo-de-proyecto.html,6542,http://github.com/JimmyMedeiros82 is not an HTTPS link
-_site/es/equipo-de-proyecto.html,6777,http://github.com/joanacvp is not an HTTPS link
-_site/es/equipo-de-proyecto.html,7023,http://twitter.com/araceletorres is not an HTTPS link
-_site/es/equipo-de-proyecto.html,7026,http://github.com/aracele is not an HTTPS link
-_site/es/equipo-de-proyecto.html,7282,http://twitter.com/j_w_baker is not an HTTPS link
-_site/es/equipo-de-proyecto.html,7285,http://github.com/drjwbaker is not an HTTPS link
-_site/es/equipo-de-proyecto.html,7717,http://adamcrymble.org is not an HTTPS link
-_site/es/equipo-de-proyecto.html,7723,http://twitter.com/Adam_Crymble is not an HTTPS link
-_site/es/equipo-de-proyecto.html,7726,http://github.com/acrymble is not an HTTPS link
-_site/es/equipo-de-proyecto.html,8194,http://github.com/adamfarquhar is not an HTTPS link
-_site/es/equipo-de-proyecto.html,8254,http://twitter.com/jenniferisve is not an HTTPS link
-_site/es/equipo-de-proyecto.html,8257,http://github.com/jenniferisasi is not an HTTPS link
-_site/es/equipo-de-proyecto.html,8585,http://twitter.com/rivaquiroga is not an HTTPS link
-_site/es/equipo-de-proyecto.html,8588,http://github.com/rivaquiroga is not an HTTPS link
-_site/es/equipo-de-proyecto.html,8874,http://twitter.com/amsichani is not an HTTPS link
-_site/es/equipo-de-proyecto.html,8877,http://github.com/amsichani is not an HTTPS link
-_site/es/equipo-de-proyecto.html,9217,http://twitter.com/AnisaHawes is not an HTTPS link
-_site/es/equipo-de-proyecto.html,9220,http://github.com/anisa-hawes is not an HTTPS link
-_site/es/equipo-de-proyecto.html,9429,http://github.com/charlottejmc is not an HTTPS link
 _site/es/eventos.html,88,'a' tag is missing a reference
 _site/es/eventos.html,107,'a' tag is missing a reference
 _site/es/eventos.html,144,'a' tag is missing a reference
@@ -1711,8 +827,6 @@
 _site/es/guia-editor.html,88,'a' tag is missing a reference
 _site/es/guia-editor.html,107,'a' tag is missing a reference
 _site/es/guia-editor.html,144,'a' tag is missing a reference
 _site/es/guia-editor.html,181,'a' tag is missing a reference
-_site/es/guia-editor.html,617,http://www.europeana.eu/portal/en is not an HTTPS link
-_site/es/guia-editor.html,621,http://www.loc.gov/maps/collections is not an HTTPS link
 _site/es/guia-para-autores.html,88,'a' tag is missing a reference
 _site/es/guia-para-autores.html,107,'a' tag is missing a reference
 _site/es/guia-para-autores.html,144,'a' tag is missing a reference
 _site/es/guia-para-autores.html,181,'a' tag is missing a reference
 _site/es/guia-para-revisores.html,88,'a' tag is missing a reference
 _site/es/guia-para-revisores.html,107,'a' tag is missing a reference
 _site/es/guia-para-revisores.html,144,'a' tag is missing a reference
 _site/es/guia-para-revisores.html,181,'a' tag is missing a reference
-_site/es/guia-para-revisores.html,336,http://vocabularios.caicyt.gov.ar/portalthes/index.php?v=42 is not an HTTPS link
-_site/es/guia-para-revisores.html,336,http://www.mecd.gob.es/planes-nacionales/dam/jcr:f20a4ba1-0ed2-445d-9be9-b8b0382562ea/mex-glosario-interpares-total0112.pdf is not an HTTPS link
 _site/es/guia-para-traductores.html,88,'a' tag is missing a reference
 _site/es/guia-para-traductores.html,107,'a' tag is missing a reference
 _site/es/guia-para-traductores.html,144,'a' tag is missing a reference
@@ -1735,23 +847,6 @@
 _site/es/investigacion.html,88,'a' tag is missing a reference
 _site/es/investigacion.html,107,'a' tag is missing a reference
 _site/es/investigacion.html,144,'a' tag is missing a reference
 _site/es/investigacion.html,181,'a' tag is missing a reference
-_site/es/investigacion.html,254,http://niche-canada.org/wp-content/uploads/2013/09/programming-historian-1.pdf is not an HTTPS link
-_site/es/investigacion.html,260,http://www.diva-portal.org/smash/record.jsf?pid=diva2%3A1508542&dswid=7551 is not an HTTPS link
-_site/es/investigacion.html,264,http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian is not an HTTPS link
-_site/es/investigacion.html,265,http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian is not an HTTPS link
-_site/es/investigacion.html,277,http://www.digitalhumanities.org/dhq/vol/16/2/000585/000585.html is not an HTTPS link
-_site/es/investigacion.html,283,http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 is not an HTTPS link
-_site/es/investigacion.html,285,http://web.archive.org/web/20180713014622/http://dhcommons.org/journal/issue-1/editorial-sustainability-and-open-peer-review-programming-historian is not an HTTPS link
-_site/es/investigacion.html,286,http://www.themacroscope.org/2.0/ is not an HTTPS link
-_site/es/investigacion.html,294,http://doi.org/10.5281/zenodo.3813763 is not an HTTPS link
-_site/es/investigacion.html,307,http://www.iea.usp.br/eventos/historia-digital-educacao-caminhos-cruzados is not an HTTPS link
-_site/es/investigacion.html,394,http://ixa2.si.ehu.eus/intele/?q=webinars is not an HTTPS link
-_site/es/investigacion.html,397,http://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/ is not an HTTPS link
-_site/es/investigacion.html,410,http://fredgibbs.net/assets/images/ph-poster/final-board.png is not an HTTPS link
-_site/es/investigacion.html,426,http://niche-canada.org/2018/03/23/a-decade-of-programming-historians/ is not an HTTPS link
-_site/es/investigacion.html,427,http://fredgibbs.net/posts/reflections-former-PH-editor is not an HTTPS link
-_site/es/investigacion.html,428,http://clionauta.hypotheses.org/16979 is not an HTTPS link
-_site/es/investigacion.html,430,http://humanidadesdigitales.net/blog/2017/03/17/the-programming-historian-en-espanol/ is not an HTTPS link
 _site/es/jisc-tna-colaboracion.html,88,'a' tag is missing a reference
 _site/es/jisc-tna-colaboracion.html,107,'a' tag is missing a reference
 _site/es/jisc-tna-colaboracion.html,144,'a' tag is missing a reference
@@ -1760,42 +855,18 @@
 _site/es/lecciones/administracion-de-datos-en-r.html,119,'a' tag is missing a reference
 _site/es/lecciones/administracion-de-datos-en-r.html,138,'a' tag is missing a reference
 _site/es/lecciones/administracion-de-datos-en-r.html,175,'a' tag is missing a reference
 _site/es/lecciones/administracion-de-datos-en-r.html,212,'a' tag is missing a reference
-_site/es/lecciones/administracion-de-datos-en-r.html,564,http://hadley.nz is not an HTTPS link
-_site/es/lecciones/administracion-de-datos-en-r.html,585,http://academica-e.unavarra.es/bitstream/handle/2454/15785/Gramática.pdf?sequence=1 is not an HTTPS link
-_site/es/lecciones/administracion-de-datos-en-r.html,675,http://stefanbache.dk is not an HTTPS link
-_site/es/lecciones/administracion-de-datos-en-r.html,675,http://hadley.nz/ is not an HTTPS link
-_site/es/lecciones/administracion-de-datos-en-r.html,1042,http://academica-e.unavarra.es/bitstream/handle/2454/15785/Gramática.pdf?sequence=1 is not an HTTPS link
 _site/es/lecciones/analisis-de-corpus-con-antconc.html,119,'a' tag is missing a reference
 _site/es/lecciones/analisis-de-corpus-con-antconc.html,138,'a' tag is missing a reference
 _site/es/lecciones/analisis-de-corpus-con-antconc.html,175,'a' tag is missing a reference
 _site/es/lecciones/analisis-de-corpus-con-antconc.html,212,'a' tag is missing a reference
-_site/es/lecciones/analisis-de-corpus-con-antconc.html,576,http://voyant-tools.org/ is not an HTTPS link
-_site/es/lecciones/analisis-de-corpus-con-antconc.html,580,http://voyant-tools.org/ is not an HTTPS link
-_site/es/lecciones/analisis-de-corpus-con-antconc.html,580,http://www.laurenceanthony.net/software/antconc/ is not an HTTPS link
-_site/es/lecciones/analisis-de-corpus-con-antconc.html,580,http://www.laurenceanthony.net/ is not an HTTPS link
-_site/es/lecciones/analisis-de-corpus-con-antconc.html,588,http://www.laurenceanthony.net/software/antconc/ is not an HTTPS link
-_site/es/lecciones/analisis-de-corpus-con-antconc.html,590,http://www.laurenceanthony.net/software/antconc/releases/AntConc324/ is not an HTTPS link
-_site/es/lecciones/analisis-de-corpus-con-antconc.html,619,http://notepad-plus-plus.org/ is not an HTTPS link
-_site/es/lecciones/analisis-de-corpus-con-antconc.html,619,http://www.barebones.com/products/textwrangler/ is not an HTTPS link
-_site/es/lecciones/analisis-de-corpus-con-antconc.html,628,http://www.nltk.org/ is not an HTTPS link
-_site/es/lecciones/analisis-de-corpus-con-antconc.html,630,http://www.amazon.com/Developing-Linguistic-Corpora-Practice-Guides/dp/1842172050/ref=sr_1_1 is not an HTTPS link
-_site/es/lecciones/analisis-de-corpus-con-antconc.html,984,http://www.wordfrequency.info/free.asp is not an HTTPS link
-_site/es/lecciones/analisis-de-corpus-con-antconc.html,1047,http://www.lexically.net/downloads/version6/HTML/index.html?keyness_definition.htm is not an HTTPS link
-_site/es/lecciones/analisis-de-corpus-con-antconc.html,1047,http://www.laurenceanthony.net/software/antconc/releases/AntConc335/help.pdf is not an HTTPS link
-_site/es/lecciones/analisis-de-corpus-con-antconc.html,1115,http://hfroehlich.wordpress.com/2014/05/11/intro-bibliography-corpus-linguistics/ is not an HTTPS link
-_site/es/lecciones/analisis-de-corpus-con-antconc.html,1117,http://hfroehli.ch/workshops/getting-started-with-antconc/ is not an HTTPS link
-_site/es/lecciones/analisis-de-corpus-con-antconc.html,1132,http://elies.rediris.es/elies18/ is not an HTTPS link
 _site/es/lecciones/analisis-de-correspondencia-en-r.html,139,'a' tag is missing a reference
 _site/es/lecciones/analisis-de-correspondencia-en-r.html,158,'a' tag is missing a reference
 _site/es/lecciones/analisis-de-correspondencia-en-r.html,195,'a' tag is missing a reference
 _site/es/lecciones/analisis-de-correspondencia-en-r.html,232,'a' tag is missing a reference
-_site/es/lecciones/analisis-de-correspondencia-en-r.html,739,http://doi.org/10.5281/zenodo.889846 is not an HTTPS link
 _site/es/lecciones/analisis-de-sentimientos-r.html,117,'a' tag is missing a reference
 _site/es/lecciones/analisis-de-sentimientos-r.html,136,'a' tag is missing a reference
 _site/es/lecciones/analisis-de-sentimientos-r.html,173,'a' tag is missing a reference
 _site/es/lecciones/analisis-de-sentimientos-r.html,210,'a' tag is missing a reference
-_site/es/lecciones/analisis-de-sentimientos-r.html,541,http://saifmohammad.com/WebPages/NRC-Emotion-Lexicon.htm is not an HTTPS link
-_site/es/lecciones/analisis-de-sentimientos-r.html,545,http://www.matthewjockers.net/page/2/ is not an HTTPS link
 _site/es/lecciones/analisis-redes-sociales-teatro-1.html,117,'a' tag is missing a reference
 _site/es/lecciones/analisis-redes-sociales-teatro-1.html,136,'a' tag is missing a reference
 _site/es/lecciones/analisis-redes-sociales-teatro-1.html,173,'a' tag is missing a reference
@@ -1804,46 +875,27 @@
 _site/es/lecciones/analisis-redes-sociales-teatro-2.html,117,'a' tag is missing a reference
 _site/es/lecciones/analisis-redes-sociales-teatro-2.html,136,'a' tag is missing a reference
 _site/es/lecciones/analisis-redes-sociales-teatro-2.html,173,'a' tag is missing a reference
 _site/es/lecciones/analisis-redes-sociales-teatro-2.html,210,'a' tag is missing a reference
-_site/es/lecciones/analisis-redes-sociales-teatro-2.html,1089,http://hdlab.stanford.edu/palladio/ is not an HTTPS link
 _site/es/lecciones/analisis-temporal-red.html,122,'a' tag is missing a reference
 _site/es/lecciones/analisis-temporal-red.html,141,'a' tag is missing a reference
 _site/es/lecciones/analisis-temporal-red.html,178,'a' tag is missing a reference
 _site/es/lecciones/analisis-temporal-red.html,215,'a' tag is missing a reference
-_site/es/lecciones/analisis-temporal-red.html,1187,http://www.epimodel.org is not an HTTPS link
 _site/es/lecciones/analisis-voyant-tools.html,120,'a' tag is missing a reference
 _site/es/lecciones/analisis-voyant-tools.html,139,'a' tag is missing a reference
 _site/es/lecciones/analisis-voyant-tools.html,176,'a' tag is missing a reference
 _site/es/lecciones/analisis-voyant-tools.html,213,'a' tag is missing a reference
-_site/es/lecciones/analisis-voyant-tools.html,550,http://vocabularios.caicyt.gov.ar/portalthes/42/term/26 is not an HTTPS link
-_site/es/lecciones/analisis-voyant-tools.html,552,http://vocabularios.caicyt.gov.ar/portalthes/42/term/178 is not an HTTPS link
-_site/es/lecciones/analisis-voyant-tools.html,1260,http://voyant-tools.org/ is not an HTTPS link
-_site/es/lecciones/analisis-voyant-tools.html,1262,http://melissaterras.blogspot.com/2013/10/for-ada-lovelace-day-father-busas.html is not an HTTPS link
 _site/es/lecciones/construir-repositorio-de-fuentes.html,120,'a' tag is missing a reference
 _site/es/lecciones/construir-repositorio-de-fuentes.html,139,'a' tag is missing a reference
 _site/es/lecciones/construir-repositorio-de-fuentes.html,176,'a' tag is missing a reference
 _site/es/lecciones/construir-repositorio-de-fuentes.html,213,'a' tag is missing a reference
-_site/es/lecciones/construir-repositorio-de-fuentes.html,555,http://www.rubenalcaraz.es/manual-omeka/ is not an HTTPS link
-_site/es/lecciones/construir-repositorio-de-fuentes.html,592,http://localhost/ is not an HTTPS link
-_site/es/lecciones/construir-repositorio-de-fuentes.html,592,http://127.0.0.1 is not an HTTPS link
-_site/es/lecciones/construir-repositorio-de-fuentes.html,622,http://localhost/phpmyadmin/ is not an HTTPS link
-_site/es/lecciones/construir-repositorio-de-fuentes.html,692,http://localhost/phpmyadmin is not an HTTPS link
-_site/es/lecciones/construir-repositorio-de-fuentes.html,791,'a' tag is missing a reference
-_site/es/lecciones/construir-repositorio-de-fuentes.html,914,http://omeka.org/codex/Plugin_Writing_Best_Practices#Plugin_Directory_Structure is not an HTTPS link
-_site/es/lecciones/construir-repositorio-de-fuentes.html,967,http://omeka.org/codex/Plugins/DublinCoreExtended_2.0 is not an HTTPS link
+_site/es/lecciones/construir-repositorio-de-fuentes.html,792,'a' tag is missing a reference
 _site/es/lecciones/contar-frecuencias.html,121,'a' tag is missing a reference
 _site/es/lecciones/contar-frecuencias.html,140,'a' tag is missing a reference
 _site/es/lecciones/contar-frecuencias.html,177,'a' tag is missing a reference
 _site/es/lecciones/contar-frecuencias.html,214,'a' tag is missing a reference
-_site/es/lecciones/contar-frecuencias.html,624,http://docs.python.org/tutorial/datastructures.html#list-comprehensions is not an HTTPS link
-_site/es/lecciones/contar-frecuencias.html,751,http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words is not an HTTPS link
 _site/es/lecciones/corpus-paralelo-lfaligner.html,121,'a' tag is missing a reference
 _site/es/lecciones/corpus-paralelo-lfaligner.html,140,'a' tag is missing a reference
 _site/es/lecciones/corpus-paralelo-lfaligner.html,177,'a' tag is missing a reference
 _site/es/lecciones/corpus-paralelo-lfaligner.html,214,'a' tag is missing a reference
-_site/es/lecciones/corpus-paralelo-lfaligner.html,489,http://vocabularios.caicyt.gov.ar/portalthes/42/term/134 is not an HTTPS link
-_site/es/lecciones/corpus-paralelo-lfaligner.html,499,http://vocabularios.caicyt.gov.ar/portalthes/42/term/136 is not an HTTPS link
-_site/es/lecciones/corpus-paralelo-lfaligner.html,501,http://utils.mucattu.com/iso_639-1.html is not an HTTPS link
-_site/es/lecciones/corpus-paralelo-lfaligner.html,1100,http://www.laurenceanthony.net/software/antpconc/ is not an HTTPS link
 _site/es/lecciones/creacion-de-aplicacion-shiny.html,119,'a' tag is missing a reference
 _site/es/lecciones/creacion-de-aplicacion-shiny.html,138,'a' tag is missing a reference
 _site/es/lecciones/creacion-de-aplicacion-shiny.html,175,'a' tag is missing a reference
 _site/es/lecciones/creacion-de-aplicacion-shiny.html,212,'a' tag is missing a reference
 _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,119,'a' tag is missing a reference
 _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,138,'a' tag is missing a reference
 _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,175,'a' tag is missing a reference
 _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,212,'a' tag is missing a reference
-_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,530,http://hdlab.stanford.edu/palladio/ is not an HTTPS link
-_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,552,http://hal.archives-ouvertes.fr/docs/00/64/93/16/PDF/lemercier_A_zg.pdf is not an HTTPS link
-_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,552,http://hal.archives-ouvertes.fr/docs/00/64/93/16/PDF/lemercier_A_zg.pdf is not an HTTPS link
-_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,552,http://historicalnetworkresearch.org/ is not an HTTPS link
-_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,562,http://web.archive.org/web/20180422010025/http://www.gdw-berlin.de/fileadmin/bilder/publ/publikationen_in_englischer_sprache/2006_Neuman_eng.pdf is not an HTTPS link
-_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,562,http://martenduering.com/research/covert-networks-during-the-holocaust/ is not an HTTPS link
-_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,695,http://hdlab.stanford.edu/palladio/ is not an HTTPS link
-_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,771,http://hdlab.stanford.edu/doc/scenario-simple-map.pdf is not an HTTPS link
-_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,937,http://en.wikipedia.org/wiki/Directed_graph#Indegree_and_outdegree is not an HTTPS link
-_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,985,http://nodegoat.net/ is not an HTTPS link
-_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,985,http://nodegoat.net/cms/UPLOAD/AsmallguidebyYanan11082014.pdf is not an HTTPS link
-_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,987,http://www.youtube.com/watch?v=xKhYGRpbwOc is not an HTTPS link
-_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,989,http://www.clementlevallois.net/training.html is not an HTTPS link
-_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,989,http://www.youtube.com/watch?v=L6hHv6y5GsQ is not an HTTPS link
-_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,993,http://pajek.imfm.si/doku.php is not an HTTPS link
-_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,993,http://www.cambridge.org/us/academic/subjects/sociology/research-methods-sociology-and-criminology/exploratory-social-network-analysis-pajek-2nd-edition is not an HTTPS link
-_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,1081,http://historicalnetworkresearch.org is not an HTTPS link
 _site/es/lecciones/crear-exposicion-con-omeka.html,121,'a' tag is missing a reference
 _site/es/lecciones/crear-exposicion-con-omeka.html,140,'a' tag is missing a reference
 _site/es/lecciones/crear-exposicion-con-omeka.html,177,'a' tag is missing a reference
 _site/es/lecciones/crear-exposicion-con-omeka.html,214,'a' tag is missing a reference
 _site/es/lecciones/crear-y-ver-archivos-html-con-python.html,121,'a' tag is missing a reference
 _site/es/lecciones/crear-y-ver-archivos-html-con-python.html,140,'a' tag is missing a reference
 _site/es/lecciones/crear-y-ver-archivos-html-con-python.html,177,'a' tag is missing a reference
 _site/es/lecciones/crear-y-ver-archivos-html-con-python.html,214,'a' tag is missing a reference
-_site/es/lecciones/crear-y-ver-archivos-html-con-python.html,572,http://zotero.org is not an HTTPS link
-_site/es/lecciones/crear-y-ver-archivos-html-con-python.html,574,http://www.w3schools.com/html/default.asp is not an HTTPS link
-_site/es/lecciones/crear-y-ver-archivos-html-con-python.html,580,http://www.w3schools.com/tags/tag_doctype.asp is not an HTTPS link
 _site/es/lecciones/datos-abiertos-enlazados-wikidata.html,119,'a' tag is missing a reference
 _site/es/lecciones/datos-abiertos-enlazados-wikidata.html,138,'a' tag is missing a reference
 _site/es/lecciones/datos-abiertos-enlazados-wikidata.html,175,'a' tag is missing a reference
 _site/es/lecciones/datos-abiertos-enlazados-wikidata.html,212,'a' tag is missing a reference
 _site/es/lecciones/datos-de-investigacion-con-unix.html,121,'a' tag is missing a reference
 _site/es/lecciones/datos-de-investigacion-con-unix.html,140,'a' tag is missing a reference
 _site/es/lecciones/datos-de-investigacion-con-unix.html,177,'a' tag is missing a reference
 _site/es/lecciones/datos-de-investigacion-con-unix.html,214,'a' tag is missing a reference
-_site/es/lecciones/datos-de-investigacion-con-unix.html,556,http://msysgit.github.io/ is not an HTTPS link
-_site/es/lecciones/datos-de-investigacion-con-unix.html,564,http://www.7-zip.org/ is not an HTTPS link
-_site/es/lecciones/datos-de-investigacion-con-unix.html,580,http://en.wikipedia.org/wiki/Tab-separated_values is not an HTTPS link
-_site/es/lecciones/datos-de-investigacion-con-unix.html,582,http://en.wikipedia.org/wiki/Comma-separated_values is not an HTTPS link
-_site/es/lecciones/datos-de-investigacion-con-unix.html,641,http://williamjturkel.net/2013/06/15/basic-text-analysis-with-command-line-tools-in-linux/ is not an HTTPS link
-_site/es/lecciones/datos-de-investigacion-con-unix.html,642,http://williamjturkel.net/2013/06/20/pattern-matching-and-permuted-term-indexing-with-command-line-tools-in-linux/ is not an HTTPS link
 _site/es/lecciones/datos-tabulares-en-r.html,119,'a' tag is missing a reference
 _site/es/lecciones/datos-tabulares-en-r.html,138,'a' tag is missing a reference
 _site/es/lecciones/datos-tabulares-en-r.html,175,'a' tag is missing a reference
 _site/es/lecciones/datos-tabulares-en-r.html,212,'a' tag is missing a reference
-_site/es/lecciones/datos-tabulares-en-r.html,1046,http://web.archive.org/web/20191015004305/https://www.nceas.ucsb.edu/files/scicomp/Dloads/RProgramming/BestFirstRTutorial.pdf is not an HTTPS link
-_site/es/lecciones/datos-tabulares-en-r.html,1050,http://dh-r.lincolnmullen.com is not an HTTPS link
 _site/es/lecciones/de-html-a-lista-de-palabras-1.html,121,'a' tag is missing a reference
 _site/es/lecciones/de-html-a-lista-de-palabras-1.html,140,'a' tag is missing a reference
 _site/es/lecciones/de-html-a-lista-de-palabras-1.html,177,'a' tag is missing a reference
 _site/es/lecciones/de-html-a-lista-de-palabras-1.html,214,'a' tag is missing a reference
-_site/es/lecciones/de-html-a-lista-de-palabras-1.html,561,http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 is not an HTTPS link
-_site/es/lecciones/de-html-a-lista-de-palabras-1.html,565,http://www.w3schools.com/html/ is not an HTTPS link
 _site/es/lecciones/de-html-a-lista-de-palabras-2.html,121,'a' tag is missing a reference
 _site/es/lecciones/de-html-a-lista-de-palabras-2.html,140,'a' tag is missing a reference
 _site/es/lecciones/de-html-a-lista-de-palabras-2.html,177,'a' tag is missing a reference
 _site/es/lecciones/de-html-a-lista-de-palabras-2.html,214,'a' tag is missing a reference
-_site/es/lecciones/de-html-a-lista-de-palabras-2.html,697,http://docs.python.org/2.4/lib/typesnumeric.html is not an HTTPS link
-_site/es/lecciones/de-html-a-lista-de-palabras-2.html,709,http://docs.python.org/3/library/types.html is not an HTTPS link
 _site/es/lecciones/descarga-automatizada-con-wget.html,119,'a' tag is missing a reference
 _site/es/lecciones/descarga-automatizada-con-wget.html,138,'a' tag is missing a reference
 _site/es/lecciones/descarga-automatizada-con-wget.html,175,'a' tag is missing a reference
 _site/es/lecciones/descarga-automatizada-con-wget.html,212,'a' tag is missing a reference
-_site/es/lecciones/descarga-automatizada-con-wget.html,663,http://www.gnu.org/software/wget/ is not an HTTPS link
-_site/es/lecciones/descarga-automatizada-con-wget.html,663,http://ftp.gnu.org/gnu/wget/ is not an HTTPS link
-_site/es/lecciones/descarga-automatizada-con-wget.html,701,http://www.gnu.org/software/wget/manual/wget.html is not an HTTPS link
-_site/es/lecciones/descarga-automatizada-con-wget.html,703,http://activehistory.ca/papers/ is not an HTTPS link
-_site/es/lecciones/descarga-automatizada-con-wget.html,703,http://activehistory.ca/papers/historypaper-9/ is not an HTTPS link
-_site/es/lecciones/descarga-automatizada-con-wget.html,735,http://activehistory.ca/papers/ is not an HTTPS link
-_site/es/lecciones/descarga-automatizada-con-wget.html,749,http://activehistory.ca/papers/ is not an HTTPS link
-_site/es/lecciones/descarga-automatizada-con-wget.html,749,http://activehistory.ca/papers/historypaper-9/ is not an HTTPS link
-_site/es/lecciones/descarga-automatizada-con-wget.html,749,http://uwo.ca is not an HTTPS link
-_site/es/lecciones/descarga-automatizada-con-wget.html,822,http://www.gnu.org/software/wget/manual/wget.html is not an HTTPS link
 _site/es/lecciones/descarga-multiples-registros-usando-cadenas-de-consulta.html,119,'a' tag is missing a reference
 _site/es/lecciones/descarga-multiples-registros-usando-cadenas-de-consulta.html,138,'a' tag is missing a reference
 _site/es/lecciones/descarga-multiples-registros-usando-cadenas-de-consulta.html,175,'a' tag is missing a reference
 _site/es/lecciones/descarga-multiples-registros-usando-cadenas-de-consulta.html,212,'a' tag is missing a reference
 _site/es/lecciones/editar-audio-con-audacity.html,119,'a' tag is missing a reference
 _site/es/lecciones/editar-audio-con-audacity.html,138,'a' tag is missing a reference
 _site/es/lecciones/editar-audio-con-audacity.html,175,'a' tag is missing a reference
 _site/es/lecciones/editar-audio-con-audacity.html,212,'a' tag is missing a reference
-_site/es/lecciones/editar-audio-con-audacity.html,531,http://www.audacityteam.org/ is not an HTTPS link
-_site/es/lecciones/editar-audio-con-audacity.html,543,http://www.audacityteam.org/ is not an HTTPS link
-_site/es/lecciones/editar-audio-con-audacity.html,555,http://web.archive.org/web/20161119231053/http://www.indiana.edu:80/~emusic/acoustics/amplitude.htm is not an HTTPS link
-_site/es/lecciones/editar-audio-con-audacity.html,601,http://www.diffen.com/difference/Mono_vs_Stereo/ is not an HTTPS link
-_site/es/lecciones/editar-audio-con-audacity.html,673,http://manual.audacityteam.org/man/crossfade_clips.html is not an HTTPS link
-_site/es/lecciones/editar-audio-con-audacity.html,689,http://www.nch.com.au/acm/formats.html is not an HTTPS link
 _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,121,'a' tag is missing a reference
 _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,140,'a' tag is missing a reference
 _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,177,'a' tag is missing a reference
 _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,214,'a' tag is missing a reference
-_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,577,http://notepad-plus-plus.org is not an HTTPS link
-_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,735,http://daringfireball.net/projects/markdown/dingus is not an HTTPS link
-_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,855,http://editor.citationstyles.org/about/ is not an HTTPS link
-_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,887,http://stackoverflow.com/questions/tagged/pandoc is not an HTTPS link
-_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,887,http://web.archive.org/web/20190203062832/http://digitalhumanities.org/answers/ is not an HTTPS link
-_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,889,http://web.archive.org/web/20140120195538/http://mashable.com/2013/06/24/markdown-tools/ is not an HTTPS link
-_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,889,http://mouapp.com/ is not an HTTPS link
-_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,889,http://writemonkey.com is not an HTTPS link
-_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,889,http://www.sublimetext.com/ is not an HTTPS link
-_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,889,http://prose.io is not an HTTPS link
-_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,889,http://www.authorea.com is not an HTTPS link
-_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,889,http://www.draftin.com is not an HTTPS link
-_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,891,http://gitit.net/ is not an HTTPS link
-_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,891,http://github.com/fauno/jekyll-pandoc-multiple-formats is not an HTTPS link
-_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,891,http://jaspervdj.be/hakyll/ is not an HTTPS link
-_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,893,http://readthedocs.org is not an HTTPS link
-_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,904,http://www.antipope.org/charlie/blog-static/2013/10/why-microsoft-word-must-die.html is not an HTTPS link
 _site/es/lecciones/exhibicion-con-collection-builder.html,117,'a' tag is missing a reference
 _site/es/lecciones/exhibicion-con-collection-builder.html,136,'a' tag is missing a reference
 _site/es/lecciones/exhibicion-con-collection-builder.html,173,'a' tag is missing a reference
 _site/es/lecciones/exhibicion-con-collection-builder.html,210,'a' tag is missing a reference
-_site/es/lecciones/exhibicion-con-collection-builder.html,584,http://dna.nust.na/heritage_week/ is not an HTTPS link
-_site/es/lecciones/exhibicion-con-collection-builder.html,592,http://www.gimp.org.es/descargar-gimp.html is not an HTTPS link
 _site/es/lecciones/generadores-aventura.html,117,'a' tag is missing a reference
 _site/es/lecciones/generadores-aventura.html,136,'a' tag is missing a reference
 _site/es/lecciones/generadores-aventura.html,173,'a' tag is missing a reference
 _site/es/lecciones/generadores-aventura.html,210,'a' tag is missing a reference
-_site/es/lecciones/generadores-aventura.html,534,http://www.spoonbill.org/n+7/ is not an HTTPS link
-_site/es/lecciones/generadores-aventura.html,729,http://clementinagrillo.com/sobremesadigital/flare.json is not an HTTPS link
 _site/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.html,119,'a' tag is missing a reference
 _site/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.html,138,'a' tag is missing a reference
 _site/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.html,175,'a' tag is missing a reference
 _site/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.html,212,'a' tag is missing a reference
-_site/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.html,774,http://visone.ethz.ch/html/download.html is not an HTTPS link
-_site/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.html,1033,http://dx.doi.org/10.3828/mlo.v0i0.299 is not an HTTPS link
 _site/es/lecciones/georreferenciar-qgis.html,126,'a' tag is missing a reference
 _site/es/lecciones/georreferenciar-qgis.html,145,'a' tag is missing a reference
 _site/es/lecciones/georreferenciar-qgis.html,182,'a' tag is missing a reference
 _site/es/lecciones/georreferenciar-qgis.html,219,'a' tag is missing a reference
-_site/es/lecciones/georreferenciar-qgis.html,550,http://en.wikipedia.org/wiki/Rubbersheeting is not an HTTPS link
-_site/es/lecciones/georreferenciar-qgis.html,617,http://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP is not an HTTPS link
-_site/es/lecciones/georreferenciar-qgis.html,908,http://en.wikipedia.org/wiki/Tagged_Image_File_Format is not an HTTPS link
-_site/es/lecciones/georreferenciar-qgis.html,1059,http://geospatialhistorian.wordpress.com/ is not an HTTPS link
 _site/es/lecciones/index.html,88,'a' tag is missing a reference
 _site/es/lecciones/index.html,107,'a' tag is missing a reference
 _site/es/lecciones/index.html,144,'a' tag is missing a reference
@@ -1995,7 +972,6 @@
 _site/es/lecciones/instalacion-linux.html,121,'a' tag is missing a reference
 _site/es/lecciones/instalacion-linux.html,140,'a' tag is missing a reference
 _site/es/lecciones/instalacion-linux.html,177,'a' tag is missing a reference
 _site/es/lecciones/instalacion-linux.html,214,'a' tag is missing a reference
-_site/es/lecciones/instalacion-linux.html,569,http://komodoide.com/komodo-edit/ is not an HTTPS link
 _site/es/lecciones/instalacion-mac.html,121,'a' tag is missing a reference
 _site/es/lecciones/instalacion-mac.html,140,'a' tag is missing a reference
 _site/es/lecciones/instalacion-mac.html,177,'a' tag is missing a reference
 _site/es/lecciones/instalacion-mac.html,214,'a' tag is missing a reference
@@ -2008,40 +984,26 @@
 _site/es/lecciones/instalar-modulos-python-pip.html,119,'a' tag is missing a reference
 _site/es/lecciones/instalar-modulos-python-pip.html,138,'a' tag is missing a reference
 _site/es/lecciones/instalar-modulos-python-pip.html,175,'a' tag is missing a reference
 _site/es/lecciones/instalar-modulos-python-pip.html,212,'a' tag is missing a reference
-_site/es/lecciones/instalar-modulos-python-pip.html,561,http://www.thegeekstuff.com/2012/04/curl-examples/ is not an HTTPS link
-_site/es/lecciones/instalar-modulos-python-pip.html,587,http://stackoverflow.com/questions/4750806/how-to-install-pip-on-windows is not an HTTPS link
 _site/es/lecciones/intro-a-google-maps-y-google-earth.html,126,'a' tag is missing a reference
 _site/es/lecciones/intro-a-google-maps-y-google-earth.html,145,'a' tag is missing a reference
 _site/es/lecciones/intro-a-google-maps-y-google-earth.html,182,'a' tag is missing a reference
 _site/es/lecciones/intro-a-google-maps-y-google-earth.html,219,'a' tag is missing a reference
-_site/es/lecciones/intro-a-google-maps-y-google-earth.html,1248,http://www.davidrumsey.com/ is not an HTTPS link
-_site/es/lecciones/intro-a-google-maps-y-google-earth.html,1516,http://niche-canada.org/2011/12/14/mobile-mapping-and-historical-gis-in-the-field/ is not an HTTPS link
 _site/es/lecciones/introduccion-a-bash.html,121,'a' tag is missing a reference
 _site/es/lecciones/introduccion-a-bash.html,140,'a' tag is missing a reference
 _site/es/lecciones/introduccion-a-bash.html,177,'a' tag is missing a reference
 _site/es/lecciones/introduccion-a-bash.html,214,'a' tag is missing a reference
-_site/es/lecciones/introduccion-a-bash.html,577,http://www.tldp.org/LDP/Bash-Beginners-Guide/html/chap_01.html is not an HTTPS link
-_site/es/lecciones/introduccion-a-bash.html,627,http://ethanschoonover.com/solarized is not an HTTPS link
-_site/es/lecciones/introduccion-a-bash.html,779,http://www.viemu.com/a-why-vi-vim.html is not an HTTPS link
-_site/es/lecciones/introduccion-a-bash.html,791,http://www.gutenberg.org/ebooks/2600 is not an HTTPS link
-_site/es/lecciones/introduccion-a-bash.html,887,http://vimdoc.sourceforge.net/htmldoc/quickref.html is not an HTTPS link
 _site/es/lecciones/introduccion-a-ffmpeg.html,119,'a' tag is missing a reference
 _site/es/lecciones/introduccion-a-ffmpeg.html,138,'a' tag is missing a reference
 _site/es/lecciones/introduccion-a-ffmpeg.html,175,'a' tag is missing a reference
 _site/es/lecciones/introduccion-a-ffmpeg.html,212,'a' tag is missing a reference
-_site/es/lecciones/introduccion-a-ffmpeg.html,668,http://linuxbrew.sh/ is not an HTTPS link
-_site/es/lecciones/introduccion-a-ffmpeg.html,1130,http://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm is not an HTTPS link
 _site/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.html,121,'a' tag is missing a reference
 _site/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.html,140,'a' tag is missing a reference
 _site/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.html,177,'a' tag is missing a reference
 _site/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.html,214,'a' tag is missing a reference
-_site/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.html,523,http://lab.culturalanalytics.info/ is not an HTTPS link
-_site/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.html,533,http://ladi.lib.utexas.edu/ is not an HTTPS link
 _site/es/lecciones/introduccion-a-markdown.html,122,'a' tag is missing a reference
 _site/es/lecciones/introduccion-a-markdown.html,141,'a' tag is missing a reference
 _site/es/lecciones/introduccion-a-markdown.html,178,'a' tag is missing a reference
 _site/es/lecciones/introduccion-a-markdown.html,215,'a' tag is missing a reference
-_site/es/lecciones/introduccion-a-markdown.html,541,http://daringfireball.net/projects/markdown/ is not an HTTPS link
 _site/es/lecciones/introduccion-a-markdown.html,748,http://programminghistorian.org/ is not an HTTPS link
 _site/es/lecciones/introduccion-a-powershell.html,119,'a' tag is missing a reference
 _site/es/lecciones/introduccion-a-powershell.html,138,'a' tag is missing a reference
@@ -2059,45 +1021,20 @@
 _site/es/lecciones/introduccion-al-web-scraping-usando-r.html,117,'a' tag is missing a reference
 _site/es/lecciones/introduccion-al-web-scraping-usando-r.html,136,'a' tag is missing a reference
 _site/es/lecciones/introduccion-al-web-scraping-usando-r.html,173,'a' tag is missing a reference
 _site/es/lecciones/introduccion-al-web-scraping-usando-r.html,210,'a' tag is missing a reference
-_site/es/lecciones/introduccion-al-web-scraping-usando-r.html,677,http://www.memoriachilena.gob.cl/ is not an HTTPS link
 _site/es/lecciones/introduccion-datos-abiertos-enlazados.html,119,'a' tag is missing a reference
 _site/es/lecciones/introduccion-datos-abiertos-enlazados.html,138,'a' tag is missing a reference
 _site/es/lecciones/introduccion-datos-abiertos-enlazados.html,175,'a' tag is missing a reference
 _site/es/lecciones/introduccion-datos-abiertos-enlazados.html,212,'a' tag is missing a reference
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,556,http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,591,http://www.oxforddnb.com is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,603,http://www.geonames.org/ is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,701,http://www.history.ac.uk/projects/digital/tobias is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,733,http://semanticweb.org/wiki/Main_Page.html is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,738,http://web.archive.org/web/20170715094229/http://www.musicontology.com/ is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,738,http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,748,http://wiki.dbpedia.org is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,853,http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,887,http://www.history.ac.uk/projects/digital/tobias is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,907,http://www.easyrdf.org/converter is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,919,http://dbpedia.org/snorql/ is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,1019,http://dbpedia.org/class/yago/WikicatBritishHistorians is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,1019,http://dbpedia.org/class/yago/WikicatWomenHistorians is not an HTTPS link
 _site/es/lecciones/introduccion-datos-abiertos-enlazados.html,1048,'a' tag is missing a reference
 _site/es/lecciones/introduccion-datos-abiertos-enlazados.html,1048,'a' tag is missing a reference
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,1065,http://www.snee.com/bobdc.blog/ is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,1077,http://linkeddata.org/guides-and-tutorials is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,1083,http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,1089,http://datos.bcn.cl/es/informacion/que-es is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,1098,http://www.history.ac.uk/projects/digital/tobias is not an HTTPS link
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,1098,http://www.ahrc.ac.uk/ is not an HTTPS link
 _site/es/lecciones/introduccion-e-instalacion.html,121,'a' tag is missing a reference
 _site/es/lecciones/introduccion-e-instalacion.html,140,'a' tag is missing a reference
 _site/es/lecciones/introduccion-e-instalacion.html,177,'a' tag is missing a reference
 _site/es/lecciones/introduccion-e-instalacion.html,214,'a' tag is missing a reference
-_site/es/lecciones/introduccion-e-instalacion.html,557,http://komodoide.com/komodo-edit/ is not an HTTPS link
 _site/es/lecciones/introduccion-map-warper.html,122,'a' tag is missing a reference
 _site/es/lecciones/introduccion-map-warper.html,141,'a' tag is missing a reference
 _site/es/lecciones/introduccion-map-warper.html,178,'a' tag is missing a reference
 _site/es/lecciones/introduccion-map-warper.html,215,'a' tag is missing a reference
-_site/es/lecciones/introduccion-map-warper.html,521,http://bibliotecanacional.gov.co/es-co/colecciones/biblioteca-digital/mapoteca is not an HTTPS link
-_site/es/lecciones/introduccion-map-warper.html,521,http://cartografia.bogotaendocumentos.com/ is not an HTTPS link
-_site/es/lecciones/introduccion-map-warper.html,586,http://catalogoenlinea.bibliotecanacional.gov.co/custom/web/content/mapoteca/fmapoteca_984_figac_16/fmapoteca_984_figac_16.html is not an HTTPS link
 _site/es/lecciones/lectura-escalable-de-datos-estructurados.html,127,'a' tag is missing a reference
 _site/es/lecciones/lectura-escalable-de-datos-estructurados.html,146,'a' tag is missing a reference
 _site/es/lecciones/lectura-escalable-de-datos-estructurados.html,183,'a' tag is missing a reference
@@ -2106,12 +1043,6 @@
 _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html,123,'a' tag is missing a reference
 _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html,142,'a' tag is missing a reference
 _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html,179,'a' tag is missing a reference
 _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html,216,'a' tag is missing a reference
-_site/es/lecciones/limpieza-de-datos-con-OpenRefine.html,570,http://openrefine.org/ is not an HTTPS link
-_site/es/lecciones/limpieza-de-datos-con-OpenRefine.html,591,http://vis.stanford.edu/papers/wrangler/ is not an HTTPS link
-_site/es/lecciones/limpieza-de-datos-con-OpenRefine.html,591,http://openrefine.org/ is not an HTTPS link
-_site/es/lecciones/limpieza-de-datos-con-OpenRefine.html,593,http://en.wikipedia.org/wiki/Named-entity_recognition is not an HTTPS link
-_site/es/lecciones/limpieza-de-datos-con-OpenRefine.html,593,http://www.loc.gov/index.html is not an HTTPS link
-_site/es/lecciones/limpieza-de-datos-con-OpenRefine.html,593,http://www.oclc.org/home.en.html is not an HTTPS link
 _site/es/lecciones/manipular-cadenas-de-caracteres-en-python.html,121,'a' tag is missing a reference
 _site/es/lecciones/manipular-cadenas-de-caracteres-en-python.html,140,'a' tag is missing a reference
 _site/es/lecciones/manipular-cadenas-de-caracteres-en-python.html,177,'a' tag is missing a reference
 _site/es/lecciones/manipular-cadenas-de-caracteres-en-python.html,214,'a' tag is missing a reference
 _site/es/lecciones/mineria-de-datos-en-internet-archive.html,119,'a' tag is missing a reference
 _site/es/lecciones/mineria-de-datos-en-internet-archive.html,138,'a' tag is missing a reference
 _site/es/lecciones/mineria-de-datos-en-internet-archive.html,175,'a' tag is missing a reference
 _site/es/lecciones/mineria-de-datos-en-internet-archive.html,212,'a' tag is missing a reference
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,531,http://archive.org/ is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,532,http://activehistory.ca/2013/09/the-internet-archive-rocks-or-two-million-plus-free-sources-to-explore/ is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,536,http://archive.org/details/bplscas is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,567,http://archive.org/ is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,569,http://archive.org/details/lettertowilliaml00doug is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,570,http://archive.org/stream/lettertowilliaml00doug/39999066767938#page/n0/mode/2up is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,570,http://archive.org/download/lettertowilliaml00doug is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,570,http://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_dc.xml is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,570,http://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_marc.xml is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,570,http://www.loc.gov/marc/bibliographic/ is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,572,http://archive.org/search.php?query=collection%3Abplscas&sort=-publicdate is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,591,http://blog.archive.org/2011/03/31/how-archive-org-items-are-structured/ is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,595,http://internetarchive.readthedocs.io/en/latest/quickstart.html#searching is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,609,http://archive.org/search.php?query=collection%3Abplscas is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,613,http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,698,http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,741,http://docs.python.org/2/tutorial/errors.html#handling-exceptions is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,785,http://archive.org/download/lettertowilliaml00doug/lettertowilliaml00doug_marc.xml is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,787,http://www.loc.gov/marc/bibliographic/bd260.html is not an HTTPS link
-_site/es/lecciones/mineria-de-datos-en-internet-archive.html,787,http://www.loc.gov/marc/ is not an HTTPS link
 _site/es/lecciones/normalizar-datos.html,121,'a' tag is missing a reference
 _site/es/lecciones/normalizar-datos.html,140,'a' tag is missing a reference
 _site/es/lecciones/normalizar-datos.html,177,'a' tag is missing a reference
 _site/es/lecciones/normalizar-datos.html,214,'a' tag is missing a reference
-_site/es/lecciones/normalizar-datos.html,684,http://unicode.org/ is not an HTTPS link
 _site/es/lecciones/palabras-clave-en-contexto-n-grams.html,121,'a' tag is missing a reference
 _site/es/lecciones/palabras-clave-en-contexto-n-grams.html,140,'a' tag is missing a reference
 _site/es/lecciones/palabras-clave-en-contexto-n-grams.html,177,'a' tag is missing a reference
 _site/es/lecciones/palabras-clave-en-contexto-n-grams.html,214,'a' tag is missing a reference
 _site/es/lecciones/poniendo-omeka-a-funcionar.html,119,'a' tag is missing a reference
 _site/es/lecciones/poniendo-omeka-a-funcionar.html,138,'a' tag is missing a reference
 _site/es/lecciones/poniendo-omeka-a-funcionar.html,175,'a' tag is missing a reference
 _site/es/lecciones/poniendo-omeka-a-funcionar.html,212,'a' tag is missing a reference
-_site/es/lecciones/poniendo-omeka-a-funcionar.html,548,http://www.omeka.net is not an HTTPS link
-_site/es/lecciones/poniendo-omeka-a-funcionar.html,572,http://www.omeka.net is not an HTTPS link
-_site/es/lecciones/poniendo-omeka-a-funcionar.html,949,http://info.omeka.net is not an HTTPS link
 _site/es/lecciones/preservar-datos-de-investigacion.html,119,'a' tag is missing a reference
 _site/es/lecciones/preservar-datos-de-investigacion.html,138,'a' tag is missing a reference
 _site/es/lecciones/preservar-datos-de-investigacion.html,175,'a' tag is missing a reference
 _site/es/lecciones/preservar-datos-de-investigacion.html,212,'a' tag is missing a reference
-_site/es/lecciones/preservar-datos-de-investigacion.html,601,http://notepad-plus-plus.org/ is not an HTTPS link
-_site/es/lecciones/preservar-datos-de-investigacion.html,601,http://komodoide.com/komodo-edit/ is not an HTTPS link
-_site/es/lecciones/preservar-datos-de-investigacion.html,630,http://cradledincaricature.com/2014/02/06/comic-art-beyond-the-print-shop/ is not an HTTPS link
-_site/es/lecciones/preservar-datos-de-investigacion.html,637,http://www.theguardian.com/uk-news/2014/feb/20/rebekah-brooks-rupert-murdoch-phone-hacking-trial is not an HTTPS link
-_site/es/lecciones/preservar-datos-de-investigacion.html,644,http://www.cartoons.ac.uk/record/SBD0931 is not an HTTPS link
-_site/es/lecciones/preservar-datos-de-investigacion.html,651,http://www.oldbaileyonline.org/browse.jsp?ref=OA16780417 is not an HTTPS link
-_site/es/lecciones/preservar-datos-de-investigacion.html,758,http://historyonics.blogspot.co.uk/2014/01/judging-book-by-its-url.html is not an HTTPS link
-_site/es/lecciones/preservar-datos-de-investigacion.html,762,http://earlymodernnotes.wordpress.com/2013/05/18/unclean-unclean-what-historians-can-do-about-sharing-our-messy-research-data/ is not an HTTPS link
-_site/es/lecciones/preservar-datos-de-investigacion.html,775,http://britishlibrary.typepad.co.uk/collectioncare/2013/09/the-twelve-principles-of-digital-preservation.html is not an HTTPS link
-_site/es/lecciones/preservar-datos-de-investigacion.html,784,http://data-archive.ac.uk/create-manage/document is not an HTTPS link
 _site/es/lecciones/procesamiento-basico-de-textos-en-r.html,121,'a' tag is missing a reference
 _site/es/lecciones/procesamiento-basico-de-textos-en-r.html,140,'a' tag is missing a reference
 _site/es/lecciones/procesamiento-basico-de-textos-en-r.html,177,'a' tag is missing a reference
 _site/es/lecciones/procesamiento-basico-de-textos-en-r.html,214,'a' tag is missing a reference
 _site/es/lecciones/publicar-archivos-tei-ceteicean.html,119,'a' tag is missing a reference
 _site/es/lecciones/publicar-archivos-tei-ceteicean.html,138,'a' tag is missing a reference
 _site/es/lecciones/publicar-archivos-tei-ceteicean.html,175,'a' tag is missing a reference
 _site/es/lecciones/publicar-archivos-tei-ceteicean.html,212,'a' tag is missing a reference
-_site/es/lecciones/publicar-archivos-tei-ceteicean.html,479,http://www.jedit.org/ is not an HTTPS link
-_site/es/lecciones/publicar-archivos-tei-ceteicean.html,503,http://hdlab.space/La-Argentina-Manuscrita is not an HTTPS link
-_site/es/lecciones/publicar-archivos-tei-ceteicean.html,505,http://hdlab.space/La-Argentina-Manuscrita/assets/Ruy_Diaz-La_argentina_manuscrita.tei.xml is not an HTTPS link
-_site/es/lecciones/publicar-archivos-tei-ceteicean.html,774,http://teic.github.io/CETEIcean/ is not an HTTPS link
 _site/es/lecciones/retirada/introduccion-control-versiones-github-desktop.html,89,'a' tag is missing a reference
 _site/es/lecciones/retirada/introduccion-control-versiones-github-desktop.html,108,'a' tag is missing a reference
 _site/es/lecciones/retirada/introduccion-control-versiones-github-desktop.html,145,'a' tag is missing a reference
 _site/es/lecciones/retirada/introduccion-control-versiones-github-desktop.html,182,'a' tag is missing a reference
-_site/es/lecciones/retirada/introduccion-control-versiones-github-desktop.html,649,http://flight-manual.atom.io/ is not an HTTPS link
-_site/es/lecciones/retirada/introduccion-control-versiones-github-desktop.html,749,http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html is not an HTTPS link
 _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,90,'a' tag is missing a reference
 _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,109,'a' tag is missing a reference
 _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,146,'a' tag is missing a reference
 _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,183,'a' tag is missing a reference
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,545,http://collection.britishmuseum.org/ is not an HTTPS link
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,545,http://labs.europeana.eu/api/linked-open-data-introduction is not an HTTPS link
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,545,http://americanart.si.edu/ is not an HTTPS link
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,545,http://britishart.yale.edu/collections/using-collections/technology/linked-open-data is not an HTTPS link
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,545,http://vocab.getty.edu/ is not an HTTPS link
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,597,http://hdlab.stanford.edu/palladio/ is not an HTTPS link
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,754,http://collection.britishmuseum.org/sparql is not an HTTPS link
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,772,http://collection.britishmuseum.org/resource?uri=http://collection.britishmuseum.org/id/object/PPA82633 is not an HTTPS link
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,782,http://collection.britishmuseum.org/sparql?query=SELECT+*%0D%0AWHERE+%7B%0D%0A++%3Chttp%3A%2F%2Fcollection.britishmuseum.org%2Fid%2Fobject%2FPPA82633%3E+%3Fp+%3Fo+.%0D%0A++%7D&_implicit=false&_equivalent=false&_form=%2Fsparql is not an HTTPS link
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,1013,http://sparql.europeana.eu/ is not an HTTPS link
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,1013,http://wiki.dbpedia.org/ is not an HTTPS link
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,1013,http://sws.geonames.org/ is not an HTTPS link
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,1013,http://sparql.europeana.eu is not an HTTPS link
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,1064,http://www.getty.edu/research/ is not an HTTPS link
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,1074,http://openrefine.org/ is not an HTTPS link
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,1081,http://hdlab.stanford.edu/palladio/ is not an HTTPS link
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,1133,http://labs.europeana.eu/api/linked-open-data-SPARQL-endpoint is not an HTTPS link
-_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,1134,http://vocab.getty.edu/queries#Finding_Subjects is not an HTTPS link
 _site/es/lecciones/reutilizacion-de-codigo-y-modularidad.html,121,'a' tag is missing a reference
 _site/es/lecciones/reutilizacion-de-codigo-y-modularidad.html,140,'a' tag is missing a reference
 _site/es/lecciones/reutilizacion-de-codigo-y-modularidad.html,177,'a' tag is missing a reference
 _site/es/lecciones/reutilizacion-de-codigo-y-modularidad.html,214,'a' tag is missing a reference
 _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,123,'a' tag is missing a reference
 _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,142,'a' tag is missing a reference
 _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,179,'a' tag is missing a reference
_site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,216,'a' tag is missing a reference -_site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,497,http://labs.bl.uk is not an HTTPS link -_site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,515,http://rua.ua.es/dspace/handle/10045/110281 is not an HTTPS link -_site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,523,http://data.cervantesvirtual.com/blog/notebooks/ is not an HTTPS link -_site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,542,http://api.bnf.fr/ is not an HTTPS link -_site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,557,http://data.cervantesvirtual.com/blog/labs is not an HTTPS link -_site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,607,http://www.cervantesvirtual.com/ is not an HTTPS link -_site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,789,http://bibliontology.com/ is not an HTTPS link -_site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,1105,http://www.rdaregistry.info is not an HTTPS link _site/es/lecciones/salida-de-datos-como-archivo-html.html,121,'a' tag is missing a reference _site/es/lecciones/salida-de-datos-como-archivo-html.html,140,'a' tag is missing a reference _site/es/lecciones/salida-de-datos-como-archivo-html.html,177,'a' tag is missing a reference @@ -2277,7 +1142,6 @@ _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,605,'a' tag i _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,606,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,611,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,615,'a' tag is missing a reference -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,659,http://jekyllrb.com/docs/home/ is not an HTTPS link _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,661,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,667,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,712,'a' tag is missing a reference @@ -2285,20 +1149,16 @@ _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,726,'a' tag i _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,732,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,750,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,768,'a' tag is missing a reference -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,770,http://www.barebones.com/products/bbedit is not an HTTPS link _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,774,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,846,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,850,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,860,'a' tag is missing a reference -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,862,http://brew.sh/ is not an HTTPS link _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,922,'a' tag is missing a reference -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,924,http://brew.sh/ is not an HTTPS link 
_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,947,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,969,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,977,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,987,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1010,'a' tag is missing a reference -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1081,http://amandavisconti.github.io/JekyllDemo/ is not an HTTPS link _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1111,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1167,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1184,'a' tag is missing a reference @@ -2306,86 +1166,41 @@ _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1188,'a' tag _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1310,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1343,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1349,'a' tag is missing a reference -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1353,http://kramdown.gettalong.org/quickref.html is not an HTTPS link -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1355,http://kramdown.gettalong.org/quickref.html is not an HTTPS link -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1357,http://www.typora.io/ is not an HTTPS link _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1359,'a' tag is missing a reference -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1393,http://amandavisconti.github.io/JekyllDemo/resume/ is not an HTTPS link _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1395,'a' tag is missing a reference -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1471,http://raw.githubusercontent.com/amandavisconti/JekyllDemo/gh-pages/_posts/2016-02-29-a-post-about-my-research.markdown is not an HTTPS link _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1473,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1677,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1681,'a' tag is missing a reference -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1690,http://jekyllthemes.org/ is not an HTTPS link -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1691,http://jekyllthemes.io/ is not an HTTPS link _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1694,'a' tag is missing a reference -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1698,http://jekyllrb.com/docs/plugins/ is not an HTTPS link -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1704,http://jekyllrb.com/docs/plugins/ is not an HTTPS link -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1704,http://jekyllrb.com/docs/plugins/ is not an HTTPS link -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1707,http://literaturegeek.com/ is not an HTTPS link 
_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1719,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1739,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1741,'a' tag is missing a reference -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1743,http://stackexchange.com/ is not an HTTPS link _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1745,'a' tag is missing a reference _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1749,'a' tag is missing a reference -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1755,http://jekyll-windows.juthilo.com/ is not an HTTPS link _site/es/lecciones/topic-modeling-y-mallet.html,123,'a' tag is missing a reference _site/es/lecciones/topic-modeling-y-mallet.html,142,'a' tag is missing a reference _site/es/lecciones/topic-modeling-y-mallet.html,179,'a' tag is missing a reference _site/es/lecciones/topic-modeling-y-mallet.html,216,'a' tag is missing a reference -_site/es/lecciones/topic-modeling-y-mallet.html,565,http://mallet.cs.umass.edu/mailinglist.php is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,571,http://www.cs.umbc.edu/~hillol/NGDM07/abstracts/talks/MKirschenbaum.pdf is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,571,http://www.worldcat.org/title/reading-machines-toward-an-algorithmic-criticism/oclc/708761605&referer=brief_results is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,573,http://voyant-tools.org/ is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,591,http://arxiv.org/abs/1003.6087/ is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,598,http://dsl.richmond.edu/dispatch/ is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,605,http://mallet.cs.umass.edu/index.php is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,628,http://mallet.cs.umass.edu/index.php is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,628,http://mallet.cs.umass.edu/download.php is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,629,http://www.oracle.com/technetwork/java/javase/downloads/index.html is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,768,http://mallet.cs.umass.edu/download.php is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,769,http://www.oracle.com/technetwork/java/javase/downloads/index.html is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,990,http://dsl.richmond.edu/dispatch/ is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,997,http://electricarchaeology.ca/2012/07/09/mining-a-day-of-archaeology/ is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,1001,http://electricarchaeology.ca/2012/06/08/mining-the-open-web-with-looted-heritage-draft/ is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,1008,http://tedunderwood.wordpress.com/2012/04/07/topic-modeling-made-just-simple-enough/ is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,1009,http://web.archive.org/web/20160704150726/http://www.lisarhody.com:80/some-assembly-required/ is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,1011,http://dl.acm.org/citation.cfm?id=944937 is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,1012,http://mimno.infosci.cornell.edu/topics.html 
is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,1012,http://www.perseus.tufts.edu/publications/02-jocch-mimno.pdf is not an HTTPS link -_site/es/lecciones/topic-modeling-y-mallet.html,1055,http://www.morethanbooks.eu/topic-modeling-introduccion/ is not an HTTPS link _site/es/lecciones/trabajar-con-archivos-de-texto.html,121,'a' tag is missing a reference _site/es/lecciones/trabajar-con-archivos-de-texto.html,140,'a' tag is missing a reference _site/es/lecciones/trabajar-con-archivos-de-texto.html,177,'a' tag is missing a reference _site/es/lecciones/trabajar-con-archivos-de-texto.html,214,'a' tag is missing a reference -_site/es/lecciones/trabajar-con-archivos-de-texto.html,672,"http://en.wikibooks.org/wiki/Non-Programmer%27s_Tutorial_for_Python_2.6/Hello,_World is not an HTTPS link" _site/es/lecciones/trabajar-con-paginas-web.html,121,'a' tag is missing a reference _site/es/lecciones/trabajar-con-paginas-web.html,140,'a' tag is missing a reference _site/es/lecciones/trabajar-con-paginas-web.html,177,'a' tag is missing a reference _site/es/lecciones/trabajar-con-paginas-web.html,214,'a' tag is missing a reference -_site/es/lecciones/trabajar-con-paginas-web.html,582,http://oldbaileyonline.org is not an HTTPS link -_site/es/lecciones/trabajar-con-paginas-web.html,623,http://en.wikipedia.org/wiki/Gordon_Riots is not an HTTPS link -_site/es/lecciones/trabajar-con-paginas-web.html,646,http://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes is not an HTTPS link -_site/es/lecciones/trabajar-con-paginas-web.html,646,http://www.oldbaileyonline.org/images.jsp?doc=178006280084 is not an HTTPS link _site/es/lecciones/transformacion-datos-xml-xsl.html,119,'a' tag is missing a reference _site/es/lecciones/transformacion-datos-xml-xsl.html,138,'a' tag is missing a reference _site/es/lecciones/transformacion-datos-xml-xsl.html,175,'a' tag is missing a reference _site/es/lecciones/transformacion-datos-xml-xsl.html,212,'a' tag is missing a reference -_site/es/lecciones/transformacion-datos-xml-xsl.html,777,http://scissors-and-paste.net is not an HTTPS link -_site/es/lecciones/transformacion-datos-xml-xsl.html,875,http://www.w3.org/ is not an HTTPS link _site/es/lecciones/uso-las-colecciones-hathitrust-mineria-textual-R.html,117,'a' tag is missing a reference _site/es/lecciones/uso-las-colecciones-hathitrust-mineria-textual-R.html,136,'a' tag is missing a reference _site/es/lecciones/uso-las-colecciones-hathitrust-mineria-textual-R.html,173,'a' tag is missing a reference _site/es/lecciones/uso-las-colecciones-hathitrust-mineria-textual-R.html,210,'a' tag is missing a reference -_site/es/lecciones/uso-las-colecciones-hathitrust-mineria-textual-R.html,545,http://hdl.handle.net/2027/uc1.31175010656638 is not an HTTPS link _site/es/lecciones/ver-archivos-html.html,121,'a' tag is missing a reference _site/es/lecciones/ver-archivos-html.html,140,'a' tag is missing a reference _site/es/lecciones/ver-archivos-html.html,177,'a' tag is missing a reference _site/es/lecciones/ver-archivos-html.html,214,'a' tag is missing a reference -_site/es/lecciones/ver-archivos-html.html,591,http://www.w3schools.com/html/default.asp is not an HTTPS link -_site/es/lecciones/ver-archivos-html.html,643,http://www.w3schools.com/html/default.asp is not an HTTPS link -_site/es/lecciones/ver-archivos-html.html,644,http://www.w3schools.com/html/html5_intro.asp is not an HTTPS link _site/es/lecciones/visualizacion-y-animacion-de-tablas-historicas-con-R.html,120,'a' 
tag is missing a reference _site/es/lecciones/visualizacion-y-animacion-de-tablas-historicas-con-R.html,139,'a' tag is missing a reference _site/es/lecciones/visualizacion-y-animacion-de-tablas-historicas-con-R.html,176,'a' tag is missing a reference @@ -2414,11 +1229,6 @@ _site/fr/apropos.html,88,'a' tag is missing a reference _site/fr/apropos.html,107,'a' tag is missing a reference _site/fr/apropos.html,144,'a' tag is missing a reference _site/fr/apropos.html,192,'a' tag is missing a reference -_site/fr/apropos.html,266,http://dhawards.org/dhawards2016/results/ is not an HTTPS link -_site/fr/apropos.html,266,http://dhawards.org/dhawards2017/results/ is not an HTTPS link -_site/fr/apropos.html,266,http://humanidadesdigitaleshispanicas.es/ is not an HTTPS link -_site/fr/apropos.html,266,http://dhawards.org/dhawards2022/results/ is not an HTTPS link -_site/fr/apropos.html,279,http://digitalhistoryhacks.blogspot.com/2008/01/programming-historian.html is not an HTTPS link _site/fr/consignes-auteurs.html,88,'a' tag is missing a reference _site/fr/consignes-auteurs.html,107,'a' tag is missing a reference _site/fr/consignes-auteurs.html,144,'a' tag is missing a reference @@ -2431,7 +1241,6 @@ _site/fr/consignes-redacteurs.html,88,'a' tag is missing a reference _site/fr/consignes-redacteurs.html,107,'a' tag is missing a reference _site/fr/consignes-redacteurs.html,144,'a' tag is missing a reference _site/fr/consignes-redacteurs.html,192,'a' tag is missing a reference -_site/fr/consignes-redacteurs.html,583,http://www.loc.gov/maps/collections is not an HTTPS link _site/fr/consignes-traducteurs.html,88,'a' tag is missing a reference _site/fr/consignes-traducteurs.html,107,'a' tag is missing a reference _site/fr/consignes-traducteurs.html,144,'a' tag is missing a reference @@ -2440,8 +1249,6 @@ _site/fr/contribuer.html,88,'a' tag is missing a reference _site/fr/contribuer.html,107,'a' tag is missing a reference _site/fr/contribuer.html,144,'a' tag is missing a reference _site/fr/contribuer.html,192,'a' tag is missing a reference -_site/fr/contribuer.html,298,http://www.worldcat.org/title/programming-historian/oclc/951537099 is not an HTTPS link -_site/fr/contribuer.html,300,http://purdue-primo-prod.hosted.exlibrisgroup.com/primo_library/libweb/action/dlDisplay.do?vid=PURDUE&search_scope=everything&docId=PURDUE_ALMA51671812890001081&fn=permalink is not an HTTPS link _site/fr/dons.html,88,'a' tag is missing a reference _site/fr/dons.html,107,'a' tag is missing a reference _site/fr/dons.html,144,'a' tag is missing a reference @@ -2450,72 +1257,6 @@ _site/fr/equipe-projet.html,88,'a' tag is missing a reference _site/fr/equipe-projet.html,107,'a' tag is missing a reference _site/fr/equipe-projet.html,144,'a' tag is missing a reference _site/fr/equipe-projet.html,192,'a' tag is missing a reference -_site/fr/equipe-projet.html,310,http://twitter.com/maxcarlons is not an HTTPS link -_site/fr/equipe-projet.html,313,http://github.com/carlonim is not an HTTPS link -_site/fr/equipe-projet.html,414,http://github.com/lachapot is not an HTTPS link -_site/fr/equipe-projet.html,512,http://twitter.com/cosovschi is not an HTTPS link -_site/fr/equipe-projet.html,515,http://github.com/digitalkosovski is not an HTTPS link -_site/fr/equipe-projet.html,618,http://github.com/caiocmello is not an HTTPS link -_site/fr/equipe-projet.html,1182,http://github.com/semanticnoodles is not an HTTPS link -_site/fr/equipe-projet.html,1278,http://twitter.com/nabsiddiqui is not an HTTPS link 
-_site/fr/equipe-projet.html,1281,http://github.com/nabsiddiqui is not an HTTPS link -_site/fr/equipe-projet.html,1631,http://twitter.com/giulia_taurino is not an HTTPS link -_site/fr/equipe-projet.html,1634,http://github.com/giuliataurino is not an HTTPS link -_site/fr/equipe-projet.html,1804,http://www.alexwermercolan.com/ is not an HTTPS link -_site/fr/equipe-projet.html,1810,http://twitter.com/alexwermercolan is not an HTTPS link -_site/fr/equipe-projet.html,1813,http://github.com/hawc2 is not an HTTPS link -_site/fr/equipe-projet.html,2059,http://www.mariajoseafanador.com is not an HTTPS link -_site/fr/equipe-projet.html,2065,http://twitter.com/mariajoafana is not an HTTPS link -_site/fr/equipe-projet.html,2068,http://github.com/mariajoafana is not an HTTPS link -_site/fr/equipe-projet.html,2534,http://twitter.com/IsaGribomont is not an HTTPS link -_site/fr/equipe-projet.html,2537,http://github.com/isag91 is not an HTTPS link -_site/fr/equipe-projet.html,2745,http://twitter.com/espejolento is not an HTTPS link -_site/fr/equipe-projet.html,2748,http://github.com/silviaegt is not an HTTPS link -_site/fr/equipe-projet.html,3036,http://twitter.com/jenniferisve is not an HTTPS link -_site/fr/equipe-projet.html,3039,http://github.com/jenniferisasi is not an HTTPS link -_site/fr/equipe-projet.html,3361,http://twitter.com/enetreseles is not an HTTPS link -_site/fr/equipe-projet.html,3364,http://github.com/nllano is not an HTTPS link -_site/fr/equipe-projet.html,3568,http://twitter.com/jgob is not an HTTPS link -_site/fr/equipe-projet.html,3571,http://github.com/joshuagob is not an HTTPS link -_site/fr/equipe-projet.html,3863,http://twitter.com/rivaquiroga is not an HTTPS link -_site/fr/equipe-projet.html,3866,http://github.com/rivaquiroga is not an HTTPS link -_site/fr/equipe-projet.html,4157,http://github.com/nivaca is not an HTTPS link -_site/fr/equipe-projet.html,4370,http://github.com/marie-flesch is not an HTTPS link -_site/fr/equipe-projet.html,4513,http://github.com/matgille is not an HTTPS link -_site/fr/equipe-projet.html,4746,http://github.com/mhersent is not an HTTPS link -_site/fr/equipe-projet.html,4804,http://twitter.com/superHH is not an HTTPS link -_site/fr/equipe-projet.html,5056,http://github.com/DMathelier is not an HTTPS link -_site/fr/equipe-projet.html,5190,http://twitter.com/emilienschultz is not an HTTPS link -_site/fr/equipe-projet.html,5193,http://github.com/emilienschultz is not an HTTPS link -_site/fr/equipe-projet.html,5317,http://twitter.com/davvalent is not an HTTPS link -_site/fr/equipe-projet.html,5320,http://github.com/davvalent is not an HTTPS link -_site/fr/equipe-projet.html,5449,http://github.com/AlexandreWa is not an HTTPS link -_site/fr/equipe-projet.html,5584,http://github.com/josircg is not an HTTPS link -_site/fr/equipe-projet.html,5842,http://twitter.com/danielalvesfcsh is not an HTTPS link -_site/fr/equipe-projet.html,5845,http://github.com/DanielAlvesLABDH is not an HTTPS link -_site/fr/equipe-projet.html,6107,http://twitter.com/ericbrasiln is not an HTTPS link -_site/fr/equipe-projet.html,6110,http://github.com/ericbrasiln is not an HTTPS link -_site/fr/equipe-projet.html,6301,http://github.com/luisferla is not an HTTPS link -_site/fr/equipe-projet.html,6543,http://twitter.com/jimmy_medeiros is not an HTTPS link -_site/fr/equipe-projet.html,6546,http://github.com/JimmyMedeiros82 is not an HTTPS link -_site/fr/equipe-projet.html,6781,http://github.com/joanacvp is not an HTTPS link -_site/fr/equipe-projet.html,7027,http://twitter.com/araceletorres 
is not an HTTPS link -_site/fr/equipe-projet.html,7030,http://github.com/aracele is not an HTTPS link -_site/fr/equipe-projet.html,7286,http://twitter.com/j_w_baker is not an HTTPS link -_site/fr/equipe-projet.html,7289,http://github.com/drjwbaker is not an HTTPS link -_site/fr/equipe-projet.html,7721,http://adamcrymble.org is not an HTTPS link -_site/fr/equipe-projet.html,7727,http://twitter.com/Adam_Crymble is not an HTTPS link -_site/fr/equipe-projet.html,7730,http://github.com/acrymble is not an HTTPS link -_site/fr/equipe-projet.html,8198,http://github.com/adamfarquhar is not an HTTPS link -_site/fr/equipe-projet.html,8258,http://twitter.com/jenniferisve is not an HTTPS link -_site/fr/equipe-projet.html,8261,http://github.com/jenniferisasi is not an HTTPS link -_site/fr/equipe-projet.html,8589,http://twitter.com/rivaquiroga is not an HTTPS link -_site/fr/equipe-projet.html,8592,http://github.com/rivaquiroga is not an HTTPS link -_site/fr/equipe-projet.html,8878,http://twitter.com/amsichani is not an HTTPS link -_site/fr/equipe-projet.html,8881,http://github.com/amsichani is not an HTTPS link -_site/fr/equipe-projet.html,9221,http://twitter.com/AnisaHawes is not an HTTPS link -_site/fr/equipe-projet.html,9224,http://github.com/anisa-hawes is not an HTTPS link -_site/fr/equipe-projet.html,9433,http://github.com/charlottejmc is not an HTTPS link _site/fr/evenements.html,88,'a' tag is missing a reference _site/fr/evenements.html,107,'a' tag is missing a reference _site/fr/evenements.html,144,'a' tag is missing a reference @@ -2532,69 +1273,30 @@ _site/fr/lecons/analyse-corpus-antconc.html,120,'a' tag is missing a reference _site/fr/lecons/analyse-corpus-antconc.html,139,'a' tag is missing a reference _site/fr/lecons/analyse-corpus-antconc.html,176,'a' tag is missing a reference _site/fr/lecons/analyse-corpus-antconc.html,224,'a' tag is missing a reference -_site/fr/lecons/analyse-corpus-antconc.html,578,http://voyant-tools.org/ is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,582,http://voyant-tools.org/ is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,582,http://www.laurenceanthony.net/software/antconc/ is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,582,http://www.laurenceanthony.net/ is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,582,http://hfroehli.ch/2014/05/11/intro-bibliography-corpus-linguistics/ is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,589,http://www.laurenceanthony.net/software/antconc/ is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,590,http://www.laurenceanthony.net/software/antconc/releases/AntConc324/ is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,620,http://notepad-plus-plus.org/ is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,620,http://www.barebones.com/products/textwrangler/ is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,629,http://www.nltk.org/ is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,631,http://www.amazon.com/Developing-Linguistic-Corpora-Practice-Guides/dp/1842172050/ref=sr_1_1_1 is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,968,http://www.wordfrequency.info/free.asp is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,1022,http://www.lexically.net/downloads/version6/HTML/index.html?keyness_definition.htm is not an HTTPS link 
-_site/fr/lecons/analyse-corpus-antconc.html,1022,http://www.laurenceanthony.net/software/antconc/releases/AntConc335/help.pdf is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,1087,http://hfroehlich.wordpress.com/2014/05/11/intro-bibliography-corpus-linguistics/ is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,1088,http://hfroehli.ch/workshops/getting-started-with-antconc/ is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,1091,http://edutechwiki.unige.ch/fr/AntConc# is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,1092,http://explorationdecorpus.corpusecrits.huma-num.fr/antconc/ is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,1093,http://cid.ens-lyon.fr/ac_article.asp?fic=antconc.asp is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,1095,http://ancilla.unice.fr/ is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,1095,http://iramuteq.org/ is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,1095,http://www.lexi-co.com/ is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,1095,http://textometrie.ens-lyon.fr/?lang=fr is not an HTTPS link -_site/fr/lecons/analyse-corpus-antconc.html,1099,http://lexicometrica.univ-paris3.fr/livre/st94/st94-tdm.html is not an HTTPS link _site/fr/lecons/analyse-de-documents-avec-tfidf.html,135,'a' tag is missing a reference _site/fr/lecons/analyse-de-documents-avec-tfidf.html,154,'a' tag is missing a reference _site/fr/lecons/analyse-de-documents-avec-tfidf.html,191,'a' tag is missing a reference _site/fr/lecons/analyse-de-documents-avec-tfidf.html,239,'a' tag is missing a reference -_site/fr/lecons/analyse-de-documents-avec-tfidf.html,591,http://www.worldcat.org/oclc/1232233436 is not an HTTPS link -_site/fr/lecons/analyse-de-documents-avec-tfidf.html,1678,http://scikit-learn.org/stable/install.html is not an HTTPS link _site/fr/lecons/analyse-donnees-tabulaires-R.html,119,'a' tag is missing a reference _site/fr/lecons/analyse-donnees-tabulaires-R.html,138,'a' tag is missing a reference _site/fr/lecons/analyse-donnees-tabulaires-R.html,175,'a' tag is missing a reference _site/fr/lecons/analyse-donnees-tabulaires-R.html,223,'a' tag is missing a reference -_site/fr/lecons/analyse-donnees-tabulaires-R.html,1099,http://web.archive.org/web/20191015004305/https://www.nceas.ucsb.edu/files/scicomp/Dloads/RProgramming/BestFirstRTutorial.pdf is not an HTTPS link _site/fr/lecons/analyse-reseau-python.html,125,'a' tag is missing a reference _site/fr/lecons/analyse-reseau-python.html,144,'a' tag is missing a reference _site/fr/lecons/analyse-reseau-python.html,181,'a' tag is missing a reference _site/fr/lecons/analyse-reseau-python.html,229,'a' tag is missing a reference -_site/fr/lecons/analyse-reseau-python.html,606,http://www.oxforddnb.com is not an HTTPS link -_site/fr/lecons/analyse-reseau-python.html,1063,http://pandas.pydata.org/ is not an HTTPS link _site/fr/lecons/calibration-radiocarbone-avec-r.html,135,'a' tag is missing a reference _site/fr/lecons/calibration-radiocarbone-avec-r.html,154,'a' tag is missing a reference _site/fr/lecons/calibration-radiocarbone-avec-r.html,191,'a' tag is missing a reference _site/fr/lecons/calibration-radiocarbone-avec-r.html,239,'a' tag is missing a reference -_site/fr/lecons/calibration-radiocarbone-avec-r.html,709,http://calib.org is not an HTTPS link _site/fr/lecons/comprendre-les-expressions-regulieres.html,119,'a' tag is missing a reference 
_site/fr/lecons/comprendre-les-expressions-regulieres.html,138,'a' tag is missing a reference _site/fr/lecons/comprendre-les-expressions-regulieres.html,175,'a' tag is missing a reference _site/fr/lecons/comprendre-les-expressions-regulieres.html,223,'a' tag is missing a reference -_site/fr/lecons/comprendre-les-expressions-regulieres.html,619,http://www.libreoffice.org/download is not an HTTPS link -_site/fr/lecons/comprendre-les-expressions-regulieres.html,665,http://archive.org/details/jstor-4560629/ is not an HTTPS link -_site/fr/lecons/comprendre-les-expressions-regulieres.html,668,http://archive.org/stream/jstor-4560629/4560629#page/n0/mode/2up is not an HTTPS link -_site/fr/lecons/comprendre-les-expressions-regulieres.html,688,http://archive.org/stream/jstor-4560629/4560629_djvu.txt is not an HTTPS link -_site/fr/lecons/comprendre-les-expressions-regulieres.html,1461,http://rubular.com/ is not an HTTPS link -_site/fr/lecons/comprendre-les-expressions-regulieres.html,1466,http://dh.obdurodon.org/regex.html is not an HTTPS link _site/fr/lecons/comprendre-les-pages-web.html,121,'a' tag is missing a reference _site/fr/lecons/comprendre-les-pages-web.html,140,'a' tag is missing a reference _site/fr/lecons/comprendre-les-pages-web.html,177,'a' tag is missing a reference _site/fr/lecons/comprendre-les-pages-web.html,225,'a' tag is missing a reference -_site/fr/lecons/comprendre-les-pages-web.html,581,http://www.w3schools.com/html/default.asp is not an HTTPS link -_site/fr/lecons/comprendre-les-pages-web.html,656,http://www.w3schools.com/html/default.asp is not an HTTPS link -_site/fr/lecons/comprendre-les-pages-web.html,658,http://www.w3schools.com/html/html5_intro.asp is not an HTTPS link _site/fr/lecons/concevoir-base-donnees-nodegoat.html,117,'a' tag is missing a reference _site/fr/lecons/concevoir-base-donnees-nodegoat.html,136,'a' tag is missing a reference _site/fr/lecons/concevoir-base-donnees-nodegoat.html,173,'a' tag is missing a reference @@ -2603,8 +1305,6 @@ _site/fr/lecons/debuter-avec-markdown.html,119,'a' tag is missing a reference _site/fr/lecons/debuter-avec-markdown.html,138,'a' tag is missing a reference _site/fr/lecons/debuter-avec-markdown.html,175,'a' tag is missing a reference _site/fr/lecons/debuter-avec-markdown.html,223,'a' tag is missing a reference -_site/fr/lecons/debuter-avec-markdown.html,571,http://daringfireball.net/projects/markdown/ is not an HTTPS link -_site/fr/lecons/debuter-avec-markdown.html,575,http://github.com is not an HTTPS link _site/fr/lecons/debuter-avec-markdown.html,772,http://programminghistorian.org/ is not an HTTPS link _site/fr/lecons/decomptes-de-frequences-de-mots-en-python.html,121,'a' tag is missing a reference _site/fr/lecons/decomptes-de-frequences-de-mots-en-python.html,140,'a' tag is missing a reference @@ -2618,16 +1318,6 @@ _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,121,'a' tag _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,140,'a' tag is missing a reference _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,177,'a' tag is missing a reference _site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,225,'a' tag is missing a reference -_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,582,http://www.ccs.neu.edu/home/dasmith/ is not an HTTPS link -_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,745,http://spark.apache.org/downloads is not an HTTPS link 
-_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,953,http://jsonlines.org/ is not an HTTPS link -_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1529,http://ceur-ws.org/Vol-2253/paper22.pdf is not an HTTPS link -_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1530,http://dx.doi.org/10.1093/alh/ajv029 is not an HTTPS link -_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1531,http://dx.doi.org/10.1093/alh/ajv028 is not an HTTPS link -_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1532,http://dx.doi.org/10.1080/1461670x.2020.1761865 is not an HTTPS link -_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1534,http://dx.doi.org/10.1007/978-3-319-12655-5_11 is not an HTTPS link -_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1535,http://dx.doi.org/10.22148/16.034 is not an HTTPS link -_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1538,http://dx.doi.org/10.1145/2682571.2797068 is not an HTTPS link _site/fr/lecons/du-html-a-une-liste-de-mots-1.html,121,'a' tag is missing a reference _site/fr/lecons/du-html-a-une-liste-de-mots-1.html,140,'a' tag is missing a reference _site/fr/lecons/du-html-a-une-liste-de-mots-1.html,177,'a' tag is missing a reference @@ -2640,13 +1330,10 @@ _site/fr/lecons/generer-jeu-donnees-texte-ocr.html,119,'a' tag is missing a refe _site/fr/lecons/generer-jeu-donnees-texte-ocr.html,138,'a' tag is missing a reference _site/fr/lecons/generer-jeu-donnees-texte-ocr.html,175,'a' tag is missing a reference _site/fr/lecons/generer-jeu-donnees-texte-ocr.html,223,'a' tag is missing a reference -_site/fr/lecons/generer-jeu-donnees-texte-ocr.html,567,http://www.worldcat.org/oclc/17591390 is not an HTTPS link -_site/fr/lecons/generer-jeu-donnees-texte-ocr.html,1631,http://brat.nlplab.org/ is not an HTTPS link _site/fr/lecons/gestion-manipulation-donnees-r.html,119,'a' tag is missing a reference _site/fr/lecons/gestion-manipulation-donnees-r.html,138,'a' tag is missing a reference _site/fr/lecons/gestion-manipulation-donnees-r.html,175,'a' tag is missing a reference _site/fr/lecons/gestion-manipulation-donnees-r.html,223,'a' tag is missing a reference -_site/fr/lecons/gestion-manipulation-donnees-r.html,584,http://tidyverse.org/ is not an HTTPS link _site/fr/lecons/index.html,88,'a' tag is missing a reference _site/fr/lecons/index.html,107,'a' tag is missing a reference _site/fr/lecons/index.html,144,'a' tag is missing a reference @@ -2659,9 +1346,6 @@ _site/fr/lecons/installation-windows-py.html,121,'a' tag is missing a reference _site/fr/lecons/installation-windows-py.html,140,'a' tag is missing a reference _site/fr/lecons/installation-windows-py.html,177,'a' tag is missing a reference _site/fr/lecons/installation-windows-py.html,225,'a' tag is missing a reference -_site/fr/lecons/installation-windows-py.html,548,http://www.python.org/ is not an HTTPS link -_site/fr/lecons/installation-windows-py.html,556,http://wiki.python.org/moin/PythonEditors/ is not an HTTPS link -_site/fr/lecons/installation-windows-py.html,608,http://en.wikipedia.org/wiki/UTF-8 is not an HTTPS link _site/fr/lecons/installer-ide-python-linux.html,121,'a' tag is missing a reference _site/fr/lecons/installer-ide-python-linux.html,140,'a' tag is missing a reference _site/fr/lecons/installer-ide-python-linux.html,177,'a' tag is missing a reference @@ -2670,28 +1354,10 @@ _site/fr/lecons/intro-a-bash-et-zsh.html,121,'a' tag is missing a reference 
_site/fr/lecons/intro-a-bash-et-zsh.html,140,'a' tag is missing a reference _site/fr/lecons/intro-a-bash-et-zsh.html,177,'a' tag is missing a reference _site/fr/lecons/intro-a-bash-et-zsh.html,225,'a' tag is missing a reference -_site/fr/lecons/intro-a-bash-et-zsh.html,817,http://www.gutenberg.org/ebooks/2600 is not an HTTPS link _site/fr/lecons/intro-aux-bots-twitter.html,119,'a' tag is missing a reference _site/fr/lecons/intro-aux-bots-twitter.html,138,'a' tag is missing a reference _site/fr/lecons/intro-aux-bots-twitter.html,175,'a' tag is missing a reference _site/fr/lecons/intro-aux-bots-twitter.html,223,'a' tag is missing a reference -_site/fr/lecons/intro-aux-bots-twitter.html,561,http://tracery.io is not an HTTPS link -_site/fr/lecons/intro-aux-bots-twitter.html,561,http://cheapbotsdonequick.com/ is not an HTTPS link -_site/fr/lecons/intro-aux-bots-twitter.html,564,http://www.sciencedirect.com/science/article/pii/S0747563213003129 is not an HTTPS link -_site/fr/lecons/intro-aux-bots-twitter.html,564,http://www.docnow.io/ is not an HTTPS link -_site/fr/lecons/intro-aux-bots-twitter.html,651,http://json.org/ is not an HTTPS link -_site/fr/lecons/intro-aux-bots-twitter.html,653,http://tracery.io/ is not an HTTPS link -_site/fr/lecons/intro-aux-bots-twitter.html,655,http://twitter.com/tinyarchae is not an HTTPS link -_site/fr/lecons/intro-aux-bots-twitter.html,655,http://web.archive.org/web/20180131161516/https://publicarchaeologyconference.wordpress.com/ is not an HTTPS link -_site/fr/lecons/intro-aux-bots-twitter.html,665,http://tracery.io is not an HTTPS link -_site/fr/lecons/intro-aux-bots-twitter.html,838,http://cheapbotsdonequick.com/ is not an HTTPS link -_site/fr/lecons/intro-aux-bots-twitter.html,888,http://tinysubversions.com/2013/03/basic-twitter-bot-etiquette/ is not an HTTPS link -_site/fr/lecons/intro-aux-bots-twitter.html,907,http://unicode.org/emoji/charts/full-emoji-list.html is not an HTTPS link -_site/fr/lecons/intro-aux-bots-twitter.html,930,http://www.crystalcodepalace.com/traceryTut.html is not an HTTPS link -_site/fr/lecons/intro-aux-bots-twitter.html,946,http://cheapbotsdonequick.com/ is not an HTTPS link -_site/fr/lecons/intro-aux-bots-twitter.html,986,http://cheapbotsdonequick.com/source/softlandscapes is not an HTTPS link -_site/fr/lecons/intro-aux-bots-twitter.html,1006,http://www.codingblocks.net/videos/generating-music-in-javascript/ is not an HTTPS link -_site/fr/lecons/intro-aux-bots-twitter.html,1012,http://www.zachwhalen.net/posts/how-to-make-a-twitter-bot-with-google-spreadsheets-version-04/ is not an HTTPS link _site/fr/lecons/introduction-a-heurist.html,117,'a' tag is missing a reference _site/fr/lecons/introduction-a-heurist.html,136,'a' tag is missing a reference _site/fr/lecons/introduction-a-heurist.html,173,'a' tag is missing a reference @@ -2702,10 +1368,6 @@ _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html,119,'a' tag is mi _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html,138,'a' tag is missing a reference _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html,175,'a' tag is missing a reference _site/fr/lecons/introduction-a-la-stylometrie-avec-python.html,223,'a' tag is missing a reference -_site/fr/lecons/introduction-a-la-stylometrie-avec-python.html,609,http://www.gutenberg.org/cache/epub/1404/pg1404.txt is not an HTTPS link -_site/fr/lecons/introduction-a-la-stylometrie-avec-python.html,625,http://www.nltk.org/book/ is not an HTTPS link 
-_site/fr/lecons/introduction-a-la-stylometrie-avec-python.html,629,http://www.nltk.org/book/ is not an HTTPS link -_site/fr/lecons/introduction-a-la-stylometrie-avec-python.html,982,http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/ is not an HTTPS link _site/fr/lecons/introduction-aux-carnets-jupyter-notebooks.html,123,'a' tag is missing a reference _site/fr/lecons/introduction-aux-carnets-jupyter-notebooks.html,142,'a' tag is missing a reference _site/fr/lecons/introduction-aux-carnets-jupyter-notebooks.html,179,'a' tag is missing a reference @@ -2716,24 +1378,14 @@ _site/fr/lecons/introduction-et-installation.html,121,'a' tag is missing a refer _site/fr/lecons/introduction-et-installation.html,140,'a' tag is missing a reference _site/fr/lecons/introduction-et-installation.html,177,'a' tag is missing a reference _site/fr/lecons/introduction-et-installation.html,225,'a' tag is missing a reference -_site/fr/lecons/introduction-et-installation.html,544,http://www.python.org/ is not an HTTPS link -_site/fr/lecons/introduction-et-installation.html,544,http://www.crummy.com/software/BeautifulSoup/ is not an HTTPS link -_site/fr/lecons/introduction-et-installation.html,544,http://www.activestate.com/komodo-edit is not an HTTPS link -_site/fr/lecons/introduction-et-installation.html,544,http://wiki.python.org/moin/PythonEditors/ is not an HTTPS link -_site/fr/lecons/introduction-et-installation.html,547,http://sebastianraschka.com/Articles/2014_python_2_3_key_diff.html is not an HTTPS link _site/fr/lecons/manipuler-chaines-caracteres-python.html,121,'a' tag is missing a reference _site/fr/lecons/manipuler-chaines-caracteres-python.html,140,'a' tag is missing a reference _site/fr/lecons/manipuler-chaines-caracteres-python.html,177,'a' tag is missing a reference _site/fr/lecons/manipuler-chaines-caracteres-python.html,225,'a' tag is missing a reference -_site/fr/lecons/manipuler-chaines-caracteres-python.html,713,http://www.worldcat.org/oclc/1061273329 is not an HTTPS link _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html,123,'a' tag is missing a reference _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html,142,'a' tag is missing a reference _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html,179,'a' tag is missing a reference _site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html,227,'a' tag is missing a reference -_site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html,568,http://openrefine.org is not an HTTPS link -_site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html,587,http://vis.stanford.edu/papers/wrangler/ is not an HTTPS link -_site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html,587,http://openrefine.org is not an HTTPS link -_site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html,589,http://www.loc.gov/index.html is not an HTTPS link _site/fr/lecons/normaliser-donnees-textuelles-python.html,121,'a' tag is missing a reference _site/fr/lecons/normaliser-donnees-textuelles-python.html,140,'a' tag is missing a reference _site/fr/lecons/normaliser-donnees-textuelles-python.html,177,'a' tag is missing a reference @@ -2746,33 +1398,10 @@ _site/fr/lecons/preserver-ses-donnees-de-recherche.html,119,'a' tag is missing a _site/fr/lecons/preserver-ses-donnees-de-recherche.html,138,'a' tag is missing a reference _site/fr/lecons/preserver-ses-donnees-de-recherche.html,175,'a' tag is missing a reference _site/fr/lecons/preserver-ses-donnees-de-recherche.html,223,'a' tag is missing a reference 
-_site/fr/lecons/preserver-ses-donnees-de-recherche.html,608,http://notepad-plus-plus.org is not an HTTPS link -_site/fr/lecons/preserver-ses-donnees-de-recherche.html,639,http://cradledincaricature.com/2014/02/06/comic-art-beyond-the-print-shop/ is not an HTTPS link -_site/fr/lecons/preserver-ses-donnees-de-recherche.html,646,http://www.theguardian.com/uk-news/2014/feb/20/rebekah-brooks-rupert-murdoch-phone-hacking-trial is not an HTTPS link -_site/fr/lecons/preserver-ses-donnees-de-recherche.html,653,http://www.cartoons.ac.uk/record/SBD0931 is not an HTTPS link -_site/fr/lecons/preserver-ses-donnees-de-recherche.html,660,http://www.oldbaileyonline.org/browse.jsp?ref=OA16780417 is not an HTTPS link -_site/fr/lecons/preserver-ses-donnees-de-recherche.html,766,http://historyonics.blogspot.co.uk/2014/01/judging-book-by-its-url.html is not an HTTPS link -_site/fr/lecons/preserver-ses-donnees-de-recherche.html,770,http://earlymodernnotes.wordpress.com/2013/05/18/unclean-unclean-what-historians-can-do-about-sharing-our-messy-research-data/ is not an HTTPS link -_site/fr/lecons/preserver-ses-donnees-de-recherche.html,783,http://britishlibrary.typepad.co.uk/collectioncare/2013/09/the-twelve-principles-of-digital-preservation.html is not an HTTPS link -_site/fr/lecons/preserver-ses-donnees-de-recherche.html,792,http://data-archive.ac.uk/create-manage/document is not an HTTPS link _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,121,'a' tag is missing a reference _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,140,'a' tag is missing a reference _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,177,'a' tag is missing a reference _site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,225,'a' tag is missing a reference -_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,578,http://notepad-plus-plus.org is not an HTTPS link -_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,858,http://editor.citationstyles.org/about/ is not an HTTPS link -_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,894,http://stackoverflow.com/questions/tagged/pandoc is not an HTTPS link -_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,896,http://web.archive.org/web/20140120195538/http://mashable.com/2013/06/24/markdown-tools/ is not an HTTPS link -_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,896,http://mouapp.com/ is not an HTTPS link -_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,896,http://writemonkey.com is not an HTTPS link -_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,896,http://www.sublimetext.com/ is not an HTTPS link -_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,896,http://prose.io is not an HTTPS link -_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,896,http://www.authorea.com is not an HTTPS link -_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,896,http://www.draftin.com is not an HTTPS link -_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,898,http://github.com/fauno/jekyll-pandoc-multiple-formats is not an HTTPS link -_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,898,http://jaspervdj.be/hakyll/ is not an HTTPS link -_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,900,http://readthedocs.org is not an HTTPS link -_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,921,http://www.antipope.org/charlie/blog-static/2013/10/why-microsoft-word-must-die.html is not an 
HTTPS link _site/fr/lecons/reutilisation-de-code-et-modularite.html,121,'a' tag is missing a reference _site/fr/lecons/reutilisation-de-code-et-modularite.html,140,'a' tag is missing a reference _site/fr/lecons/reutilisation-de-code-et-modularite.html,177,'a' tag is missing a reference @@ -2781,14 +1410,10 @@ _site/fr/lecons/telecharger-des-pages-web-avec-python.html,121,'a' tag is missin _site/fr/lecons/telecharger-des-pages-web-avec-python.html,140,'a' tag is missing a reference _site/fr/lecons/telecharger-des-pages-web-avec-python.html,177,'a' tag is missing a reference _site/fr/lecons/telecharger-des-pages-web-avec-python.html,225,'a' tag is missing a reference -_site/fr/lecons/telecharger-des-pages-web-avec-python.html,566,http://www.oldbaileyonline.org/ is not an HTTPS link -_site/fr/lecons/telecharger-des-pages-web-avec-python.html,630,http://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes is not an HTTPS link -_site/fr/lecons/telecharger-des-pages-web-avec-python.html,630,http://www.oldbaileyonline.org/images.jsp?doc=178006280084 is not an HTTPS link _site/fr/lecons/transcription-automatisee-graphies-non-latines.html,133,'a' tag is missing a reference _site/fr/lecons/transcription-automatisee-graphies-non-latines.html,152,'a' tag is missing a reference _site/fr/lecons/transcription-automatisee-graphies-non-latines.html,189,'a' tag is missing a reference _site/fr/lecons/transcription-automatisee-graphies-non-latines.html,237,'a' tag is missing a reference -_site/fr/lecons/transcription-automatisee-graphies-non-latines.html,2250,http://doi.org/10.30687/arm/9372-8175/2022/01/005 is not an HTTPS link _site/fr/lecons/travailler-avec-des-fichiers-texte.html,121,'a' tag is missing a reference _site/fr/lecons/travailler-avec-des-fichiers-texte.html,140,'a' tag is missing a reference _site/fr/lecons/travailler-avec-des-fichiers-texte.html,177,'a' tag is missing a reference @@ -2797,7 +1422,6 @@ _site/fr/nos-soutiens.html,88,'a' tag is missing a reference _site/fr/nos-soutiens.html,107,'a' tag is missing a reference _site/fr/nos-soutiens.html,144,'a' tag is missing a reference _site/fr/nos-soutiens.html,192,'a' tag is missing a reference -_site/fr/nos-soutiens.html,275,http://cdrh.unl.edu/ is not an HTTPS link _site/fr/pi.html,88,'a' tag is missing a reference _site/fr/pi.html,107,'a' tag is missing a reference _site/fr/pi.html,144,'a' tag is missing a reference @@ -2822,22 +1446,6 @@ _site/fr/recherche.html,88,'a' tag is missing a reference _site/fr/recherche.html,107,'a' tag is missing a reference _site/fr/recherche.html,144,'a' tag is missing a reference _site/fr/recherche.html,192,'a' tag is missing a reference -_site/fr/recherche.html,255,http://niche-canada.org/wp-content/uploads/2013/09/programming-historian-1.pdf is not an HTTPS link -_site/fr/recherche.html,262,http://www.diva-portal.org/smash/record.jsf?pid=diva2%3A1508542&dswid=7551 is not an HTTPS link -_site/fr/recherche.html,267,http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ is not an HTTPS link -_site/fr/recherche.html,279,http://www.digitalhumanities.org/dhq/vol/16/2/000585/000585.html is not an HTTPS link -_site/fr/recherche.html,285,http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 is not an HTTPS link -_site/fr/recherche.html,287,http://web.archive.org/web/20180713014622/http://dhcommons.org/journal/issue-1/editorial-sustainability-and-open-peer-review-programming-historian is not an HTTPS link 
-_site/fr/recherche.html,288,http://www.themacroscope.org/2.0/ is not an HTTPS link -_site/fr/recherche.html,296,http://doi.org/10.5281/zenodo.3813763 is not an HTTPS link -_site/fr/recherche.html,309,http://www.iea.usp.br/eventos/historia-digital-educacao-caminhos-cruzados is not an HTTPS link -_site/fr/recherche.html,396,http://ixa2.si.ehu.eus/intele/?q=webinars is not an HTTPS link -_site/fr/recherche.html,399,http://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/ is not an HTTPS link -_site/fr/recherche.html,412,http://fredgibbs.net/assets/images/ph-poster/final-board.png is not an HTTPS link -_site/fr/recherche.html,425,http://niche-canada.org/2018/03/23/a-decade-of-programming-historians/ is not an HTTPS link -_site/fr/recherche.html,426,http://fredgibbs.net/posts/reflections-former-PH-editor is not an HTTPS link -_site/fr/recherche.html,427,http://clionauta.hypotheses.org/16979 is not an HTTPS link -_site/fr/recherche.html,429,http://humanidadesdigitales.net/blog/2017/03/17/the-programming-historian-en-espanol/ is not an HTTPS link _site/index.html,87,'a' tag is missing a reference _site/index.html,106,'a' tag is missing a reference _site/index.html,143,'a' tag is missing a reference @@ -2851,14 +1459,10 @@ _site/posts/DH-Award-2017.html,106,'a' tag is missing a reference _site/posts/DH-Award-2017.html,125,'a' tag is missing a reference _site/posts/DH-Award-2017.html,162,'a' tag is missing a reference _site/posts/DH-Award-2017.html,188,'a' tag is missing a reference -_site/posts/DH-Award-2017.html,281,http://dhawards.org/dhawards2017/results/ is not an HTTPS link _site/posts/FR-team.html,107,'a' tag is missing a reference _site/posts/FR-team.html,126,'a' tag is missing a reference _site/posts/FR-team.html,163,'a' tag is missing a reference _site/posts/FR-team.html,189,'a' tag is missing a reference -_site/posts/FR-team.html,285,http://www.parthenos-project.eu/ is not an HTTPS link -_site/posts/FR-team.html,285,http://www.iperionch.eu/ is not an HTTPS link -_site/posts/FR-team.html,289,http://www.humanisti.ca/ is not an HTTPS link _site/posts/Open-Education-Week.html,107,'a' tag is missing a reference _site/posts/Open-Education-Week.html,126,'a' tag is missing a reference _site/posts/Open-Education-Week.html,163,'a' tag is missing a reference @@ -2871,10 +1475,6 @@ _site/posts/PH-commitment-to-diversity.html,107,'a' tag is missing a reference _site/posts/PH-commitment-to-diversity.html,126,'a' tag is missing a reference _site/posts/PH-commitment-to-diversity.html,163,'a' tag is missing a reference _site/posts/PH-commitment-to-diversity.html,189,'a' tag is missing a reference -_site/posts/PH-commitment-to-diversity.html,283,http://www.aauw.org/research/why-so-few/ is not an HTTPS link -_site/posts/PH-commitment-to-diversity.html,371,http://web.archive.org/web/20160507170231/http://www.usnews.com/news/blogs/data-mine/2016/02/18/study-shows-women-are-better-coders-but-only-when-gender-is-hidden is not an HTTPS link -_site/posts/PH-commitment-to-diversity.html,377,http://www.pyladies.com/ is not an HTTPS link -_site/posts/PH-commitment-to-diversity.html,377,http://femtechnet.org/ is not an HTTPS link _site/posts/PH-contributors.html,107,'a' tag is missing a reference _site/posts/PH-contributors.html,126,'a' tag is missing a reference _site/posts/PH-contributors.html,163,'a' tag is missing a reference @@ -2887,26 +1487,6 @@ _site/posts/Uses-Of-The-Programming-Historian.html,105,'a' tag is missing a refe 
_site/posts/Uses-Of-The-Programming-Historian.html,124,'a' tag is missing a reference _site/posts/Uses-Of-The-Programming-Historian.html,161,'a' tag is missing a reference _site/posts/Uses-Of-The-Programming-Historian.html,187,'a' tag is missing a reference -_site/posts/Uses-Of-The-Programming-Historian.html,291,http://discontents.com.au/unremembering-the-forgotten is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,293,http://lj.libraryjournal.com/2014/09/opinion/not-dead-yet/connecting-researchers-to-new-digital-tools-not-dead-yet/#_ is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,294,http://muse.jhu.edu/login?auth=0&type=summary&url=/journals/ieee_annals_of_the_history_of_computing/v036/36.2.turkel.html is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,295,http://journalofdigitalhumanities.org/2-1/dh-contribution-to-topic-modeling/ is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,296,http://tedunderwood.com/2012/12/14/what-can-topic-models-of-pmla-teach-us-about-the-history-of-literary-scholarship/ is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,297,http://aisel.aisnet.org/cgi/viewcontent.cgi?article=1072&context=amcis2009 is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,307,http://grad.craftingdigitalhistory.ca/weekly.html is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,317,http://adamcrymble.org/intro-to-digital-history-2015/ is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,318,http://web.archive.org/web/20150905233647/https://library.uoregon.edu/node/4570 is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,325,http://lincolnmullen.com/files/clio3.syllabus.hist698.2014f.pdf is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,326,http://www.wilkohardenberg.net/content/Hardenberg_DigitalHistory_Hist795.pdf is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,327,http://www.christophermchurch.com/draft-for-new-course-digital-toolbox-for-historians-unr/ is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,330,http://intro-dh-2014.andyschocket.net/syllabus/ is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,331,http://dh2014.umwblogs.org/syllabus/ is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,332,http://devdh.org/files/downloads/Guiliano_Digital_History_Syllabus_Fall2014_IUPUI.pdf is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,341,http://web.archive.org/web/20180127231436/http://www.english.upenn.edu/~jenglish/Courses/Fall2014/505Syllabus.pdf is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,342,http://digitalhumanities.rice.edu/fall-2013-syllabus/ is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,344,http://dh.chadblack.net/info/syllabus/ is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,351,http://www.scottbot.net/HIAL/wp-content/uploads/2012/09/Wilkens_DH_Syllabus_Init.pdf is not an HTTPS link -_site/posts/Uses-Of-The-Programming-Historian.html,375,http://prosop.org is not an HTTPS link _site/posts/ad-hoc-translation.html,105,'a' tag is missing a reference _site/posts/ad-hoc-translation.html,124,'a' tag is missing a reference _site/posts/ad-hoc-translation.html,161,'a' tag is missing a reference @@ -2915,11 +1495,6 @@ _site/posts/adding-to-library-catalogue.html,105,'a' tag is 
missing a reference _site/posts/adding-to-library-catalogue.html,124,'a' tag is missing a reference _site/posts/adding-to-library-catalogue.html,161,'a' tag is missing a reference _site/posts/adding-to-library-catalogue.html,187,'a' tag is missing a reference -_site/posts/adding-to-library-catalogue.html,279,http://purdue-primo-prod.hosted.exlibrisgroup.com/PURDUE:everything:PURDUE_ALMA51671812890001081 is not an HTTPS link -_site/posts/adding-to-library-catalogue.html,279,http://www.worldcat.org/oclc/951537099 is not an HTTPS link -_site/posts/adding-to-library-catalogue.html,281,http://www.twitter.com/Literature_Geek is not an HTTPS link -_site/posts/adding-to-library-catalogue.html,287,http://www.worldcat.org/oclc/951537099 is not an HTTPS link -_site/posts/adding-to-library-catalogue.html,287,http://purdue-primo-prod.hosted.exlibrisgroup.com/PURDUE:everything:PURDUE_ALMA51671812890001081 is not an HTTPS link _site/posts/anisa-welcome.html,105,'a' tag is missing a reference _site/posts/anisa-welcome.html,124,'a' tag is missing a reference _site/posts/anisa-welcome.html,161,'a' tag is missing a reference @@ -2928,12 +1503,10 @@ _site/posts/anna-maria-sichani.html,108,'a' tag is missing a reference _site/posts/anna-maria-sichani.html,127,'a' tag is missing a reference _site/posts/anna-maria-sichani.html,164,'a' tag is missing a reference _site/posts/anna-maria-sichani.html,190,'a' tag is missing a reference -_site/posts/anna-maria-sichani.html,285,http://adho.org/ is not an HTTPS link _site/posts/announcing-new-team-spanish-language-editors.html,107,'a' tag is missing a reference _site/posts/announcing-new-team-spanish-language-editors.html,126,'a' tag is missing a reference _site/posts/announcing-new-team-spanish-language-editors.html,163,'a' tag is missing a reference _site/posts/announcing-new-team-spanish-language-editors.html,189,'a' tag is missing a reference -_site/posts/announcing-new-team-spanish-language-editors.html,291,http://eadh.org/ is not an HTTPS link _site/posts/appel-a-propositions.html,107,'a' tag is missing a reference _site/posts/appel-a-propositions.html,126,'a' tag is missing a reference _site/posts/appel-a-propositions.html,163,'a' tag is missing a reference @@ -2950,9 +1523,6 @@ _site/posts/bienvenue-ph-fr.html,105,'a' tag is missing a reference _site/posts/bienvenue-ph-fr.html,124,'a' tag is missing a reference _site/posts/bienvenue-ph-fr.html,161,'a' tag is missing a reference _site/posts/bienvenue-ph-fr.html,187,'a' tag is missing a reference -_site/posts/bienvenue-ph-fr.html,279,http://programminghistorian.org/fr is not an HTTPS link -_site/posts/bienvenue-ph-fr.html,285,http://dsharp.library.cmu.edu/ is not an HTTPS link -_site/posts/bienvenue-ph-fr.html,289,http://archives.mundaneum.org/fr/versions-digitalisees/schema-de-paul-otlet-documentation-et-telecommunication is not an HTTPS link _site/posts/bogota-workshop-report.html,107,'a' tag is missing a reference _site/posts/bogota-workshop-report.html,126,'a' tag is missing a reference _site/posts/bogota-workshop-report.html,163,'a' tag is missing a reference @@ -2961,8 +1531,6 @@ _site/posts/bolentin-informativo.html,105,'a' tag is missing a reference _site/posts/bolentin-informativo.html,124,'a' tag is missing a reference _site/posts/bolentin-informativo.html,161,'a' tag is missing a reference _site/posts/bolentin-informativo.html,187,'a' tag is missing a reference -_site/posts/bolentin-informativo.html,318,http://ach2019.ach.org is not an HTTPS link 
-_site/posts/bolentin-informativo.html,326,http://humanidadesdigitaleshispanicas.es/resolucion-convocatoria-i-edicion-premios-hdh/ is not an HTTPS link _site/posts/boletin-informativo-junio20.html,107,'a' tag is missing a reference _site/posts/boletin-informativo-junio20.html,126,'a' tag is missing a reference _site/posts/boletin-informativo-junio20.html,163,'a' tag is missing a reference @@ -2983,7 +1551,6 @@ _site/posts/buletin-de-information.html,105,'a' tag is missing a reference _site/posts/buletin-de-information.html,124,'a' tag is missing a reference _site/posts/buletin-de-information.html,161,'a' tag is missing a reference _site/posts/buletin-de-information.html,187,'a' tag is missing a reference -_site/posts/buletin-de-information.html,315,http://ach2019.ach.org is not an HTTPS link _site/posts/bulletin-de-information-juin20.html,107,'a' tag is missing a reference _site/posts/bulletin-de-information-juin20.html,126,'a' tag is missing a reference _site/posts/bulletin-de-information-juin20.html,163,'a' tag is missing a reference @@ -3008,7 +1575,6 @@ _site/posts/bulletin-issue-01.html,105,'a' tag is missing a reference _site/posts/bulletin-issue-01.html,124,'a' tag is missing a reference _site/posts/bulletin-issue-01.html,161,'a' tag is missing a reference _site/posts/bulletin-issue-01.html,187,'a' tag is missing a reference -_site/posts/bulletin-issue-01.html,287,http://dhawards.org/dhawards2022/results/ is not an HTTPS link _site/posts/bulletin-issue-02.html,105,'a' tag is missing a reference _site/posts/bulletin-issue-02.html,124,'a' tag is missing a reference _site/posts/bulletin-issue-02.html,161,'a' tag is missing a reference @@ -3017,12 +1583,10 @@ _site/posts/bulletin-issue-03.html,105,'a' tag is missing a reference _site/posts/bulletin-issue-03.html,124,'a' tag is missing a reference _site/posts/bulletin-issue-03.html,161,'a' tag is missing a reference _site/posts/bulletin-issue-03.html,187,'a' tag is missing a reference -_site/posts/bulletin-issue-03.html,347,http://tinyurl.com/PH-patreon is not an HTTPS link _site/posts/bulletin-issue-04.html,105,'a' tag is missing a reference _site/posts/bulletin-issue-04.html,124,'a' tag is missing a reference _site/posts/bulletin-issue-04.html,161,'a' tag is missing a reference _site/posts/bulletin-issue-04.html,187,'a' tag is missing a reference -_site/posts/bulletin-issue-04.html,325,http://dhawards.org/dhawards2022/results/ is not an HTTPS link _site/posts/bulletin-issue-05.html,105,'a' tag is missing a reference _site/posts/bulletin-issue-05.html,124,'a' tag is missing a reference _site/posts/bulletin-issue-05.html,161,'a' tag is missing a reference @@ -3031,17 +1595,14 @@ _site/posts/bulletin-issue-06.html,105,'a' tag is missing a reference _site/posts/bulletin-issue-06.html,124,'a' tag is missing a reference _site/posts/bulletin-issue-06.html,161,'a' tag is missing a reference _site/posts/bulletin-issue-06.html,187,'a' tag is missing a reference -_site/posts/bulletin-issue-06.html,337,http://tinyurl.com/support-PH is not an HTTPS link _site/posts/call-for-editors-en.html,105,'a' tag is missing a reference _site/posts/call-for-editors-en.html,124,'a' tag is missing a reference _site/posts/call-for-editors-en.html,161,'a' tag is missing a reference _site/posts/call-for-editors-en.html,187,'a' tag is missing a reference -_site/posts/call-for-editors-en.html,288,http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ is not an HTTPS link _site/posts/call-for-editors.html,105,'a' tag is missing a reference 
_site/posts/call-for-editors.html,124,'a' tag is missing a reference _site/posts/call-for-editors.html,161,'a' tag is missing a reference _site/posts/call-for-editors.html,187,'a' tag is missing a reference -_site/posts/call-for-editors.html,288,http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ is not an HTTPS link _site/posts/call-for-fr-members.html,105,'a' tag is missing a reference _site/posts/call-for-fr-members.html,124,'a' tag is missing a reference _site/posts/call-for-fr-members.html,161,'a' tag is missing a reference @@ -3051,12 +1612,10 @@ _site/posts/call-to-action.html,126,'a' tag is missing a reference _site/posts/call-to-action.html,163,'a' tag is missing a reference _site/posts/call-to-action.html,189,'a' tag is missing a reference _site/posts/call-to-action.html,285,'a' tag is missing a reference -_site/posts/call-to-action.html,295,http://languagelog.ldc.upenn.edu/nll/?p=5315 is not an HTTPS link _site/posts/cfp-jisc-ph.html,107,'a' tag is missing a reference _site/posts/cfp-jisc-ph.html,126,'a' tag is missing a reference _site/posts/cfp-jisc-ph.html,163,'a' tag is missing a reference _site/posts/cfp-jisc-ph.html,189,'a' tag is missing a reference -_site/posts/cfp-jisc-ph.html,308,http://go-dh.github.io/mincomp/about/ is not an HTTPS link _site/posts/charlotte-welcome.html,105,'a' tag is missing a reference _site/posts/charlotte-welcome.html,124,'a' tag is missing a reference _site/posts/charlotte-welcome.html,161,'a' tag is missing a reference @@ -3085,23 +1644,14 @@ _site/posts/corpus-linguistics-in-action.html,105,'a' tag is missing a reference _site/posts/corpus-linguistics-in-action.html,124,'a' tag is missing a reference _site/posts/corpus-linguistics-in-action.html,161,'a' tag is missing a reference _site/posts/corpus-linguistics-in-action.html,187,'a' tag is missing a reference -_site/posts/corpus-linguistics-in-action.html,279,http://clic.bham.ac.uk/ is not an HTTPS link -_site/posts/corpus-linguistics-in-action.html,281,http://www.birmingham.ac.uk/schools/edacs/departments/englishlanguage/research/projects/clic/index.aspx is not an HTTPS link -_site/posts/corpus-linguistics-in-action.html,287,http://clic.bham.ac.uk is not an HTTPS link -_site/posts/corpus-linguistics-in-action.html,287,http://www.birmingham.ac.uk/schools/edacs/departments/englishlanguage/research/projects/clic/index.aspx is not an HTTPS link -_site/posts/corpus-linguistics-in-action.html,354,http://www.gutenberg.org/ebooks/766 is not an HTTPS link -_site/posts/corpus-linguistics-in-action.html,354,http://www.gutenberg.org/ebooks/821 is not an HTTPS link -_site/posts/corpus-linguistics-in-action.html,430,http://www.euppublishing.com/doi/full/10.3366/cor.2016.0102 is not an HTTPS link _site/posts/december-newsletter.html,107,'a' tag is missing a reference _site/posts/december-newsletter.html,126,'a' tag is missing a reference _site/posts/december-newsletter.html,163,'a' tag is missing a reference _site/posts/december-newsletter.html,189,'a' tag is missing a reference -_site/posts/december-newsletter.html,304,http://ahlist.org/wp-content/uploads/2021/11/AHLIST-2021-PROGRAM_Virtual_FINAL.pdf is not an HTTPS link _site/posts/dh-award-2016.html,106,'a' tag is missing a reference _site/posts/dh-award-2016.html,125,'a' tag is missing a reference _site/posts/dh-award-2016.html,162,'a' tag is missing a reference _site/posts/dh-award-2016.html,188,'a' tag is missing a reference -_site/posts/dh-award-2016.html,281,http://dhawards.org/dhawards2016/results/ is not an HTTPS link 
_site/posts/dh-publishing-assistant.html,105,'a' tag is missing a reference _site/posts/dh-publishing-assistant.html,124,'a' tag is missing a reference _site/posts/dh-publishing-assistant.html,161,'a' tag is missing a reference @@ -3110,20 +1660,10 @@ _site/posts/digital-storytelling-immigrant-stories.html,105,'a' tag is missing a _site/posts/digital-storytelling-immigrant-stories.html,124,'a' tag is missing a reference _site/posts/digital-storytelling-immigrant-stories.html,161,'a' tag is missing a reference _site/posts/digital-storytelling-immigrant-stories.html,187,'a' tag is missing a reference -_site/posts/digital-storytelling-immigrant-stories.html,279,http://cla.umn.edu/ihrc/research/immigrant-stories is not an HTTPS link -_site/posts/digital-storytelling-immigrant-stories.html,281,http://immigrants.mndigital.org/exhibits/show/immigrantstories-exhibit/item/508 is not an HTTPS link -_site/posts/digital-storytelling-immigrant-stories.html,285,http://immigrants.mndigital.org/exhibits/show/immigrantstories-exhibit is not an HTTPS link -_site/posts/digital-storytelling-immigrant-stories.html,285,http://immigrants.mndigital.org/items/browse is not an HTTPS link -_site/posts/digital-storytelling-immigrant-stories.html,287,http://immigrants.mndigital.org/exhibits/show/immigrantstories1975 is not an HTTPS link _site/posts/distant-reading-in-the-undergraduate-classroom.html,105,'a' tag is missing a reference _site/posts/distant-reading-in-the-undergraduate-classroom.html,124,'a' tag is missing a reference _site/posts/distant-reading-in-the-undergraduate-classroom.html,161,'a' tag is missing a reference _site/posts/distant-reading-in-the-undergraduate-classroom.html,187,'a' tag is missing a reference -_site/posts/distant-reading-in-the-undergraduate-classroom.html,286,http://home.heinonline.org/ is not an HTTPS link -_site/posts/distant-reading-in-the-undergraduate-classroom.html,288,http://voyant-tools.org/ is not an HTTPS link -_site/posts/distant-reading-in-the-undergraduate-classroom.html,300,http://voyant-tools.org/ is not an HTTPS link -_site/posts/distant-reading-in-the-undergraduate-classroom.html,312,http://www.themacroscope.org/?page_id=391 is not an HTTPS link -_site/posts/distant-reading-in-the-undergraduate-classroom.html,314,http://tedunderwood.com/2015/06/04/seven-ways-humanists-are-using-computers-to-understand-text/ is not an HTTPS link _site/posts/dois-for-ph.html,107,'a' tag is missing a reference _site/posts/dois-for-ph.html,126,'a' tag is missing a reference _site/posts/dois-for-ph.html,163,'a' tag is missing a reference @@ -3168,27 +1708,15 @@ _site/posts/history-of-protest.html,108,'a' tag is missing a reference _site/posts/history-of-protest.html,127,'a' tag is missing a reference _site/posts/history-of-protest.html,164,'a' tag is missing a reference _site/posts/history-of-protest.html,190,'a' tag is missing a reference -_site/posts/history-of-protest.html,283,http://www.manchesteruniversitypress.co.uk/9781526116703/ is not an HTTPS link -_site/posts/history-of-protest.html,285,http://labs.bl.uk/British+Library+Labs+Competition is not an HTTPS link -_site/posts/history-of-protest.html,285,http://politicalmeetingsmapper.co.uk is not an HTTPS link -_site/posts/history-of-protest.html,289,http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 is not an HTTPS link _site/posts/how-we-moved-to-github.html,105,'a' tag is missing a reference _site/posts/how-we-moved-to-github.html,124,'a' tag is missing a reference _site/posts/how-we-moved-to-github.html,161,'a' tag 
is missing a reference _site/posts/how-we-moved-to-github.html,187,'a' tag is missing a reference -_site/posts/how-we-moved-to-github.html,277,http://en.wikipedia.org/wiki/Static_web_page is not an HTTPS link -_site/posts/how-we-moved-to-github.html,277,http://pages.github.com is not an HTTPS link -_site/posts/how-we-moved-to-github.html,281,http://jekyllrb.com is not an HTTPS link _site/posts/how-we-moved-to-github.html,281,'a' tag is missing a reference -_site/posts/how-we-moved-to-github.html,293,http://jekyllrb.com/docs/frontmatter/ is not an HTTPS link -_site/posts/how-we-moved-to-github.html,540,http://jekyllrb.com/docs/frontmatter/ is not an HTTPS link _site/posts/infrastructure-at-ph.html,106,'a' tag is missing a reference _site/posts/infrastructure-at-ph.html,125,'a' tag is missing a reference _site/posts/infrastructure-at-ph.html,162,'a' tag is missing a reference _site/posts/infrastructure-at-ph.html,188,'a' tag is missing a reference -_site/posts/infrastructure-at-ph.html,286,http://jekyllrb.com/ is not an HTTPS link -_site/posts/infrastructure-at-ph.html,300,http://web.archive.org/ is not an HTTPS link -_site/posts/infrastructure-at-ph.html,323,http://jekyllrb.com/docs/plugins/ is not an HTTPS link _site/posts/jennifer-isasi-jose-a-motilla.html,106,'a' tag is missing a reference _site/posts/jennifer-isasi-jose-a-motilla.html,125,'a' tag is missing a reference _site/posts/jennifer-isasi-jose-a-motilla.html,162,'a' tag is missing a reference @@ -3225,7 +1753,6 @@ _site/posts/merci-les-amis.html,107,'a' tag is missing a reference _site/posts/merci-les-amis.html,126,'a' tag is missing a reference _site/posts/merci-les-amis.html,163,'a' tag is missing a reference _site/posts/merci-les-amis.html,189,'a' tag is missing a reference -_site/posts/merci-les-amis.html,287,http://www.chartes.psl.eu/fr/rubrique-admissions/master-technologies-numeriques-appliquees-histoire is not an HTTPS link _site/posts/mid-year-21-newsletter.html,107,'a' tag is missing a reference _site/posts/mid-year-21-newsletter.html,126,'a' tag is missing a reference _site/posts/mid-year-21-newsletter.html,163,'a' tag is missing a reference @@ -3234,8 +1761,6 @@ _site/posts/mid-year-newsletter.html,107,'a' tag is missing a reference _site/posts/mid-year-newsletter.html,126,'a' tag is missing a reference _site/posts/mid-year-newsletter.html,163,'a' tag is missing a reference _site/posts/mid-year-newsletter.html,189,'a' tag is missing a reference -_site/posts/mid-year-newsletter.html,287,http://programminghistorian.org/fr is not an HTTPS link -_site/posts/mid-year-newsletter.html,356,http://ach2019.ach.org is not an HTTPS link _site/posts/model-workshop.html,106,'a' tag is missing a reference _site/posts/model-workshop.html,125,'a' tag is missing a reference _site/posts/model-workshop.html,162,'a' tag is missing a reference @@ -3248,7 +1773,6 @@ _site/posts/new-lessons-page.html,105,'a' tag is missing a reference _site/posts/new-lessons-page.html,124,'a' tag is missing a reference _site/posts/new-lessons-page.html,161,'a' tag is missing a reference _site/posts/new-lessons-page.html,187,'a' tag is missing a reference -_site/posts/new-lessons-page.html,299,http://listjs.com/ is not an HTTPS link _site/posts/new-navigation.html,107,'a' tag is missing a reference _site/posts/new-navigation.html,126,'a' tag is missing a reference _site/posts/new-navigation.html,163,'a' tag is missing a reference @@ -3257,9 +1781,6 @@ _site/posts/newsletter-april-21.html,107,'a' tag is missing a reference 
_site/posts/newsletter-april-21.html,126,'a' tag is missing a reference _site/posts/newsletter-april-21.html,163,'a' tag is missing a reference _site/posts/newsletter-april-21.html,189,'a' tag is missing a reference -_site/posts/newsletter-april-21.html,289,http://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/ is not an HTTPS link -_site/posts/newsletter-april-21.html,297,http://ixa2.si.ehu.eus/intele/?q=webinars is not an HTTPS link -_site/posts/newsletter-april-21.html,367,http://dhawards.org/dhawards2020/results/ is not an HTTPS link _site/posts/newsletter-june20.html,107,'a' tag is missing a reference _site/posts/newsletter-june20.html,126,'a' tag is missing a reference _site/posts/newsletter-june20.html,163,'a' tag is missing a reference @@ -3300,14 +1821,10 @@ _site/posts/premio-hdh-2018.html,106,'a' tag is missing a reference _site/posts/premio-hdh-2018.html,125,'a' tag is missing a reference _site/posts/premio-hdh-2018.html,162,'a' tag is missing a reference _site/posts/premio-hdh-2018.html,188,'a' tag is missing a reference -_site/posts/premio-hdh-2018.html,281,http://humanidadesdigitaleshispanicas.es/resolucion-convocatoria-i-edicion-premios-hdh/ is not an HTTPS link -_site/posts/premio-hdh-2018.html,283,http://humanidadesdigitaleshispanicas.es is not an HTTPS link _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html,107,'a' tag is missing a reference _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html,126,'a' tag is missing a reference _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html,163,'a' tag is missing a reference _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html,189,'a' tag is missing a reference -_site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html,287,http://neogranadina.org/ is not an HTTPS link -_site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html,291,http://eadh.org/ is not an HTTPS link _site/posts/proghist-trustee-advert.html,107,'a' tag is missing a reference _site/posts/proghist-trustee-advert.html,126,'a' tag is missing a reference _site/posts/proghist-trustee-advert.html,163,'a' tag is missing a reference @@ -3324,24 +1841,14 @@ _site/posts/programming-historian-live-london.html,107,'a' tag is missing a refe _site/posts/programming-historian-live-london.html,126,'a' tag is missing a reference _site/posts/programming-historian-live-london.html,163,'a' tag is missing a reference _site/posts/programming-historian-live-london.html,189,'a' tag is missing a reference -_site/posts/programming-historian-live-london.html,292,http://proghistlive.eventbrite.co.uk is not an HTTPS link -_site/posts/programming-historian-live-london.html,294,http://www.software.ac.uk/ is not an HTTPS link -_site/posts/programming-historian-live-london.html,294,http://www.bl.uk/ is not an HTTPS link -_site/posts/programming-historian-live-london.html,294,http://www.history.ac.uk/ is not an HTTPS link _site/posts/promoting-digital-archives.html,105,'a' tag is missing a reference _site/posts/promoting-digital-archives.html,124,'a' tag is missing a reference _site/posts/promoting-digital-archives.html,161,'a' tag is missing a reference _site/posts/promoting-digital-archives.html,187,'a' tag is missing a reference -_site/posts/promoting-digital-archives.html,279,http://library.si.edu/event/colorourcollections-coloring-event is not an HTTPS link 
-_site/posts/promoting-digital-archives.html,296,http://www.instructables.com/id/How-to-Make-a-Coloring-Book/?ALLSTEPS is not an HTTPS link -_site/posts/promoting-digital-archives.html,302,http://www.colourlovers.com/ is not an HTTPS link -_site/posts/promoting-digital-archives.html,305,http://www.openculture.com/2016/02/download-free-coloring-books-from-world-class-libraries-museums.html is not an HTTPS link _site/posts/reintroducing-the-ph-blog.html,105,'a' tag is missing a reference _site/posts/reintroducing-the-ph-blog.html,124,'a' tag is missing a reference _site/posts/reintroducing-the-ph-blog.html,161,'a' tag is missing a reference _site/posts/reintroducing-the-ph-blog.html,187,'a' tag is missing a reference -_site/posts/reintroducing-the-ph-blog.html,283,http://humanitiesactionlab.org is not an HTTPS link -_site/posts/reintroducing-the-ph-blog.html,283,http://gitmomemory.org is not an HTTPS link _site/posts/retirement-and-sustainability-policies.html,105,'a' tag is missing a reference _site/posts/retirement-and-sustainability-policies.html,124,'a' tag is missing a reference _site/posts/retirement-and-sustainability-policies.html,161,'a' tag is missing a reference @@ -3358,18 +1865,15 @@ _site/posts/september-newsletter.html,107,'a' tag is missing a reference _site/posts/september-newsletter.html,126,'a' tag is missing a reference _site/posts/september-newsletter.html,163,'a' tag is missing a reference _site/posts/september-newsletter.html,189,'a' tag is missing a reference -_site/posts/september-newsletter.html,303,http://ach.org/ is not an HTTPS link _site/posts/sonic-word-clouds.html,105,'a' tag is missing a reference _site/posts/sonic-word-clouds.html,124,'a' tag is missing a reference _site/posts/sonic-word-clouds.html,161,'a' tag is missing a reference _site/posts/sonic-word-clouds.html,187,'a' tag is missing a reference -_site/posts/sonic-word-clouds.html,279,http://www.musicalgorithms.org/3.2/ is not an HTTPS link _site/posts/spanish-editor.html,105,'a' tag is missing a reference _site/posts/spanish-editor.html,124,'a' tag is missing a reference _site/posts/spanish-editor.html,161,'a' tag is missing a reference _site/posts/spanish-editor.html,187,'a' tag is missing a reference _site/posts/spanish-editor.html,277,'a' tag is missing a reference -_site/posts/spanish-editor.html,288,http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ is not an HTTPS link _site/posts/subject-specialist-editor.html,106,'a' tag is missing a reference _site/posts/subject-specialist-editor.html,125,'a' tag is missing a reference _site/posts/subject-specialist-editor.html,162,'a' tag is missing a reference @@ -3394,8 +1898,6 @@ _site/posts/welcome-martin-grandjean.html,107,'a' tag is missing a reference _site/posts/welcome-martin-grandjean.html,126,'a' tag is missing a reference _site/posts/welcome-martin-grandjean.html,163,'a' tag is missing a reference _site/posts/welcome-martin-grandjean.html,189,'a' tag is missing a reference -_site/posts/welcome-martin-grandjean.html,281,http://www.martingrandjean.ch/complex-structures-and-international-organizations/ is not an HTTPS link -_site/posts/welcome-martin-grandjean.html,281,http://www.martingrandjean.ch is not an HTTPS link _site/posts/welcome-mc-boucher.html,105,'a' tag is missing a reference _site/posts/welcome-mc-boucher.html,124,'a' tag is missing a reference _site/posts/welcome-mc-boucher.html,161,'a' tag is missing a reference @@ -3404,30 +1906,22 @@ _site/posts/welcome-ph-fr.html,105,'a' tag is missing a reference 
_site/posts/welcome-ph-fr.html,124,'a' tag is missing a reference _site/posts/welcome-ph-fr.html,161,'a' tag is missing a reference _site/posts/welcome-ph-fr.html,187,'a' tag is missing a reference -_site/posts/welcome-ph-fr.html,279,http://programminghistorian.org/fr is not an HTTPS link -_site/posts/welcome-ph-fr.html,285,http://dsharp.library.cmu.edu/ is not an HTTPS link -_site/posts/welcome-ph-fr.html,289,http://archives.mundaneum.org/fr/versions-digitalisees/schema-de-paul-otlet-documentation-et-telecommunication is not an HTTPS link _site/posts/welcome-to-ph2.html,107,'a' tag is missing a reference _site/posts/welcome-to-ph2.html,126,'a' tag is missing a reference _site/posts/welcome-to-ph2.html,163,'a' tag is missing a reference _site/posts/welcome-to-ph2.html,189,'a' tag is missing a reference -_site/posts/welcome-to-ph2.html,289,http://niche-canada.org/programming-historian is not an HTTPS link _site/posts/welcome-zoe-leblanc.html,108,'a' tag is missing a reference _site/posts/welcome-zoe-leblanc.html,127,'a' tag is missing a reference _site/posts/welcome-zoe-leblanc.html,164,'a' tag is missing a reference _site/posts/welcome-zoe-leblanc.html,190,'a' tag is missing a reference -_site/posts/welcome-zoe-leblanc.html,285,http://scholarslab.org is not an HTTPS link _site/pt/apoiadores.html,88,'a' tag is missing a reference _site/pt/apoiadores.html,107,'a' tag is missing a reference _site/pt/apoiadores.html,144,'a' tag is missing a reference _site/pt/apoiadores.html,203,'a' tag is missing a reference -_site/pt/apoiadores.html,275,http://cdrh.unl.edu/ is not an HTTPS link _site/pt/contribua.html,88,'a' tag is missing a reference _site/pt/contribua.html,107,'a' tag is missing a reference _site/pt/contribua.html,144,'a' tag is missing a reference _site/pt/contribua.html,203,'a' tag is missing a reference -_site/pt/contribua.html,292,http://www.worldcat.org/title/programming-historian/oclc/951537099 is not an HTTPS link -_site/pt/contribua.html,294,http://purdue-primo-prod.hosted.exlibrisgroup.com/primo_library/libweb/action/dlDisplay.do?vid=PURDUE&search_scope=everything&docId=PURDUE_ALMA51671812890001081&fn=permalink is not an HTTPS link _site/pt/directrizes-autor.html,88,'a' tag is missing a reference _site/pt/directrizes-autor.html,107,'a' tag is missing a reference _site/pt/directrizes-autor.html,144,'a' tag is missing a reference @@ -3436,7 +1930,6 @@ _site/pt/directrizes-editor.html,88,'a' tag is missing a reference _site/pt/directrizes-editor.html,107,'a' tag is missing a reference _site/pt/directrizes-editor.html,144,'a' tag is missing a reference _site/pt/directrizes-editor.html,203,'a' tag is missing a reference -_site/pt/directrizes-editor.html,591,http://www.loc.gov/maps/collections is not an HTTPS link _site/pt/directrizes-revisor.html,88,'a' tag is missing a reference _site/pt/directrizes-revisor.html,107,'a' tag is missing a reference _site/pt/directrizes-revisor.html,144,'a' tag is missing a reference @@ -3453,72 +1946,6 @@ _site/pt/equipe.html,88,'a' tag is missing a reference _site/pt/equipe.html,107,'a' tag is missing a reference _site/pt/equipe.html,144,'a' tag is missing a reference _site/pt/equipe.html,203,'a' tag is missing a reference -_site/pt/equipe.html,310,http://twitter.com/maxcarlons is not an HTTPS link -_site/pt/equipe.html,313,http://github.com/carlonim is not an HTTPS link -_site/pt/equipe.html,414,http://github.com/lachapot is not an HTTPS link -_site/pt/equipe.html,512,http://twitter.com/cosovschi is not an HTTPS link 
-_site/pt/equipe.html,515,http://github.com/digitalkosovski is not an HTTPS link -_site/pt/equipe.html,618,http://github.com/caiocmello is not an HTTPS link -_site/pt/equipe.html,1182,http://github.com/semanticnoodles is not an HTTPS link -_site/pt/equipe.html,1278,http://twitter.com/nabsiddiqui is not an HTTPS link -_site/pt/equipe.html,1281,http://github.com/nabsiddiqui is not an HTTPS link -_site/pt/equipe.html,1631,http://twitter.com/giulia_taurino is not an HTTPS link -_site/pt/equipe.html,1634,http://github.com/giuliataurino is not an HTTPS link -_site/pt/equipe.html,1804,http://www.alexwermercolan.com/ is not an HTTPS link -_site/pt/equipe.html,1810,http://twitter.com/alexwermercolan is not an HTTPS link -_site/pt/equipe.html,1813,http://github.com/hawc2 is not an HTTPS link -_site/pt/equipe.html,2059,http://www.mariajoseafanador.com is not an HTTPS link -_site/pt/equipe.html,2065,http://twitter.com/mariajoafana is not an HTTPS link -_site/pt/equipe.html,2068,http://github.com/mariajoafana is not an HTTPS link -_site/pt/equipe.html,2534,http://twitter.com/IsaGribomont is not an HTTPS link -_site/pt/equipe.html,2537,http://github.com/isag91 is not an HTTPS link -_site/pt/equipe.html,2745,http://twitter.com/espejolento is not an HTTPS link -_site/pt/equipe.html,2748,http://github.com/silviaegt is not an HTTPS link -_site/pt/equipe.html,3036,http://twitter.com/jenniferisve is not an HTTPS link -_site/pt/equipe.html,3039,http://github.com/jenniferisasi is not an HTTPS link -_site/pt/equipe.html,3361,http://twitter.com/enetreseles is not an HTTPS link -_site/pt/equipe.html,3364,http://github.com/nllano is not an HTTPS link -_site/pt/equipe.html,3568,http://twitter.com/jgob is not an HTTPS link -_site/pt/equipe.html,3571,http://github.com/joshuagob is not an HTTPS link -_site/pt/equipe.html,3863,http://twitter.com/rivaquiroga is not an HTTPS link -_site/pt/equipe.html,3866,http://github.com/rivaquiroga is not an HTTPS link -_site/pt/equipe.html,4157,http://github.com/nivaca is not an HTTPS link -_site/pt/equipe.html,4370,http://github.com/marie-flesch is not an HTTPS link -_site/pt/equipe.html,4513,http://github.com/matgille is not an HTTPS link -_site/pt/equipe.html,4746,http://github.com/mhersent is not an HTTPS link -_site/pt/equipe.html,4804,http://twitter.com/superHH is not an HTTPS link -_site/pt/equipe.html,5056,http://github.com/DMathelier is not an HTTPS link -_site/pt/equipe.html,5190,http://twitter.com/emilienschultz is not an HTTPS link -_site/pt/equipe.html,5193,http://github.com/emilienschultz is not an HTTPS link -_site/pt/equipe.html,5317,http://twitter.com/davvalent is not an HTTPS link -_site/pt/equipe.html,5320,http://github.com/davvalent is not an HTTPS link -_site/pt/equipe.html,5449,http://github.com/AlexandreWa is not an HTTPS link -_site/pt/equipe.html,5584,http://github.com/josircg is not an HTTPS link -_site/pt/equipe.html,5842,http://twitter.com/danielalvesfcsh is not an HTTPS link -_site/pt/equipe.html,5845,http://github.com/DanielAlvesLABDH is not an HTTPS link -_site/pt/equipe.html,6107,http://twitter.com/ericbrasiln is not an HTTPS link -_site/pt/equipe.html,6110,http://github.com/ericbrasiln is not an HTTPS link -_site/pt/equipe.html,6301,http://github.com/luisferla is not an HTTPS link -_site/pt/equipe.html,6543,http://twitter.com/jimmy_medeiros is not an HTTPS link -_site/pt/equipe.html,6546,http://github.com/JimmyMedeiros82 is not an HTTPS link -_site/pt/equipe.html,6781,http://github.com/joanacvp is not an HTTPS link 
-_site/pt/equipe.html,7027,http://twitter.com/araceletorres is not an HTTPS link -_site/pt/equipe.html,7030,http://github.com/aracele is not an HTTPS link -_site/pt/equipe.html,7286,http://twitter.com/j_w_baker is not an HTTPS link -_site/pt/equipe.html,7289,http://github.com/drjwbaker is not an HTTPS link -_site/pt/equipe.html,7721,http://adamcrymble.org is not an HTTPS link -_site/pt/equipe.html,7727,http://twitter.com/Adam_Crymble is not an HTTPS link -_site/pt/equipe.html,7730,http://github.com/acrymble is not an HTTPS link -_site/pt/equipe.html,8198,http://github.com/adamfarquhar is not an HTTPS link -_site/pt/equipe.html,8258,http://twitter.com/jenniferisve is not an HTTPS link -_site/pt/equipe.html,8261,http://github.com/jenniferisasi is not an HTTPS link -_site/pt/equipe.html,8589,http://twitter.com/rivaquiroga is not an HTTPS link -_site/pt/equipe.html,8592,http://github.com/rivaquiroga is not an HTTPS link -_site/pt/equipe.html,8878,http://twitter.com/amsichani is not an HTTPS link -_site/pt/equipe.html,8881,http://github.com/amsichani is not an HTTPS link -_site/pt/equipe.html,9221,http://twitter.com/AnisaHawes is not an HTTPS link -_site/pt/equipe.html,9224,http://github.com/anisa-hawes is not an HTTPS link -_site/pt/equipe.html,9433,http://github.com/charlottejmc is not an HTTPS link _site/pt/eventos.html,88,'a' tag is missing a reference _site/pt/eventos.html,107,'a' tag is missing a reference _site/pt/eventos.html,144,'a' tag is missing a reference @@ -3539,7 +1966,6 @@ _site/pt/licoes/HTML-lista-palavras-1.html,121,'a' tag is missing a reference _site/pt/licoes/HTML-lista-palavras-1.html,140,'a' tag is missing a reference _site/pt/licoes/HTML-lista-palavras-1.html,177,'a' tag is missing a reference _site/pt/licoes/HTML-lista-palavras-1.html,236,'a' tag is missing a reference -_site/pt/licoes/HTML-lista-palavras-1.html,558,http://www.w3schools.com/html/ is not an HTTPS link _site/pt/licoes/HTML-lista-palavras-2.html,121,'a' tag is missing a reference _site/pt/licoes/HTML-lista-palavras-2.html,140,'a' tag is missing a reference _site/pt/licoes/HTML-lista-palavras-2.html,177,'a' tag is missing a reference @@ -3552,20 +1978,14 @@ _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html,135,'a' tag is _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html,154,'a' tag is missing a reference _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html,191,'a' tag is missing a reference _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html,250,'a' tag is missing a reference -_site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html,703,http://factominer.free.fr/ is not an HTTPS link _site/pt/licoes/analise-sentimento-R-syuzhet.html,119,'a' tag is missing a reference _site/pt/licoes/analise-sentimento-R-syuzhet.html,138,'a' tag is missing a reference _site/pt/licoes/analise-sentimento-R-syuzhet.html,175,'a' tag is missing a reference _site/pt/licoes/analise-sentimento-R-syuzhet.html,234,'a' tag is missing a reference -_site/pt/licoes/analise-sentimento-R-syuzhet.html,584,http://saifmohammad.com/WebPages/NRC-Emotion-Lexicon.htm is not an HTTPS link -_site/pt/licoes/analise-sentimento-R-syuzhet.html,588,http://www.matthewjockers.net/page/2/ is not an HTTPS link _site/pt/licoes/analise-sentimento-exploracao-dados.html,119,'a' tag is missing a reference _site/pt/licoes/analise-sentimento-exploracao-dados.html,138,'a' tag is missing a reference _site/pt/licoes/analise-sentimento-exploracao-dados.html,175,'a' tag is missing a reference 
_site/pt/licoes/analise-sentimento-exploracao-dados.html,234,'a' tag is missing a reference -_site/pt/licoes/analise-sentimento-exploracao-dados.html,605,http://www.nltk.org/install.html is not an HTTPS link -_site/pt/licoes/analise-sentimento-exploracao-dados.html,617,http://www.nltk.org/_modules/nltk/sentiment/vader.html is not an HTTPS link -_site/pt/licoes/analise-sentimento-exploracao-dados.html,639,http://www.nltk.org/_modules/nltk/sentiment/vader.html is not an HTTPS link _site/pt/licoes/aplicacao-web-interativa-r-shiny-leaflet.html,119,'a' tag is missing a reference _site/pt/licoes/aplicacao-web-interativa-r-shiny-leaflet.html,138,'a' tag is missing a reference _site/pt/licoes/aplicacao-web-interativa-r-shiny-leaflet.html,175,'a' tag is missing a reference @@ -3574,37 +1994,22 @@ _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,121,'a' t _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,140,'a' tag is missing a reference _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,177,'a' tag is missing a reference _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,236,'a' tag is missing a reference -_site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,868,http://writemonkey.com/ is not an HTTPS link -_site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,868,http://prose.io/ is not an HTTPS link -_site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,868,http://www.draftin.com/ is not an HTTPS link -_site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,870,http://gitit.net/ is not an HTTPS link -_site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,870,http://jaspervdj.be/hakyll/ is not an HTTPS link -_site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,884,http://www.antipope.org/charlie/blog-static/2013/10/why-microsoft-word-must-die.html is not an HTTPS link _site/pt/licoes/camadas-vetoriais-qgis.html,123,'a' tag is missing a reference _site/pt/licoes/camadas-vetoriais-qgis.html,142,'a' tag is missing a reference _site/pt/licoes/camadas-vetoriais-qgis.html,179,'a' tag is missing a reference _site/pt/licoes/camadas-vetoriais-qgis.html,238,'a' tag is missing a reference -_site/pt/licoes/camadas-vetoriais-qgis.html,1090,http://geospatialhistorian.wordpress.com/ is not an HTTPS link _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html,121,'a' tag is missing a reference _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html,140,'a' tag is missing a reference _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html,177,'a' tag is missing a reference _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html,236,'a' tag is missing a reference -_site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html,562,http://www.7-zip.org/ is not an HTTPS link -_site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html,576,http://en.wikipedia.org/wiki/Tab-separated_values is not an HTTPS link -_site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html,641,http://williamjturkel.net/2013/06/15/basic-text-analysis-with-command-line-tools-in-linux/ is not an HTTPS link -_site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html,642,http://williamjturkel.net/2013/06/20/pattern-matching-and-permuted-term-indexing-with-command-line-tools-in-linux/ is not an HTTPS link _site/pt/licoes/contar-frequencias-palavras-python.html,121,'a' tag is missing a reference 
_site/pt/licoes/contar-frequencias-palavras-python.html,140,'a' tag is missing a reference _site/pt/licoes/contar-frequencias-palavras-python.html,177,'a' tag is missing a reference _site/pt/licoes/contar-frequencias-palavras-python.html,236,'a' tag is missing a reference -_site/pt/licoes/contar-frequencias-palavras-python.html,596,http://docs.python.org/tutorial/datastructures.html#list-comprehensions is not an HTTPS link -_site/pt/licoes/contar-frequencias-palavras-python.html,721,http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words is not an HTTPS link _site/pt/licoes/criacao-visualizacao-ficheiros-html-python.html,121,'a' tag is missing a reference _site/pt/licoes/criacao-visualizacao-ficheiros-html-python.html,140,'a' tag is missing a reference _site/pt/licoes/criacao-visualizacao-ficheiros-html-python.html,177,'a' tag is missing a reference _site/pt/licoes/criacao-visualizacao-ficheiros-html-python.html,236,'a' tag is missing a reference -_site/pt/licoes/criacao-visualizacao-ficheiros-html-python.html,553,http://www.w3schools.com/html/default.asp is not an HTTPS link -_site/pt/licoes/criacao-visualizacao-ficheiros-html-python.html,559,http://www.w3schools.com/tags/tag_doctype.asp is not an HTTPS link _site/pt/licoes/criar-exposicao-omeka.html,121,'a' tag is missing a reference _site/pt/licoes/criar-exposicao-omeka.html,140,'a' tag is missing a reference _site/pt/licoes/criar-exposicao-omeka.html,177,'a' tag is missing a reference @@ -3613,55 +2018,35 @@ _site/pt/licoes/download-automatico-wget.html,119,'a' tag is missing a reference _site/pt/licoes/download-automatico-wget.html,138,'a' tag is missing a reference _site/pt/licoes/download-automatico-wget.html,175,'a' tag is missing a reference _site/pt/licoes/download-automatico-wget.html,234,'a' tag is missing a reference -_site/pt/licoes/download-automatico-wget.html,662,http://www.gnu.org/software/wget/ is not an HTTPS link -_site/pt/licoes/download-automatico-wget.html,662,http://ftp.gnu.org/gnu/wget/ is not an HTTPS link _site/pt/licoes/download-multiplos-registros-query-strings.html,119,'a' tag is missing a reference _site/pt/licoes/download-multiplos-registros-query-strings.html,138,'a' tag is missing a reference _site/pt/licoes/download-multiplos-registros-query-strings.html,175,'a' tag is missing a reference _site/pt/licoes/download-multiplos-registros-query-strings.html,234,'a' tag is missing a reference -_site/pt/licoes/download-multiplos-registros-query-strings.html,549,http://www.oldbaileyonline.org/ is not an HTTPS link -_site/pt/licoes/download-multiplos-registros-query-strings.html,565,http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 is not an HTTPS link -_site/pt/licoes/download-multiplos-registros-query-strings.html,603,http://www.oldbaileyonline.org/forms/formMain.jsp is not an HTTPS link -_site/pt/licoes/download-multiplos-registros-query-strings.html,1148,http://docs.python.org/tutorial/errors.html is not an HTTPS link -_site/pt/licoes/download-multiplos-registros-query-strings.html,1293,http://www.oldbaileyonline.org/static/DocAPI.jsp is not an HTTPS link -_site/pt/licoes/download-multiplos-registros-query-strings.html,1294,http://stackoverflow.com/questions/273192/python-best-way-to-create-directory-if-it-doesnt-exist-for-file-write is not an HTTPS link _site/pt/licoes/download-paginas-web-python.html,121,'a' tag is missing a reference _site/pt/licoes/download-paginas-web-python.html,140,'a' tag is missing a reference _site/pt/licoes/download-paginas-web-python.html,177,'a' tag 
is missing a reference _site/pt/licoes/download-paginas-web-python.html,236,'a' tag is missing a reference -_site/pt/licoes/download-paginas-web-python.html,624,http://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes is not an HTTPS link -_site/pt/licoes/download-paginas-web-python.html,624,http://www.oldbaileyonline.org/images.jsp?doc=178006280084 is not an HTTPS link _site/pt/licoes/explorar-analisar-dados-rede-python.html,125,'a' tag is missing a reference _site/pt/licoes/explorar-analisar-dados-rede-python.html,144,'a' tag is missing a reference _site/pt/licoes/explorar-analisar-dados-rede-python.html,181,'a' tag is missing a reference _site/pt/licoes/explorar-analisar-dados-rede-python.html,240,'a' tag is missing a reference -_site/pt/licoes/explorar-analisar-dados-rede-python.html,621,http://www.oxforddnb.com is not an HTTPS link -_site/pt/licoes/explorar-analisar-dados-rede-python.html,621,http://www.sixdegreesoffrancisbacon.com is not an HTTPS link -_site/pt/licoes/explorar-analisar-dados-rede-python.html,897,http://sixdegreesoffrancisbacon.com/ is not an HTTPS link _site/pt/licoes/extrair-paginas-ilustradas-com-python.html,119,'a' tag is missing a reference _site/pt/licoes/extrair-paginas-ilustradas-com-python.html,138,'a' tag is missing a reference _site/pt/licoes/extrair-paginas-ilustradas-com-python.html,175,'a' tag is missing a reference _site/pt/licoes/extrair-paginas-ilustradas-com-python.html,234,'a' tag is missing a reference -_site/pt/licoes/extrair-paginas-ilustradas-com-python.html,685,http://web.archive.org/web/20190115051900/https://conda.io/docs/_downloads/conda-cheatsheet.pdf is not an HTTPS link _site/pt/licoes/extrair-palavras-chave.html,119,'a' tag is missing a reference _site/pt/licoes/extrair-palavras-chave.html,138,'a' tag is missing a reference _site/pt/licoes/extrair-palavras-chave.html,175,'a' tag is missing a reference _site/pt/licoes/extrair-palavras-chave.html,234,'a' tag is missing a reference -_site/pt/licoes/extrair-palavras-chave.html,554,http://www.british-history.ac.uk/alumni-oxon/1500-1714 is not an HTTPS link _site/pt/licoes/geocodificando-qgis.html,119,'a' tag is missing a reference _site/pt/licoes/geocodificando-qgis.html,138,'a' tag is missing a reference _site/pt/licoes/geocodificando-qgis.html,175,'a' tag is missing a reference _site/pt/licoes/geocodificando-qgis.html,234,'a' tag is missing a reference _site/pt/licoes/geocodificando-qgis.html,599,[url](https://www.oracle.com/java/technologies/downloads/#java8) is an invalid URL -_site/pt/licoes/geocodificando-qgis.html,602,http://www.british-history.ac.uk/alumni-oxon/1500-1714 is not an HTTPS link -_site/pt/licoes/geocodificando-qgis.html,684,http://www.county-borders.co.uk/ is not an HTTPS link -_site/pt/licoes/geocodificando-qgis.html,769,http://www.gazetteer.org.uk/index.php is not an HTTPS link -_site/pt/licoes/geocodificando-qgis.html,781,http://www.county-borders.co.uk/ is not an HTTPS link _site/pt/licoes/georreferenciamento-qgis.html,123,'a' tag is missing a reference _site/pt/licoes/georreferenciamento-qgis.html,142,'a' tag is missing a reference _site/pt/licoes/georreferenciamento-qgis.html,179,'a' tag is missing a reference _site/pt/licoes/georreferenciamento-qgis.html,238,'a' tag is missing a reference -_site/pt/licoes/georreferenciamento-qgis.html,612,http://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP is not an HTTPS link _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html,117,'a' tag 
is missing a reference _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html,136,'a' tag is missing a reference _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html,173,'a' tag is missing a reference @@ -3678,7 +2063,6 @@ _site/pt/licoes/instalacao-mac.html,121,'a' tag is missing a reference _site/pt/licoes/instalacao-mac.html,140,'a' tag is missing a reference _site/pt/licoes/instalacao-mac.html,177,'a' tag is missing a reference _site/pt/licoes/instalacao-mac.html,236,'a' tag is missing a reference -_site/pt/licoes/instalacao-mac.html,538,http://support.apple.com/kb/ht1427 is not an HTTPS link _site/pt/licoes/instalacao-modulos-python-pip.html,119,'a' tag is missing a reference _site/pt/licoes/instalacao-modulos-python-pip.html,138,'a' tag is missing a reference _site/pt/licoes/instalacao-modulos-python-pip.html,175,'a' tag is missing a reference @@ -3691,8 +2075,6 @@ _site/pt/licoes/introducao-ao-markdown.html,119,'a' tag is missing a reference _site/pt/licoes/introducao-ao-markdown.html,138,'a' tag is missing a reference _site/pt/licoes/introducao-ao-markdown.html,175,'a' tag is missing a reference _site/pt/licoes/introducao-ao-markdown.html,234,'a' tag is missing a reference -_site/pt/licoes/introducao-ao-markdown.html,572,http://daringfireball.net/projects/markdown/ is not an HTTPS link -_site/pt/licoes/introducao-ao-markdown.html,576,http://github.com is not an HTTPS link _site/pt/licoes/introducao-codificacao-textos-tei-1.html,119,'a' tag is missing a reference _site/pt/licoes/introducao-codificacao-textos-tei-1.html,138,'a' tag is missing a reference _site/pt/licoes/introducao-codificacao-textos-tei-1.html,175,'a' tag is missing a reference @@ -3701,36 +2083,14 @@ _site/pt/licoes/introducao-dados-abertos-conectados.html,119,'a' tag is missing _site/pt/licoes/introducao-dados-abertos-conectados.html,138,'a' tag is missing a reference _site/pt/licoes/introducao-dados-abertos-conectados.html,175,'a' tag is missing a reference _site/pt/licoes/introducao-dados-abertos-conectados.html,234,'a' tag is missing a reference -_site/pt/licoes/introducao-dados-abertos-conectados.html,575,http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ is not an HTTPS link -_site/pt/licoes/introducao-dados-abertos-conectados.html,604,http://www.oxforddnb.com is not an HTTPS link -_site/pt/licoes/introducao-dados-abertos-conectados.html,616,http://www.geonames.org/ is not an HTTPS link -_site/pt/licoes/introducao-dados-abertos-conectados.html,750,http://semanticweb.org/wiki/Main_Page.html is not an HTTPS link -_site/pt/licoes/introducao-dados-abertos-conectados.html,752,http://web.archive.org/web/20170715094229/http://www.musicontology.com/ is not an HTTPS link -_site/pt/licoes/introducao-dados-abertos-conectados.html,752,http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html is not an HTTPS link -_site/pt/licoes/introducao-dados-abertos-conectados.html,870,http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html is not an HTTPS link -_site/pt/licoes/introducao-dados-abertos-conectados.html,926,http://www.easyrdf.org/converter is not an HTTPS link -_site/pt/licoes/introducao-dados-abertos-conectados.html,938,http://dbpedia.org/snorql/ is not an HTTPS link -_site/pt/licoes/introducao-dados-abertos-conectados.html,1038,http://dbpedia.org/class/yago/WikicatBritishHistorians is not an HTTPS link 
-_site/pt/licoes/introducao-dados-abertos-conectados.html,1038,http://dbpedia.org/class/yago/WikicatWomenHistorians is not an HTTPS link -_site/pt/licoes/introducao-dados-abertos-conectados.html,1077,http://www.snee.com/bobdc.blog/ is not an HTTPS link -_site/pt/licoes/introducao-dados-abertos-conectados.html,1081,http://linkeddata.org/guides-and-tutorials is not an HTTPS link -_site/pt/licoes/introducao-dados-abertos-conectados.html,1083,http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ is not an HTTPS link -_site/pt/licoes/introducao-dados-abertos-conectados.html,1089,http://www.ahrc.ac.uk/ is not an HTTPS link _site/pt/licoes/introducao-estilometria-python.html,119,'a' tag is missing a reference _site/pt/licoes/introducao-estilometria-python.html,138,'a' tag is missing a reference _site/pt/licoes/introducao-estilometria-python.html,175,'a' tag is missing a reference _site/pt/licoes/introducao-estilometria-python.html,234,'a' tag is missing a reference -_site/pt/licoes/introducao-estilometria-python.html,805,http://jupyter.org/ is not an HTTPS link -_site/pt/licoes/introducao-estilometria-python.html,805,http://jupyterlab.readthedocs.io/en/stable/getting_started/installation.html is not an HTTPS link _site/pt/licoes/introducao-instalacao-python.html,121,'a' tag is missing a reference _site/pt/licoes/introducao-instalacao-python.html,140,'a' tag is missing a reference _site/pt/licoes/introducao-instalacao-python.html,177,'a' tag is missing a reference _site/pt/licoes/introducao-instalacao-python.html,236,'a' tag is missing a reference -_site/pt/licoes/introducao-instalacao-python.html,544,http://www.python.org/ is not an HTTPS link -_site/pt/licoes/introducao-instalacao-python.html,544,http://www.crummy.com/software/BeautifulSoup/ is not an HTTPS link -_site/pt/licoes/introducao-instalacao-python.html,546,http://www.activestate.com/komodo-edit is not an HTTPS link -_site/pt/licoes/introducao-instalacao-python.html,546,http://wiki.python.org/moin/PythonEditors/ is not an HTTPS link -_site/pt/licoes/introducao-instalacao-python.html,554,http://sebastianraschka.com/Articles/2014_python_2_3_key_diff.html is not an HTTPS link _site/pt/licoes/introducao-jupyter-notebooks.html,123,'a' tag is missing a reference _site/pt/licoes/introducao-jupyter-notebooks.html,142,'a' tag is missing a reference _site/pt/licoes/introducao-jupyter-notebooks.html,179,'a' tag is missing a reference @@ -3739,20 +2099,14 @@ _site/pt/licoes/introducao-linha-comando-bash.html,121,'a' tag is missing a refe _site/pt/licoes/introducao-linha-comando-bash.html,140,'a' tag is missing a reference _site/pt/licoes/introducao-linha-comando-bash.html,177,'a' tag is missing a reference _site/pt/licoes/introducao-linha-comando-bash.html,236,'a' tag is missing a reference -_site/pt/licoes/introducao-linha-comando-bash.html,564,http://www.tldp.org/LDP/Bash-Beginners-Guide/html/chap_01.html is not an HTTPS link -_site/pt/licoes/introducao-linha-comando-bash.html,786,http://www.gutenberg.org/ebooks/2600 is not an HTTPS link _site/pt/licoes/introducao-mysql-r.html,119,'a' tag is missing a reference _site/pt/licoes/introducao-mysql-r.html,138,'a' tag is missing a reference _site/pt/licoes/introducao-mysql-r.html,175,'a' tag is missing a reference _site/pt/licoes/introducao-mysql-r.html,234,'a' tag is missing a reference -_site/pt/licoes/introducao-mysql-r.html,910,http://dev.mysql.com/downloads/workbench/ is not an HTTPS link 
-_site/pt/licoes/introducao-mysql-r.html,1818,http://web.archive.org/web/20171228130133/https://www.ntu.edu.sg/home/ehchua/programming/sql/MySQL_Beginner.html is not an HTTPS link _site/pt/licoes/introducao-omeka-net.html,119,'a' tag is missing a reference _site/pt/licoes/introducao-omeka-net.html,138,'a' tag is missing a reference _site/pt/licoes/introducao-omeka-net.html,175,'a' tag is missing a reference _site/pt/licoes/introducao-omeka-net.html,234,'a' tag is missing a reference -_site/pt/licoes/introducao-omeka-net.html,544,http://www.omeka.net is not an HTTPS link -_site/pt/licoes/introducao-omeka-net.html,978,http://info.omeka.net/ is not an HTTPS link _site/pt/licoes/investigar-literatura-lusofona-literateca.html,117,'a' tag is missing a reference _site/pt/licoes/investigar-literatura-lusofona-literateca.html,136,'a' tag is missing a reference _site/pt/licoes/investigar-literatura-lusofona-literateca.html,173,'a' tag is missing a reference @@ -3761,20 +2115,10 @@ _site/pt/licoes/limpar-dados-openrefine.html,123,'a' tag is missing a reference _site/pt/licoes/limpar-dados-openrefine.html,142,'a' tag is missing a reference _site/pt/licoes/limpar-dados-openrefine.html,179,'a' tag is missing a reference _site/pt/licoes/limpar-dados-openrefine.html,238,'a' tag is missing a reference -_site/pt/licoes/limpar-dados-openrefine.html,574,http://openrefine.org is not an HTTPS link _site/pt/licoes/manipulacao-transformacao-dados-r.html,119,'a' tag is missing a reference _site/pt/licoes/manipulacao-transformacao-dados-r.html,138,'a' tag is missing a reference _site/pt/licoes/manipulacao-transformacao-dados-r.html,175,'a' tag is missing a reference _site/pt/licoes/manipulacao-transformacao-dados-r.html,234,'a' tag is missing a reference -_site/pt/licoes/manipulacao-transformacao-dados-r.html,567,http://hadley.nz/ is not an HTTPS link -_site/pt/licoes/manipulacao-transformacao-dados-r.html,585,http://tidyverse.org/ is not an HTTPS link -_site/pt/licoes/manipulacao-transformacao-dados-r.html,590,http://magrittr.tidyverse.org is not an HTTPS link -_site/pt/licoes/manipulacao-transformacao-dados-r.html,591,http://ggplot2.tidyverse.org/ is not an HTTPS link -_site/pt/licoes/manipulacao-transformacao-dados-r.html,591,http://www.springer.com/us/book/9780387245447 is not an HTTPS link -_site/pt/licoes/manipulacao-transformacao-dados-r.html,592,http://tibble.tidyverse.org/ is not an HTTPS link -_site/pt/licoes/manipulacao-transformacao-dados-r.html,694,http://stefanbache.dk/ is not an HTTPS link -_site/pt/licoes/manipulacao-transformacao-dados-r.html,694,http://hadley.nz/ is not an HTTPS link -_site/pt/licoes/manipulacao-transformacao-dados-r.html,1018,http://www.ggplot2.org is not an HTTPS link _site/pt/licoes/manipular-strings-python.html,121,'a' tag is missing a reference _site/pt/licoes/manipular-strings-python.html,140,'a' tag is missing a reference _site/pt/licoes/manipular-strings-python.html,177,'a' tag is missing a reference @@ -3783,14 +2127,10 @@ _site/pt/licoes/nocoes-basicas-R-dados-tabulares.html,119,'a' tag is missing a r _site/pt/licoes/nocoes-basicas-R-dados-tabulares.html,138,'a' tag is missing a reference _site/pt/licoes/nocoes-basicas-R-dados-tabulares.html,175,'a' tag is missing a reference _site/pt/licoes/nocoes-basicas-R-dados-tabulares.html,234,'a' tag is missing a reference -_site/pt/licoes/nocoes-basicas-R-dados-tabulares.html,1054,http://dh-r.lincolnmullen.com/ is not an HTTPS link _site/pt/licoes/nocoes-basicas-paginas-web-html.html,121,'a' tag is missing a reference 
_site/pt/licoes/nocoes-basicas-paginas-web-html.html,140,'a' tag is missing a reference _site/pt/licoes/nocoes-basicas-paginas-web-html.html,177,'a' tag is missing a reference _site/pt/licoes/nocoes-basicas-paginas-web-html.html,236,'a' tag is missing a reference -_site/pt/licoes/nocoes-basicas-paginas-web-html.html,583,http://www.w3schools.com/html/default.asp is not an HTTPS link -_site/pt/licoes/nocoes-basicas-paginas-web-html.html,636,http://www.w3schools.com/html/default.asp is not an HTTPS link -_site/pt/licoes/nocoes-basicas-paginas-web-html.html,637,http://www.w3schools.com/html/html5_intro.asp is not an HTTPS link _site/pt/licoes/normalizacao-dados-textuais-python.html,121,'a' tag is missing a reference _site/pt/licoes/normalizacao-dados-textuais-python.html,140,'a' tag is missing a reference _site/pt/licoes/normalizacao-dados-textuais-python.html,177,'a' tag is missing a reference @@ -3803,29 +2143,14 @@ _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html,119,'a' tag is miss _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html,138,'a' tag is missing a reference _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html,175,'a' tag is missing a reference _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html,234,'a' tag is missing a reference -_site/pt/licoes/preservar-os-seus-dados-de-investigacao.html,691,http://notepad-plus-plus.org/ is not an HTTPS link -_site/pt/licoes/preservar-os-seus-dados-de-investigacao.html,693,http://komodoide.com/komodo-edit/ is not an HTTPS link -_site/pt/licoes/preservar-os-seus-dados-de-investigacao.html,762,http://homensenaviosdobacalhau.cm-ilhavo.pt/header/diretorio/showppl/17606 is not an HTTPS link -_site/pt/licoes/preservar-os-seus-dados-de-investigacao.html,940,http://historyonics.blogspot.co.uk/2014/01/judging-book-by-its-url.html is not an HTTPS link -_site/pt/licoes/preservar-os-seus-dados-de-investigacao.html,944,http://earlymodernnotes.wordpress.com/2013/05/18/unclean-unclean-what-historians-can-do-about-sharing-our-messy-research-data/ is not an HTTPS link -_site/pt/licoes/preservar-os-seus-dados-de-investigacao.html,957,http://britishlibrary.typepad.co.uk/collectioncare/2013/09/the-twelve-principles-of-digital-preservation.html is not an HTTPS link -_site/pt/licoes/preservar-os-seus-dados-de-investigacao.html,966,http://data-archive.ac.uk/create-manage/document is not an HTTPS link _site/pt/licoes/processamento-basico-texto-r.html,121,'a' tag is missing a reference _site/pt/licoes/processamento-basico-texto-r.html,140,'a' tag is missing a reference _site/pt/licoes/processamento-basico-texto-r.html,177,'a' tag is missing a reference _site/pt/licoes/processamento-basico-texto-r.html,236,'a' tag is missing a reference -_site/pt/licoes/processamento-basico-texto-r.html,1386,http://www.presidency.ucsb.edu/sou.php is not an HTTPS link _site/pt/licoes/qgis-camadas.html,123,'a' tag is missing a reference _site/pt/licoes/qgis-camadas.html,142,'a' tag is missing a reference _site/pt/licoes/qgis-camadas.html,179,'a' tag is missing a reference _site/pt/licoes/qgis-camadas.html,238,'a' tag is missing a reference -_site/pt/licoes/qgis-camadas.html,607,http://www.gov.pe.ca/gis/download.php3?name=coastline&file_format=SHP is not an HTTPS link -_site/pt/licoes/qgis-camadas.html,608,http://www.gov.pe.ca/gis/download.php3?name=lot_town&file_format=SHP is not an HTTPS link -_site/pt/licoes/qgis-camadas.html,609,http://www.gov.pe.ca/gis/download.php3?name=hydronetwork&file_format=SHP is not an HTTPS link 
-_site/pt/licoes/qgis-camadas.html,610,http://www.gov.pe.ca/gis/download.php3?name=forest_35&file_format=SHP is not an HTTPS link
-_site/pt/licoes/qgis-camadas.html,611,http://www.gov.pe.ca/gis/download.php3?name=nat_parks&file_format=SHP is not an HTTPS link
-_site/pt/licoes/qgis-camadas.html,693,http://web.archive.org/web/20180715071501/http://www.qgistutorials.com/pt_BR/docs/working_with_projections.html is not an HTTPS link
-_site/pt/licoes/qgis-camadas.html,1308,http://geospatialhistorian.wordpress.com/ is not an HTTPS link
_site/pt/licoes/reutilizacao-codigo-modularidade-python.html,121,'a' tag is missing a reference
_site/pt/licoes/reutilizacao-codigo-modularidade-python.html,140,'a' tag is missing a reference
_site/pt/licoes/reutilizacao-codigo-modularidade-python.html,177,'a' tag is missing a reference
@@ -3838,31 +2163,6 @@ _site/pt/licoes/som-dados-sonificacao-historiadores.html,119,'a' tag is missing
_site/pt/licoes/som-dados-sonificacao-historiadores.html,138,'a' tag is missing a reference
_site/pt/licoes/som-dados-sonificacao-historiadores.html,175,'a' tag is missing a reference
_site/pt/licoes/som-dados-sonificacao-historiadores.html,234,'a' tag is missing a reference
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,560,http://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,564,http://blog.taracopplestone.co.uk/making-things-photobashing-as-archaeological-remediation/ is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,564,http://www.samplereality.com/2012/05/02/notes-towards-a-deformed-humanities/ is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,564,http://nowviskie.org/2013/resistance-in-the-materials/ is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,564,http://nooart.org/post/73353953758/temkin-glitchhumancomputerinteraction is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,576,http://musicalgorithms.org/ is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,578,http://sonic-pi.net/ is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,591,http://www.icad.org/Proceedings/2008/Hermann2008.pdf is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,602,http://waxy.org/2015/12/if_drake_was_born_a_piano/ is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,606,http://www.icad.org/Proceedings/2008/Hermann2008.pdf is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,614,http://musicalgorithms.org/ is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,614,http://musicalgorithms.org/3.0/index.html is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,672,http://www.lynda.com/GarageBand-tutorials/Importing-audio-tracks/156620/164050-4.html is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,710,http://musicalgorithms.org/3.0/index.html is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,755,http://www.ethanhein.com/wp/2010/scales-and-emotions/ is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,820,http://docs.python-guide.org/en/latest/starting/install/win/ is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,824,http://www.howtogeek.com/235101/10-ways-to-open-the-command-prompt-in-windows-10/ is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,882,http://www.electronics.dit.ie/staff/tscarff/Music_technology/midi/midi_note_numbers_for_octaves.html is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,884,http://trillian.mit.edu/~jc/music/abc/ABCcontrib.html is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,887,http://themacroscope.org is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,1039,http://sonic-pi.net is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,1045,http://puffin.creighton.edu/jesuit/relations/ is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,1105,http://library.gwu.edu/scholarly-technology-group/posts/sound-library-work is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,1109,http://www.lilypond.org/ is not an HTTPS link
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,1113,http://www.trevorowens.org/2012/11/discovery-and-justification-are-different-notes-on-sciencing-the-humanities/ is not an HTTPS link
_site/pt/licoes/som-dados-sonificacao-historiadores.html,1118,'a' tag is missing a reference
_site/pt/licoes/som-dados-sonificacao-historiadores.html,1119,'a' tag is missing a reference
_site/pt/licoes/som-dados-sonificacao-historiadores.html,1120,'a' tag is missing a reference
@@ -3872,33 +2172,22 @@ _site/pt/licoes/som-dados-sonificacao-historiadores.html,1123,'a' tag is missing
_site/pt/licoes/som-dados-sonificacao-historiadores.html,1124,'a' tag is missing a reference
_site/pt/licoes/som-dados-sonificacao-historiadores.html,1128,'a' tag is missing a reference
_site/pt/licoes/som-dados-sonificacao-historiadores.html,1130,'a' tag is missing a reference
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,1130,http://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html is not an HTTPS link
_site/pt/licoes/som-dados-sonificacao-historiadores.html,1132,'a' tag is missing a reference
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,1132,http://www.jstor.org/stable/734136 is not an HTTPS link
_site/pt/licoes/som-dados-sonificacao-historiadores.html,1134,'a' tag is missing a reference
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,1134,http://www.icad.org/Proceedings/2008/Hermann2008.pdf is not an HTTPS link
_site/pt/licoes/som-dados-sonificacao-historiadores.html,1136,'a' tag is missing a reference
-_site/pt/licoes/som-dados-sonificacao-historiadores.html,1136,http://motherboard.vice.com/read/the-strange-acoustic-phenomenon-behind-these-wacked-out-versions-of-pop-songs is not an HTTPS link
_site/pt/licoes/som-dados-sonificacao-historiadores.html,1138,'a' tag is missing a reference
_site/pt/licoes/sumarizacao-narrativas-web-python.html,119,'a' tag is missing a reference
_site/pt/licoes/sumarizacao-narrativas-web-python.html,138,'a' tag is missing a reference
_site/pt/licoes/sumarizacao-narrativas-web-python.html,175,'a' tag is missing a reference
_site/pt/licoes/sumarizacao-narrativas-web-python.html,234,'a' tag is missing a reference
-_site/pt/licoes/sumarizacao-narrativas-web-python.html,529,http://arquivo.pt is not an HTTPS link
-_site/pt/licoes/sumarizacao-narrativas-web-python.html,822,http://yake.inesctec.pt is not an HTTPS link
-_site/pt/licoes/sumarizacao-narrativas-web-python.html,978,http://ecir2019.org/ is not an HTTPS link
_site/pt/licoes/trabalhando-ficheiros-texto-python.html,121,'a' tag is missing a reference
_site/pt/licoes/trabalhando-ficheiros-texto-python.html,140,'a' tag is missing a reference
_site/pt/licoes/trabalhando-ficheiros-texto-python.html,177,'a' tag is missing a reference
_site/pt/licoes/trabalhando-ficheiros-texto-python.html,236,'a' tag is missing a reference
-_site/pt/licoes/trabalhando-ficheiros-texto-python.html,603,http://docs.python.org/release/2.5.4/ref/keywords.html is not an HTTPS link
_site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html,135,'a' tag is missing a reference
_site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html,154,'a' tag is missing a reference
_site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html,191,'a' tag is missing a reference
_site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html,250,'a' tag is missing a reference
-_site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html,2193,http://patristica.net/graeca is not an HTTPS link
-_site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html,2193,http://stephanus.tlg.uci.edu is not an HTTPS link
-_site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html,2304,http://doi.org/10.30687/arm/9372-8175/2022/01/005 is not an HTTPS link
_site/pt/licoes/visualizacao-animacao-tabelas-historicas-R.html,119,'a' tag is missing a reference
_site/pt/licoes/visualizacao-animacao-tabelas-historicas-R.html,138,'a' tag is missing a reference
_site/pt/licoes/visualizacao-animacao-tabelas-historicas-R.html,175,'a' tag is missing a reference
@@ -3911,22 +2200,6 @@ _site/pt/pesquisa.html,88,'a' tag is missing a reference
_site/pt/pesquisa.html,107,'a' tag is missing a reference
_site/pt/pesquisa.html,144,'a' tag is missing a reference
_site/pt/pesquisa.html,203,'a' tag is missing a reference
-_site/pt/pesquisa.html,253,http://niche-canada.org/wp-content/uploads/2013/09/programming-historian-1.pdf is not an HTTPS link
-_site/pt/pesquisa.html,259,http://www.diva-portal.org/smash/record.jsf?pid=diva2%3A1508542&dswid=7551 is not an HTTPS link
-_site/pt/pesquisa.html,263,http://jah.oxfordjournals.org/content/103/1/299.2.full is not an HTTPS link
-_site/pt/pesquisa.html,264,http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ is not an HTTPS link
-_site/pt/pesquisa.html,276,http://www.digitalhumanities.org/dhq/vol/16/2/000585/000585.html is not an HTTPS link
-_site/pt/pesquisa.html,282,http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 is not an HTTPS link
-_site/pt/pesquisa.html,284,http://web.archive.org/web/20180713014622/http://dhcommons.org/journal/issue-1/editorial-sustainability-and-open-peer-review-programming-historian is not an HTTPS link
-_site/pt/pesquisa.html,285,http://www.themacroscope.org/2.0/ is not an HTTPS link
-_site/pt/pesquisa.html,306,http://www.iea.usp.br/eventos/historia-digital-educacao-caminhos-cruzados is not an HTTPS link
-_site/pt/pesquisa.html,390,http://ixa2.si.ehu.eus/intele/?q=webinars is not an HTTPS link
-_site/pt/pesquisa.html,393,http://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/ is not an HTTPS link
-_site/pt/pesquisa.html,406,http://fredgibbs.net/assets/images/ph-poster/final-board.png is not an HTTPS link
-_site/pt/pesquisa.html,419,http://niche-canada.org/2018/03/23/a-decade-of-programming-historians/ is not an HTTPS link
-_site/pt/pesquisa.html,420,http://fredgibbs.net/posts/reflections-former-PH-editor is not an HTTPS link
-_site/pt/pesquisa.html,421,http://clionauta.hypotheses.org/16979 is not an HTTPS link
-_site/pt/pesquisa.html,423,http://humanidadesdigitales.net/blog/2017/03/17/the-programming-historian-en-espanol/ is not an HTTPS link
_site/pt/politica-de-privacidade.html,88,'a' tag is missing a reference
_site/pt/politica-de-privacidade.html,107,'a' tag is missing a reference
_site/pt/politica-de-privacidade.html,144,'a' tag is missing a reference
@@ -3943,10 +2216,6 @@ _site/pt/sobre.html,88,'a' tag is missing a reference
_site/pt/sobre.html,107,'a' tag is missing a reference
_site/pt/sobre.html,144,'a' tag is missing a reference
_site/pt/sobre.html,203,'a' tag is missing a reference
-_site/pt/sobre.html,266,http://dhawards.org/dhawards2016/results/ is not an HTTPS link
-_site/pt/sobre.html,266,http://humanidadesdigitaleshispanicas.es/ is not an HTTPS link
-_site/pt/sobre.html,266,http://dhawards.org/dhawards2022/results/ is not an HTTPS link
-_site/pt/sobre.html,280,http://digitalhistoryhacks.blogspot.com/2008/01/programming-historian.html is not an HTTPS link
_site/pt/vagas.html,88,'a' tag is missing a reference
_site/pt/vagas.html,107,'a' tag is missing a reference
_site/pt/vagas.html,144,'a' tag is missing a reference
@@ -3955,366 +2224,30 @@ _site/translation-concordance.html,86,'a' tag is missing a reference
_site/translation-concordance.html,105,'a' tag is missing a reference
_site/translation-concordance.html,142,'a' tag is missing a reference
_site/translation-concordance.html,168,'a' tag is missing a reference
-_site/translation-concordance.html,267,'a' tag is missing a reference
-_site/translation-concordance.html,270,'a' tag is missing a reference
-_site/translation-concordance.html,273,'a' tag is missing a reference
-_site/translation-concordance.html,286,'a' tag is missing a reference
-_site/translation-concordance.html,289,'a' tag is missing a reference
-_site/translation-concordance.html,292,'a' tag is missing a reference
-_site/translation-concordance.html,308,'a' tag is missing a reference
-_site/translation-concordance.html,321,'a' tag is missing a reference
-_site/translation-concordance.html,327,'a' tag is missing a reference
-_site/translation-concordance.html,330,'a' tag is missing a reference
-_site/translation-concordance.html,340,'a' tag is missing a reference
-_site/translation-concordance.html,346,'a' tag is missing a reference
-_site/translation-concordance.html,349,'a' tag is missing a reference
-_site/translation-concordance.html,359,'a' tag is missing a reference
-_site/translation-concordance.html,366,'a' tag is missing a reference
-_site/translation-concordance.html,369,'a' tag is missing a reference
-_site/translation-concordance.html,382,'a' tag is missing a reference
-_site/translation-concordance.html,388,'a' tag is missing a reference
-_site/translation-concordance.html,401,'a' tag is missing a reference
-_site/translation-concordance.html,404,'a' tag is missing a reference
-_site/translation-concordance.html,407,'a' tag is missing a reference
-_site/translation-concordance.html,420,'a' tag is missing a reference
-_site/translation-concordance.html,423,'a' tag is missing a reference
-_site/translation-concordance.html,426,'a' tag is missing a reference
-_site/translation-concordance.html,442,'a' tag is missing a reference
-_site/translation-concordance.html,462,'a' tag is missing a reference
-_site/translation-concordance.html,479,'a' tag is missing a reference
-_site/translation-concordance.html,482,'a' tag is missing a reference
-_site/translation-concordance.html,485,'a' tag is missing a reference
-_site/translation-concordance.html,501,'a' tag is missing a reference
-_site/translation-concordance.html,504,'a' tag is missing a reference
-_site/translation-concordance.html,517,'a' tag is missing a reference
-_site/translation-concordance.html,523,'a' tag is missing a reference
-_site/translation-concordance.html,555,'a' tag is missing a reference
-_site/translation-concordance.html,558,'a' tag is missing a reference
-_site/translation-concordance.html,561,'a' tag is missing a reference
-_site/translation-concordance.html,574,'a' tag is missing a reference
-_site/translation-concordance.html,577,'a' tag is missing a reference
-_site/translation-concordance.html,580,'a' tag is missing a reference
-_site/translation-concordance.html,593,'a' tag is missing a reference
-_site/translation-concordance.html,596,'a' tag is missing a reference
-_site/translation-concordance.html,631,'a' tag is missing a reference
-_site/translation-concordance.html,634,'a' tag is missing a reference
-_site/translation-concordance.html,637,'a' tag is missing a reference
-_site/translation-concordance.html,650,'a' tag is missing a reference
-_site/translation-concordance.html,653,'a' tag is missing a reference
-_site/translation-concordance.html,656,'a' tag is missing a reference
-_site/translation-concordance.html,669,'a' tag is missing a reference
-_site/translation-concordance.html,672,'a' tag is missing a reference
-_site/translation-concordance.html,675,'a' tag is missing a reference
-_site/translation-concordance.html,688,'a' tag is missing a reference
-_site/translation-concordance.html,691,'a' tag is missing a reference
-_site/translation-concordance.html,694,'a' tag is missing a reference
-_site/translation-concordance.html,704,'a' tag is missing a reference
-_site/translation-concordance.html,707,'a' tag is missing a reference
-_site/translation-concordance.html,713,'a' tag is missing a reference
-_site/translation-concordance.html,723,'a' tag is missing a reference
-_site/translation-concordance.html,730,'a' tag is missing a reference
-_site/translation-concordance.html,733,'a' tag is missing a reference
-_site/translation-concordance.html,752,'a' tag is missing a reference
-_site/translation-concordance.html,765,'a' tag is missing a reference
-_site/translation-concordance.html,768,'a' tag is missing a reference
-_site/translation-concordance.html,771,'a' tag is missing a reference
-_site/translation-concordance.html,781,'a' tag is missing a reference
-_site/translation-concordance.html,788,'a' tag is missing a reference
-_site/translation-concordance.html,791,'a' tag is missing a reference
-_site/translation-concordance.html,809,'a' tag is missing a reference
-_site/translation-concordance.html,825,'a' tag is missing a reference
-_site/translation-concordance.html,828,'a' tag is missing a reference
-_site/translation-concordance.html,831,'a' tag is missing a reference
-_site/translation-concordance.html,866,'a' tag is missing a reference
-_site/translation-concordance.html,885,'a' tag is missing a reference
-_site/translation-concordance.html,902,'a' tag is missing a reference
-_site/translation-concordance.html,905,'a' tag is missing a reference
-_site/translation-concordance.html,908,'a' tag is missing a reference
-_site/translation-concordance.html,921,'a' tag is missing a reference
-_site/translation-concordance.html,924,'a' tag is missing a reference
-_site/translation-concordance.html,927,'a' tag is missing a reference
-_site/translation-concordance.html,940,'a' tag is missing a reference
-_site/translation-concordance.html,943,'a' tag is missing a reference
-_site/translation-concordance.html,946,'a' tag is missing a reference
-_site/translation-concordance.html,962,'a' tag is missing a reference
-_site/translation-concordance.html,965,'a' tag is missing a reference
-_site/translation-concordance.html,978,'a' tag is missing a reference
-_site/translation-concordance.html,981,'a' tag is missing a reference
-_site/translation-concordance.html,984,'a' tag is missing a reference
-_site/translation-concordance.html,997,'a' tag is missing a reference
-_site/translation-concordance.html,1000,'a' tag is missing a reference
-_site/translation-concordance.html,1003,'a' tag is missing a reference
-_site/translation-concordance.html,1019,'a' tag is missing a reference
-_site/translation-concordance.html,1022,'a' tag is missing a reference
-_site/translation-concordance.html,1051,'a' tag is missing a reference
-_site/translation-concordance.html,1057,'a' tag is missing a reference
-_site/translation-concordance.html,1060,'a' tag is missing a reference
-_site/translation-concordance.html,1073,'a' tag is missing a reference
-_site/translation-concordance.html,1076,'a' tag is missing a reference
-_site/translation-concordance.html,1079,'a' tag is missing a reference
-_site/translation-concordance.html,1092,'a' tag is missing a reference
-_site/translation-concordance.html,1095,'a' tag is missing a reference
-_site/translation-concordance.html,1098,'a' tag is missing a reference
-_site/translation-concordance.html,1111,'a' tag is missing a reference
-_site/translation-concordance.html,1117,'a' tag is missing a reference
-_site/translation-concordance.html,1130,'a' tag is missing a reference
-_site/translation-concordance.html,1133,'a' tag is missing a reference
-_site/translation-concordance.html,1136,'a' tag is missing a reference
-_site/translation-concordance.html,1152,'a' tag is missing a reference
-_site/translation-concordance.html,1171,'a' tag is missing a reference
-_site/translation-concordance.html,1174,'a' tag is missing a reference
-_site/translation-concordance.html,1184,'a' tag is missing a reference
-_site/translation-concordance.html,1190,'a' tag is missing a reference
-_site/translation-concordance.html,1193,'a' tag is missing a reference
-_site/translation-concordance.html,1207,'a' tag is missing a reference
-_site/translation-concordance.html,1226,'a' tag is missing a reference
-_site/translation-concordance.html,1229,'a' tag is missing a reference
-_site/translation-concordance.html,1245,'a' tag is missing a reference
-_site/translation-concordance.html,1248,'a' tag is missing a reference
-_site/translation-concordance.html,1264,'a' tag is missing a reference
-_site/translation-concordance.html,1267,'a' tag is missing a reference
-_site/translation-concordance.html,1270,'a' tag is missing a reference
-_site/translation-concordance.html,1283,'a' tag is missing a reference
-_site/translation-concordance.html,1286,'a' tag is missing a reference
-_site/translation-concordance.html,1289,'a' tag is missing a reference
-_site/translation-concordance.html,1302,'a' tag is missing a reference
-_site/translation-concordance.html,1305,'a' tag is missing a reference
-_site/translation-concordance.html,1308,'a' tag is missing a reference
-_site/translation-concordance.html,1356,'a' tag is missing a reference
-_site/translation-concordance.html,1362,'a' tag is missing a reference
-_site/translation-concordance.html,1365,'a' tag is missing a reference
-_site/translation-concordance.html,1378,'a' tag is missing a reference
-_site/translation-concordance.html,1384,'a' tag is missing a reference
-_site/translation-concordance.html,1397,'a' tag is missing a reference
-_site/translation-concordance.html,1400,'a' tag is missing a reference
-_site/translation-concordance.html,1416,'a' tag is missing a reference
-_site/translation-concordance.html,1419,'a' tag is missing a reference
-_site/translation-concordance.html,1422,'a' tag is missing a reference
-_site/translation-concordance.html,1439,'a' tag is missing a reference
-_site/translation-concordance.html,1452,'a' tag is missing a reference
-_site/translation-concordance.html,1458,'a' tag is missing a reference
-_site/translation-concordance.html,1461,'a' tag is missing a reference
-_site/translation-concordance.html,1475,'a' tag is missing a reference
-_site/translation-concordance.html,1478,'a' tag is missing a reference
-_site/translation-concordance.html,1481,'a' tag is missing a reference
-_site/translation-concordance.html,1497,'a' tag is missing a reference
-_site/translation-concordance.html,1500,'a' tag is missing a reference
-_site/translation-concordance.html,1533,'a' tag is missing a reference
-_site/translation-concordance.html,1536,'a' tag is missing a reference
-_site/translation-concordance.html,1549,'a' tag is missing a reference
-_site/translation-concordance.html,1552,'a' tag is missing a reference
-_site/translation-concordance.html,1555,'a' tag is missing a reference
-_site/translation-concordance.html,1575,'a' tag is missing a reference
-_site/translation-concordance.html,1578,'a' tag is missing a reference
-_site/translation-concordance.html,1595,'a' tag is missing a reference
-_site/translation-concordance.html,1598,'a' tag is missing a reference
-_site/translation-concordance.html,1611,'a' tag is missing a reference
-_site/translation-concordance.html,1614,'a' tag is missing a reference
-_site/translation-concordance.html,1617,'a' tag is missing a reference
-_site/translation-concordance.html,1630,'a' tag is missing a reference
-_site/translation-concordance.html,1633,'a' tag is missing a reference
-_site/translation-concordance.html,1636,'a' tag is missing a reference
-_site/translation-concordance.html,1649,'a' tag is missing a reference
-_site/translation-concordance.html,1652,'a' tag is missing a reference
-_site/translation-concordance.html,1655,'a' tag is missing a reference
-_site/translation-concordance.html,1687,'a' tag is missing a reference
-_site/translation-concordance.html,1690,'a' tag is missing a reference
-_site/translation-concordance.html,1693,'a' tag is missing a reference
-_site/translation-concordance.html,1706,'a' tag is missing a reference
-_site/translation-concordance.html,1709,'a' tag is missing a reference
-_site/translation-concordance.html,1712,'a' tag is missing a reference
-_site/translation-concordance.html,1725,'a' tag is missing a reference
-_site/translation-concordance.html,1728,'a' tag is missing a reference
-_site/translation-concordance.html,1731,'a' tag is missing a reference
-_site/translation-concordance.html,1744,'a' tag is missing a reference
-_site/translation-concordance.html,1747,'a' tag is missing a reference
-_site/translation-concordance.html,1750,'a' tag is missing a reference
-_site/translation-concordance.html,1763,'a' tag is missing a reference
-_site/translation-concordance.html,1766,'a' tag is missing a reference
-_site/translation-concordance.html,1769,'a' tag is missing a reference
-_site/translation-concordance.html,1801,'a' tag is missing a reference
-_site/translation-concordance.html,1804,'a' tag is missing a reference
-_site/translation-concordance.html,1807,'a' tag is missing a reference
-_site/translation-concordance.html,1823,'a' tag is missing a reference
-_site/translation-concordance.html,1842,'a' tag is missing a reference
-_site/translation-concordance.html,1845,'a' tag is missing a reference
-_site/translation-concordance.html,1858,'a' tag is missing a reference
-_site/translation-concordance.html,1861,'a' tag is missing a reference
-_site/translation-concordance.html,1864,'a' tag is missing a reference
-_site/translation-concordance.html,1878,'a' tag is missing a reference
-_site/translation-concordance.html,1884,'a' tag is missing a reference
-_site/translation-concordance.html,1894,'a' tag is missing a reference
-_site/translation-concordance.html,1901,'a' tag is missing a reference
-_site/translation-concordance.html,1904,'a' tag is missing a reference
-_site/translation-concordance.html,1914,'a' tag is missing a reference
-_site/translation-concordance.html,1920,'a' tag is missing a reference
-_site/translation-concordance.html,1933,'a' tag is missing a reference
-_site/translation-concordance.html,1939,'a' tag is missing a reference
-_site/translation-concordance.html,1942,'a' tag is missing a reference
-_site/translation-concordance.html,1952,'a' tag is missing a reference
-_site/translation-concordance.html,1958,'a' tag is missing a reference
-_site/translation-concordance.html,1961,'a' tag is missing a reference
-_site/translation-concordance.html,1978,'a' tag is missing a reference
-_site/translation-concordance.html,1981,'a' tag is missing a reference
-_site/translation-concordance.html,1991,'a' tag is missing a reference
-_site/translation-concordance.html,1994,'a' tag is missing a reference
-_site/translation-concordance.html,2000,'a' tag is missing a reference
-_site/translation-concordance.html,2035,'a' tag is missing a reference
-_site/translation-concordance.html,2038,'a' tag is missing a reference
-_site/translation-concordance.html,2052,'a' tag is missing a reference
-_site/translation-concordance.html,2055,'a' tag is missing a reference
-_site/translation-concordance.html,2058,'a' tag is missing a reference
-_site/translation-concordance.html,2072,'a' tag is missing a reference
-_site/translation-concordance.html,2088,'a' tag is missing a reference
-_site/translation-concordance.html,2091,'a' tag is missing a reference
-_site/translation-concordance.html,2094,'a' tag is missing a reference
-_site/translation-concordance.html,2110,'a' tag is missing a reference
-_site/translation-concordance.html,2113,'a' tag is missing a reference
-_site/translation-concordance.html,2116,'a' tag is missing a reference
-_site/translation-concordance.html,2129,'a' tag is missing a reference
-_site/translation-concordance.html,2151,'a' tag is missing a reference
-_site/translation-concordance.html,2167,'a' tag is missing a reference
-_site/translation-concordance.html,2170,'a' tag is missing a reference
-_site/translation-concordance.html,2173,'a' tag is missing a reference
-_site/translation-concordance.html,2205,'a' tag is missing a reference
-_site/translation-concordance.html,2208,'a' tag is missing a reference
-_site/translation-concordance.html,2211,'a' tag is missing a reference
-_site/translation-concordance.html,2227,'a' tag is missing a reference
-_site/translation-concordance.html,2262,'a' tag is missing a reference
-_site/translation-concordance.html,2265,'a' tag is missing a reference
-_site/translation-concordance.html,2268,'a' tag is missing a reference
-_site/translation-concordance.html,2281,'a' tag is missing a reference
-_site/translation-concordance.html,2284,'a' tag is missing a reference
-_site/translation-concordance.html,2287,'a' tag is missing a reference
-_site/translation-concordance.html,2319,'a' tag is missing a reference
-_site/translation-concordance.html,2322,'a' tag is missing a reference
-_site/translation-concordance.html,2325,'a' tag is missing a reference
-_site/translation-concordance.html,2341,'a' tag is missing a reference
-_site/translation-concordance.html,2360,'a' tag is missing a reference
-_site/translation-concordance.html,2363,'a' tag is missing a reference
-_site/translation-concordance.html,2373,'a' tag is missing a reference
-_site/translation-concordance.html,2376,'a' tag is missing a reference
-_site/translation-concordance.html,2382,'a' tag is missing a reference
-_site/translation-concordance.html,2411,'a' tag is missing a reference
-_site/translation-concordance.html,2417,'a' tag is missing a reference
-_site/translation-concordance.html,2420,'a' tag is missing a reference
-_site/translation-concordance.html,2433,'a' tag is missing a reference
-_site/translation-concordance.html,2436,'a' tag is missing a reference
-_site/translation-concordance.html,2474,'a' tag is missing a reference
-_site/translation-concordance.html,2487,'a' tag is missing a reference
-_site/translation-concordance.html,2493,'a' tag is missing a reference
-_site/translation-concordance.html,2496,'a' tag is missing a reference
-_site/translation-concordance.html,2512,'a' tag is missing a reference
-_site/translation-concordance.html,2515,'a' tag is missing a reference
-_site/translation-concordance.html,2529,'a' tag is missing a reference
-_site/translation-concordance.html,2532,'a' tag is missing a reference
-_site/translation-concordance.html,2551,'a' tag is missing a reference
-_site/translation-concordance.html,2567,'a' tag is missing a reference
-_site/translation-concordance.html,2570,'a' tag is missing a reference
-_site/translation-concordance.html,2573,'a' tag is missing a reference
-_site/translation-concordance.html,2586,'a' tag is missing a reference
-_site/translation-concordance.html,2589,'a' tag is missing a reference
-_site/translation-concordance.html,2605,'a' tag is missing a reference
-_site/translation-concordance.html,2608,'a' tag is missing a reference
-_site/translation-concordance.html,2611,'a' tag is missing a reference
-_site/translation-concordance.html,2621,'a' tag is missing a reference
-_site/translation-concordance.html,2624,'a' tag is missing a reference
-_site/translation-concordance.html,2627,'a' tag is missing a reference
-_site/translation-concordance.html,2667,'a' tag is missing a reference
-_site/translation-concordance.html,2670,'a' tag is missing a reference
-_site/translation-concordance.html,2683,'a' tag is missing a reference
-_site/translation-concordance.html,2686,'a' tag is missing a reference
-_site/translation-concordance.html,2689,'a' tag is missing a reference
-_site/translation-concordance.html,2702,'a' tag is missing a reference
-_site/translation-concordance.html,2705,'a' tag is missing a reference
-_site/translation-concordance.html,2708,'a' tag is missing a reference
-_site/translation-concordance.html,2724,'a' tag is missing a reference
-_site/translation-concordance.html,2727,'a' tag is missing a reference
-_site/translation-concordance.html,2740,'a' tag is missing a reference
-_site/translation-concordance.html,2743,'a' tag is missing a reference
-_site/translation-concordance.html,2746,'a' tag is missing a reference
-_site/translation-concordance.html,2756,'a' tag is missing a reference
-_site/translation-concordance.html,2759,'a' tag is missing a reference
-_site/translation-concordance.html,2781,'a' tag is missing a reference
-_site/translation-concordance.html,2784,'a' tag is missing a reference
-_site/translation-concordance.html,2797,'a' tag is missing a reference
-_site/translation-concordance.html,2800,'a' tag is missing a reference
-_site/translation-concordance.html,2803,'a' tag is missing a reference
-_site/translation-concordance.html,2816,'a' tag is missing a reference
-_site/translation-concordance.html,2819,'a' tag is missing a reference
-_site/translation-concordance.html,2822,'a' tag is missing a reference
-_site/translation-concordance.html,2835,'a' tag is missing a reference
-_site/translation-concordance.html,2841,'a' tag is missing a reference
-_site/translation-concordance.html,2873,'a' tag is missing a reference
-_site/translation-concordance.html,2876,'a' tag is missing a reference
-_site/translation-concordance.html,2879,'a' tag is missing a reference
-_site/translation-concordance.html,2893,'a' tag is missing a reference
-_site/translation-concordance.html,2896,'a' tag is missing a reference
-_site/translation-concordance.html,2899,'a' tag is missing a reference
-_site/translation-concordance.html,2909,'a' tag is missing a reference
-_site/translation-concordance.html,2915,'a' tag is missing a reference
-_site/translation-concordance.html,2918,'a' tag is missing a reference
-_site/translation-concordance.html,2931,'a' tag is missing a reference
-_site/translation-concordance.html,2934,'a' tag is missing a reference
-_site/translation-concordance.html,2966,'a' tag is missing a reference
-_site/translation-concordance.html,2969,'a' tag is missing a reference
-_site/translation-concordance.html,2972,'a' tag is missing a reference
-_site/translation-concordance.html,2985,'a' tag is missing a reference
-_site/translation-concordance.html,2992,'a' tag is missing a reference
-_site/translation-concordance.html,3008,'a' tag is missing a reference
-_site/translation-concordance.html,3011,'a' tag is missing a reference
-_site/translation-concordance.html,3014,'a' tag is missing a reference
-_site/translation-concordance.html,3046,'a' tag is missing a reference
-_site/translation-concordance.html,3049,'a' tag is missing a reference
-_site/translation-concordance.html,3052,'a' tag is missing a reference
-_site/translation-concordance.html,3115,'a' tag is missing a reference
-_site/translation-concordance.html,3118,'a' tag is missing a reference
-_site/translation-concordance.html,3121,'a' tag is missing a reference
-_site/translation-concordance.html,3420,'a' tag is missing a reference
-_site/translation-concordance.html,3423,'a' tag is missing a reference
-_site/translation-concordance.html,3426,'a' tag is missing a reference
-_site/translation-concordance.html,3458,'a' tag is missing a reference
-_site/translation-concordance.html,3461,'a' tag is missing a reference
-_site/translation-concordance.html,3464,'a' tag is missing a reference
-_site/translation-concordance.html,3496,'a' tag is missing a reference
-_site/translation-concordance.html,3499,'a' tag is missing a reference
-_site/translation-concordance.html,3502,'a' tag is missing a reference
_site/troubleshooting.html,86,'a' tag is missing a reference
_site/troubleshooting.html,105,'a' tag is missing a reference
_site/troubleshooting.html,142,'a' tag is missing a reference
_site/troubleshooting.html,168,'a' tag is missing a reference
-_site/troubleshooting.html,303,http://stackoverflow.com/ is not an HTTPS link
-_site/troubleshooting.html,303,http://mail.python.org/mailman/listinfo/tutor is not an HTTPS link
-_site/troubleshooting.html,322,http://web.archive.org/web/20130101093828/http://stackoverflow.com/faq is not an HTTPS link
-_site/troubleshooting.html,373,http://wiki.python.org/moin/BeginnersGuide/NonProgrammers is not an HTTPS link
-_site/troubleshooting.html,375,http://learnpython.org/ is not an HTTPS link
-_site/troubleshooting.html,379,http://www.w3schools.com/html/default.asp is not an HTTPS link
-_site/troubleshooting.html,387,http://wiki.python.org/moin/BeginnersGuide/Programmers is not an HTTPS link
-_site/troubleshooting.html,389,http://docs.python.org/ is not an HTTPS link
-_site/troubleshooting.html,392,http://www.diveintopython.net is not an HTTPS link
-_site/troubleshooting.html,398,http://www.worldcat.org/oclc/156890981 is not an HTTPS link
-_site/troubleshooting.html,399,http://www.worldcat.org/oclc/65765375 is not an HTTPS link
-_site/troubleshooting.html,400,http://www.worldcat.org/oclc/59007845 is not an HTTPS link
_site/404.html,303,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/404.html,313,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/404.md failed (status code 429)
-_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,199,External link http://www.sheffield.ac.uk/hri/ failed: Forbidden (status code 403)
-_site/assets/mapping-with-python-leaflet/exercises/exercise00 - original/mymap.html,3,External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
-_site/assets/mapping-with-python-leaflet/exercises/exercise00 - original/mymap.html,4,External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
-_site/assets/mapping-with-python-leaflet/exercises/exercise01/mymap.html,3,External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
-_site/assets/mapping-with-python-leaflet/exercises/exercise01/mymap.html,4,External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
-_site/assets/mapping-with-python-leaflet/exercises/exercise02/mymap.html,3,External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
-_site/assets/mapping-with-python-leaflet/exercises/exercise02/mymap.html,4,External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
-_site/assets/mapping-with-python-leaflet/exercises/exercise03/mymap.html,3,External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
-_site/assets/mapping-with-python-leaflet/exercises/exercise03/mymap.html,4,External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
-_site/assets/mapping-with-python-leaflet/exercises/exercise04/mymap.html,3,External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
-_site/assets/mapping-with-python-leaflet/exercises/exercise04/mymap.html,4,External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
-_site/assets/mapping-with-python-leaflet/exercises/exercise05/mymap.html,3,External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
-_site/assets/mapping-with-python-leaflet/exercises/exercise05/mymap.html,4,External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
-_site/assets/mapping-with-python-leaflet/map/mymap-onepage.html,3,External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
-_site/assets/mapping-with-python-leaflet/map/mymap-onepage.html,4,External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
-_site/assets/mapping-with-python-leaflet/map/mymap.html,3,External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
-_site/assets/mapping-with-python-leaflet/map/mymap.html,4,External link http://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
-_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,199,External link http://www.sheffield.ac.uk/hri/ failed: Forbidden (status code 403)
+_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,199,External link https://www.sheffield.ac.uk/hri/ failed: Forbidden (status code 403)
+_site/assets/mapping-with-python-leaflet/exercises/exercise00 - original/mymap.html,3,External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
+_site/assets/mapping-with-python-leaflet/exercises/exercise00 - original/mymap.html,4,External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
+_site/assets/mapping-with-python-leaflet/exercises/exercise01/mymap.html,3,External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
+_site/assets/mapping-with-python-leaflet/exercises/exercise01/mymap.html,4,External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
+_site/assets/mapping-with-python-leaflet/exercises/exercise02/mymap.html,3,External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
+_site/assets/mapping-with-python-leaflet/exercises/exercise02/mymap.html,4,External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
+_site/assets/mapping-with-python-leaflet/exercises/exercise03/mymap.html,3,External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
+_site/assets/mapping-with-python-leaflet/exercises/exercise03/mymap.html,4,External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
+_site/assets/mapping-with-python-leaflet/exercises/exercise04/mymap.html,3,External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
+_site/assets/mapping-with-python-leaflet/exercises/exercise04/mymap.html,4,External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
+_site/assets/mapping-with-python-leaflet/exercises/exercise05/mymap.html,3,External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
+_site/assets/mapping-with-python-leaflet/exercises/exercise05/mymap.html,4,External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
+_site/assets/mapping-with-python-leaflet/map/mymap-onepage.html,3,External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
+_site/assets/mapping-with-python-leaflet/map/mymap-onepage.html,4,External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
+_site/assets/mapping-with-python-leaflet/map/mymap.html,3,External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.css failed with something very wrong.
+_site/assets/mapping-with-python-leaflet/map/mymap.html,4,External link https://cdn.leafletjs.com/leaflet-0.6.4/leaflet.js failed with something very wrong.
+_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,199,External link https://www.sheffield.ac.uk/hri/ failed: Forbidden (status code 403)
_site/blog/index.html,506,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/blog/index.html,516,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/blog/index.html failed (status code 429)
_site/blog/page10/index.html,454,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
@@ -4350,12 +2283,16 @@ _site/blog/page8/index.html,460,External link https://github.com/programminghist
_site/blog/page8/index.html,470,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/blog/page8/index.html failed (status code 429)
_site/blog/page9/index.html,487,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/blog/page9/index.html,497,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/blog/page9/index.html failed (status code 429)
+_site/en/about.html,264,External link https://dhawards.org/dhawards2022/results/ failed with something very wrong.
+_site/en/about.html,264,External link https://dhawards.org/dhawards2016/results/ failed with something very wrong.
_site/en/about.html,264,External link https://openpublishingawards.org/results/2021/index.html failed with something very wrong.
+_site/en/about.html,264,External link https://dhawards.org/dhawards2017/results/ failed with something very wrong.
_site/en/about.html,322,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/about.html,332,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/about.md failed (status code 429)
_site/en/author-guidelines.html,261,"External link https://docs.google.com/spreadsheets/d/1vrvZTygZLfQRoQildD667Xcgzhf_reQC8Nq4OD-BRIA/edit#gid=0 failed: https://docs.google.com/spreadsheets/d/1vrvZTygZLfQRoQildD667Xcgzhf_reQC8Nq4OD-BRIA/edit exists, but the hash 'gid=0' does not (status code 200)"
_site/en/author-guidelines.html,687,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/author-guidelines.html,697,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/author-guidelines.md failed (status code 429)
+_site/en/contribute.html,298,External link https://www.worldcat.org/title/programming-historian-en-espanol/oclc/1061292935&referer=brief_results failed (status code 403)
_site/en/contribute.html,353,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/contribute.html,363,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/contribute.md failed (status code 429)
_site/en/editor-guidelines.html,338,External link https://github.com/programminghistorian/ph-submissions/commits/gh-pages failed (status code 429)
@@ -4373,35 +2310,37 @@ _site/en/index.html,336,External link https://github.com/programminghistorian/je
_site/en/individual.html,263,External link https://www.patreon.com/join/theprogramminghistorian failed (status code 403)
_site/en/individual.html,320,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/individual.html,330,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/individual.md failed (status code 429)
-_site/en/ipp.html,269,External link https://www.oecd.org/en/topics/sub-issues/oda-eligibility-and-conditions/dac-list-of-oda-recipients.html failed (status code 403)
_site/en/ipp.html,411,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/ipp.html,421,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/ipp.md failed (status code 429)
_site/en/jisc-tna-partnership.html,334,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/jisc-tna-partnership.html,344,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/jisc-tna-partnership.md failed (status code 429)
_site/en/lesson-retirement-policy.html,359,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/lesson-retirement-policy.html,369,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lesson-retirement-policy.md failed (status code 429)
+_site/en/lessons/analyzing-documents-with-tfidf.html,1478,External link https://jonathanstray.com/a-full-text-visualization-of-the-iraq-war-logs failed with something very wrong.
_site/en/lessons/analyzing-documents-with-tfidf.html,1478,External link https://www.overviewdocs.com failed with something very wrong.
+_site/en/lessons/analyzing-documents-with-tfidf.html,1498,External link https://doi.org/10.18653/v1/W18-2502 failed (status code 409)
_site/en/lessons/analyzing-documents-with-tfidf.html,1528,External link https://www.overviewdocs.com failed with something very wrong.
_site/en/lessons/analyzing-documents-with-tfidf.html,1633,External link https://www.nytimes.com/2019/01/02/obituaries/karen-sparck-jones-overlooked.html failed (status code 403)
+_site/en/lessons/analyzing-documents-with-tfidf.html,1651,External link https://jonathanstray.com/a-full-text-visualization-of-the-iraq-war-logs failed with something very wrong.
_site/en/lessons/analyzing-documents-with-tfidf.html,1663,External link https://datascience.stackexchange.com/questions/21950/why-we-should-not-feed-lda-with-tfidf failed (status code 403)
-_site/en/lessons/analyzing-documents-with-tfidf.html,1666,External link http://journalofdigitalhumanities.org/2-1/words-alone-by-benjamin-m-schmidt/ failed: Moved Permanently (status code 301)
+_site/en/lessons/analyzing-documents-with-tfidf.html,1666,External link https://journalofdigitalhumanities.org/2-1/words-alone-by-benjamin-m-schmidt/ failed with something very wrong.
_site/en/lessons/analyzing-documents-with-tfidf.html,2207,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/lessons/analyzing-documents-with-tfidf.html,2217,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/analyzing-documents-with-tfidf.md failed (status code 429)
_site/en/lessons/analyzing-multilingual-text-nltk-spacy-stanza.html,602,"External link https://stanfordnlp.github.io/stanza/models.html#human-languages-supported-by-stanza failed: https://stanfordnlp.github.io/stanza/models.html exists, but the hash 'human-languages-supported-by-stanza' does not (status code 200)"
_site/en/lessons/analyzing-multilingual-text-nltk-spacy-stanza.html,1918,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/lessons/analyzing-multilingual-text-nltk-spacy-stanza.html,1928,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/analyzing-multilingual-text-nltk-spacy-stanza.md failed (status code 429)
-_site/en/lessons/applied-archival-downloading-with-wget.html,471,External link http://www.activehistory.ca failed with something very wrong.
-_site/en/lessons/applied-archival-downloading-with-wget.html,631,External link http://nla.gov.au/nla.ms-ms5393-1-s1-v.jpg failed: Server Error (status code 500)
-_site/en/lessons/applied-archival-downloading-with-wget.html,633,External link http://nla.gov.au/nla.ms-ms5393-1-s127-v.jpg failed: Server Error (status code 500)
-_site/en/lessons/applied-archival-downloading-with-wget.html,680,External link http://memory.loc.gov/cgi-bin/ampage?collId=mtj1&fileName=mtj1page001.db&recNum=1&itemLink=/ammem/collections/jefferson_papers/mtjser1.html&linkText=6 failed: got a time out (response code 0) (status code 0)
-_site/en/lessons/applied-archival-downloading-with-wget.html,689,External link http://memory.loc.gov/master/mss/mtj/mtj1/001/0000/ failed: got a time out (response code 0) (status code 0)
-_site/en/lessons/applied-archival-downloading-with-wget.html,701,External link http://memory.loc.gov/master/mss/mtj/mtj1/001/0000/ failed: got a time out (response code 0) (status code 0)
-_site/en/lessons/applied-archival-downloading-with-wget.html,703,External link http://memory.loc.gov/master/mss/mtj/mtj1/001/0100/ failed: got a time out (response code 0) (status code 0)
-_site/en/lessons/applied-archival-downloading-with-wget.html,705,External link http://memory.loc.gov/master/mss/mtj/mtj1/001/0200/ failed: got a time out (response code 0) (status code 0)
-_site/en/lessons/applied-archival-downloading-with-wget.html,709,External link http://memory.loc.gov/master/mss/mtj/mtj1/001/1400 failed: got a time out (response code 0) (status code 0)
-_site/en/lessons/applied-archival-downloading-with-wget.html,735,External link http://cushing.med.yale.edu/gsdl/collect/mdposter/ failed with something very wrong.
-_site/en/lessons/applied-archival-downloading-with-wget.html,748,External link http://cushing.med.yale.edu/images/mdposter/full/poster0001.jpg failed with something very wrong.
-_site/en/lessons/applied-archival-downloading-with-wget.html,753,External link http://cushing.med.yale.edu/images/mdposter/full/poster0637.jpg failed with something very wrong.
+_site/en/lessons/applied-archival-downloading-with-wget.html,471,External link https://www.activehistory.ca failed with something very wrong.
+_site/en/lessons/applied-archival-downloading-with-wget.html,631,External link https://nla.gov.au/nla.ms-ms5393-1-s1-v.jpg failed (status code 500)
+_site/en/lessons/applied-archival-downloading-with-wget.html,633,External link https://nla.gov.au/nla.ms-ms5393-1-s127-v.jpg failed (status code 500)
+_site/en/lessons/applied-archival-downloading-with-wget.html,680,External link https://memory.loc.gov/cgi-bin/ampage?collId=mtj1&fileName=mtj1page001.db&recNum=1&itemLink=/ammem/collections/jefferson_papers/mtjser1.html&linkText=6 failed (status code 404)
+_site/en/lessons/applied-archival-downloading-with-wget.html,689,External link https://memory.loc.gov/master/mss/mtj/mtj1/001/0000/ failed (status code 403)
+_site/en/lessons/applied-archival-downloading-with-wget.html,701,External link https://memory.loc.gov/master/mss/mtj/mtj1/001/0000/ failed (status code 403)
+_site/en/lessons/applied-archival-downloading-with-wget.html,703,External link https://memory.loc.gov/master/mss/mtj/mtj1/001/0100/ failed (status code 403)
+_site/en/lessons/applied-archival-downloading-with-wget.html,705,External link https://memory.loc.gov/master/mss/mtj/mtj1/001/0200/ failed (status code 403)
+_site/en/lessons/applied-archival-downloading-with-wget.html,709,External link https://memory.loc.gov/master/mss/mtj/mtj1/001/1400 failed (status code 403)
+_site/en/lessons/applied-archival-downloading-with-wget.html,735,External link https://cushing.med.yale.edu/gsdl/collect/mdposter/ failed with something very wrong.
+_site/en/lessons/applied-archival-downloading-with-wget.html,748,External link https://cushing.med.yale.edu/images/mdposter/full/poster0001.jpg failed with something very wrong.
+_site/en/lessons/applied-archival-downloading-with-wget.html,753,External link https://cushing.med.yale.edu/images/mdposter/full/poster0637.jpg failed with something very wrong.
_site/en/lessons/applied-archival-downloading-with-wget.html,1345,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/lessons/applied-archival-downloading-with-wget.html,1355,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/applied-archival-downloading-with-wget.md failed (status code 429)
_site/en/lessons/automated-downloading-with-wget.html,1502,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
@@ -4411,15 +2350,16 @@ _site/en/lessons/basic-text-processing-in-r.html,2147,External link https://gith
_site/en/lessons/basic-text-processing-in-r.html,2157,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/basic-text-processing-in-r.md failed (status code 429)
_site/en/lessons/beginners-guide-to-twitter-data.html,494,External link https://twitter.com/ failed (status code 400)
_site/en/lessons/beginners-guide-to-twitter-data.html,500,External link https://tweetsets.library.gwu.edu/ failed with something very wrong.
-_site/en/lessons/beginners-guide-to-twitter-data.html,846,External link https://digitalfellows.commons.gc.cuny.edu/2019/06/03/finding-the-right-tools-for-mapping/ failed: got a time out (response code 0) (status code 0)
-_site/en/lessons/beginners-guide-to-twitter-data.html,864,External link https://tweetsets.library.gwu.edu failed with something very wrong.
_site/en/lessons/beginners-guide-to-twitter-data.html,864,External link https://tweetsets.library.gwu.edu/full-dataset/ failed with something very wrong.
-_site/en/lessons/beginners-guide-to-twitter-data.html,868,External link http://journalofdigitalhumanities.org/1-1/demystifying-networks-by-scott-weingart/ failed: Moved Permanently (status code 301)
+_site/en/lessons/beginners-guide-to-twitter-data.html,864,External link https://tweetsets.library.gwu.edu failed with something very wrong.
+_site/en/lessons/beginners-guide-to-twitter-data.html,868,External link https://journalofdigitalhumanities.org/1-1/demystifying-networks-by-scott-weingart/ failed with something very wrong.
_site/en/lessons/beginners-guide-to-twitter-data.html,2577,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/lessons/beginners-guide-to-twitter-data.html,2587,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/beginners-guide-to-twitter-data.md failed (status code 429)
-_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1545,External link http://jekyll-windows.juthilo.com/ failed with something very wrong.
+_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1474,External link https://jekyllthemes.org/ failed with something very wrong.
+_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,1545,External link https://jekyll-windows.juthilo.com/ failed with something very wrong.
_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,2088,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/lessons/building-static-sites-with-jekyll-github-pages.html,2098,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/building-static-sites-with-jekyll-github-pages.md failed (status code 429)
+_site/en/lessons/calibrating-radiocarbon-dates-r.html,785,External link https://calib.org failed: got a time out (response code 0) (status code 0)
_site/en/lessons/calibrating-radiocarbon-dates-r.html,1209,External link https://doi.org/10.1126/science.105.2735.576 failed (status code 403)
_site/en/lessons/calibrating-radiocarbon-dates-r.html,1224,External link https://doi.org/10.1126/science.110.2869.678 failed (status code 403)
_site/en/lessons/calibrating-radiocarbon-dates-r.html,1239,External link https://doi.org/10.2307/2684423 failed (status code 403)
@@ -4427,6 +2367,7 @@ _site/en/lessons/calibrating-radiocarbon-dates-r.html,1248,External link https:/
_site/en/lessons/calibrating-radiocarbon-dates-r.html,2231,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/lessons/calibrating-radiocarbon-dates-r.html,2241,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/calibrating-radiocarbon-dates-r.md failed (status code 429)
_site/en/lessons/cleaning-data-with-openrefine.html,539,External link https://powerhouse.com.au/ failed (status code 429)
+_site/en/lessons/cleaning-data-with-openrefine.html,579,External link https://vis.stanford.edu/papers/wrangler/ failed with something very wrong.
_site/en/lessons/cleaning-data-with-openrefine.html,598,External link https://powerhouse.com.au/ failed (status code 429) _site/en/lessons/cleaning-data-with-openrefine.html,2324,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/cleaning-data-with-openrefine.html,2334,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/cleaning-data-with-openrefine.md failed (status code 429) @@ -4441,8 +2382,8 @@ _site/en/lessons/clustering-with-scikit-learn-in-python.html,2605,External link _site/en/lessons/code-reuse-and-modularity.html,634,External link https://users.astro.ufl.edu/~warner/prog/python.html failed with something very wrong. _site/en/lessons/code-reuse-and-modularity.html,1608,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/code-reuse-and-modularity.html,1618,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/code-reuse-and-modularity.md failed (status code 429) -_site/en/lessons/collaborative-blog-with-jekyll-github.html,1508,External link https://github.com/scholarslab/scholarslab.org/blob/master/docs/authoring-and-editing.md#markdown--formatting failed (status code 429) -_site/en/lessons/collaborative-blog-with-jekyll-github.html,1528,External link http://jekyll-windows.juthilo.com/ failed with something very wrong. +_site/en/lessons/collaborative-blog-with-jekyll-github.html,1508,"External link https://github.com/scholarslab/scholarslab.org/blob/master/docs/authoring-and-editing.md#markdown--formatting failed: https://github.com/scholarslab/scholarslab.org/blob/master/docs/authoring-and-editing.md exists, but the hash 'markdown--formatting' does not (status code 200)" +_site/en/lessons/collaborative-blog-with-jekyll-github.html,1528,External link https://jekyll-windows.juthilo.com/ failed with something very wrong. 
_site/en/lessons/collaborative-blog-with-jekyll-github.html,3012,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/collaborative-blog-with-jekyll-github.html,3022,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/collaborative-blog-with-jekyll-github.md failed (status code 429) _site/en/lessons/common-similarity-measures.html,701,"External link https://en.wikipedia.org/wiki/Trigonometric_functions#cos failed: https://en.wikipedia.org/wiki/Trigonometric_functions exists, but the hash 'cos' does not (status code 200)" @@ -4463,7 +2404,6 @@ _site/en/lessons/corpus-analysis-with-antconc.html,580,External link https://aca _site/en/lessons/corpus-analysis-with-antconc.html,1561,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/corpus-analysis-with-antconc.html,1571,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/corpus-analysis-with-antconc.md failed (status code 429) _site/en/lessons/corpus-analysis-with-spacy.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/546 failed (status code 429) -_site/en/lessons/corpus-analysis-with-spacy.html,1809,External link https://github.com/explosion/spaCy/blob/master/spacy/glossary.py failed (status code 429) _site/en/lessons/corpus-analysis-with-spacy.html,2014,External link https://doi.org/10.3366/cor.2012.0015 failed (status code 403) _site/en/lessons/corpus-analysis-with-spacy.html,2017,External link https://doi.org/10.3366/cor.2013.0040 failed (status code 403) _site/en/lessons/corpus-analysis-with-spacy.html,2020,External link https://doi.org/10.1177/0741088318819472 failed (status code 403) @@ -4471,7 +2411,8 @@ _site/en/lessons/corpus-analysis-with-spacy.html,2038,External link https://doi. _site/en/lessons/corpus-analysis-with-spacy.html,2576,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/corpus-analysis-with-spacy.html,2586,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/corpus-analysis-with-spacy.md failed (status code 429) _site/en/lessons/correspondence-analysis-in-R.html,354,External link https://github.com/programminghistorian/ph-submissions/issues/78 failed (status code 429) -_site/en/lessons/correspondence-analysis-in-R.html,1125,External link http://www.cbc.ca/news/indigenous/mmiw-inquiry-not-reaching-out-to-families-says-advocates-1.4053694 failed: got a time out (response code 0) (status code 0) +_site/en/lessons/correspondence-analysis-in-R.html,665,External link https://factominer.free.fr/ failed: got a time out (response code 0) (status code 0) +_site/en/lessons/correspondence-analysis-in-R.html,1125,External link https://www.cbc.ca/news/indigenous/mmiw-inquiry-not-reaching-out-to-families-says-advocates-1.4053694 failed with something very wrong. 
_site/en/lessons/correspondence-analysis-in-R.html,1666,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/correspondence-analysis-in-R.html,1676,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/correspondence-analysis-in-R.md failed (status code 429) _site/en/lessons/counting-frequencies.html,1880,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -4481,20 +2422,12 @@ _site/en/lessons/creating-an-omeka-exhibit.html,1586,External link https://githu _site/en/lessons/creating-and-viewing-html-files-with-python.html,1659,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/creating-and-viewing-html-files-with-python.html,1669,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/creating-and-viewing-html-files-with-python.md failed (status code 429) _site/en/lessons/creating-apis-with-python-and-flask.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/106 failed (status code 429) -_site/en/lessons/creating-apis-with-python-and-flask.html,725,External link http://127.0.0.1:5000/ failed: Forbidden (status code 403) -_site/en/lessons/creating-apis-with-python-and-flask.html,749,External link http://127.0.0.1:5000/ failed: Forbidden (status code 403) -_site/en/lessons/creating-apis-with-python-and-flask.html,844,External link http://127.0.0.1:5000/api/v1/resources/books/all failed: Forbidden (status code 403) -_site/en/lessons/creating-apis-with-python-and-flask.html,921,External link http://127.0.0.1:5000/api/v1/resources/books?id=0 failed: Forbidden (status code 403) -_site/en/lessons/creating-apis-with-python-and-flask.html,930,External link http://127.0.0.1:5000/api/v1/resources/books failed: Forbidden (status code 403) -_site/en/lessons/creating-apis-with-python-and-flask.html,1004,External link https://pro.europeana.eu/resources/apis failed (status code 403) -_site/en/lessons/creating-apis-with-python-and-flask.html,1092,External link http://127.0.0.1:5000/api/v1/resources/books/all failed: Forbidden (status code 403) -_site/en/lessons/creating-apis-with-python-and-flask.html,1093,External link http://127.0.0.1:5000/api/v1/resources/books?author=Connie+Willis failed: Forbidden (status code 403) -_site/en/lessons/creating-apis-with-python-and-flask.html,1094,External link http://127.0.0.1:5000/api/v1/resources/books?author=Connie+Willis&published=1993 failed: Forbidden (status code 403) -_site/en/lessons/creating-apis-with-python-and-flask.html,1095,External link http://127.0.0.1:5000/api/v1/resources/books?published=2010 failed: Forbidden (status code 403) -_site/en/lessons/creating-apis-with-python-and-flask.html,1236,External link https://pro.europeana.eu/ failed (status code 403) -_site/en/lessons/creating-apis-with-python-and-flask.html,1781,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) -_site/en/lessons/creating-apis-with-python-and-flask.html,1791,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/creating-apis-with-python-and-flask.md failed (status code 429) -_site/en/lessons/creating-guis-in-python-for-digital-humanities-projects.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/479 failed (status code 429) 
+_site/en/lessons/creating-apis-with-python-and-flask.html,667,External link https://flask.pocoo.org/ failed with something very wrong. +_site/en/lessons/creating-apis-with-python-and-flask.html,1006,External link https://pro.europeana.eu/resources/apis failed (status code 403) +_site/en/lessons/creating-apis-with-python-and-flask.html,1239,External link https://hds.essex.ac.uk/ failed with something very wrong. +_site/en/lessons/creating-apis-with-python-and-flask.html,1241,External link https://pro.europeana.eu/ failed (status code 403) +_site/en/lessons/creating-apis-with-python-and-flask.html,1786,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) +_site/en/lessons/creating-apis-with-python-and-flask.html,1796,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/creating-apis-with-python-and-flask.md failed (status code 429) _site/en/lessons/creating-guis-in-python-for-digital-humanities-projects.html,1521,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/creating-guis-in-python-for-digital-humanities-projects.html,1531,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/creating-guis-in-python-for-digital-humanities-projects.md failed (status code 429) _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/175 failed (status code 429) @@ -4502,30 +2435,30 @@ _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html,564 _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html,599,External link https://www.hpreveal.com/ failed with something very wrong. _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html,1880,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.html,1890,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/creating-mobile-augmented-reality-experiences-in-unity.md failed (status code 429) -_site/en/lessons/creating-network-diagrams-from-historical-sources.html,518,External link http://martenduering.com/research/covert-networks-during-the-holocaust/ failed: Moved Permanently (status code 301) -_site/en/lessons/creating-network-diagrams-from-historical-sources.html,948,External link http://www.cambridge.org/us/academic/subjects/sociology/research-methods-sociology-and-criminology/exploratory-social-network-analysis-pajek-2nd-edition failed (status code 403) -_site/en/lessons/creating-network-diagrams-from-historical-sources.html,948,External link http://pajek.imfm.si/doku.php failed: got a time out (response code 0) (status code 0) +_site/en/lessons/creating-network-diagrams-from-historical-sources.html,518,External link https://martenduering.com/research/covert-networks-during-the-holocaust/ failed with something very wrong. 
+_site/en/lessons/creating-network-diagrams-from-historical-sources.html,948,External link https://pajek.imfm.si/doku.php failed: got a time out (response code 0) (status code 0)
_site/en/lessons/creating-network-diagrams-from-historical-sources.html,1485,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/lessons/creating-network-diagrams-from-historical-sources.html,1495,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/creating-network-diagrams-from-historical-sources.md failed (status code 429)
_site/en/lessons/crowdsourced-data-normalization-with-pandas.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/301 failed (status code 429)
+_site/en/lessons/crowdsourced-data-normalization-with-pandas.html,507,External link https://transcribe-bentham.ucl.ac.uk/td/Transcribe_Bentham failed: got a time out (response code 0) (status code 0)
_site/en/lessons/crowdsourced-data-normalization-with-pandas.html,509,External link https://www.netflixprize.com/ failed with something very wrong.
-_site/en/lessons/crowdsourced-data-normalization-with-pandas.html,883,External link http://pandas-docs.github.io/pandas-docs-travis/user_guide/timeseries.html#timestamp-limitations failed: Not Found (status code 404)
+_site/en/lessons/crowdsourced-data-normalization-with-pandas.html,883,External link https://pandas-docs.github.io/pandas-docs-travis/user_guide/timeseries.html#timestamp-limitations failed (status code 404)
+_site/en/lessons/crowdsourced-data-normalization-with-pandas.html,990,External link https://curatingmenus.org/articles/against-cleaning/ failed: got a time out (response code 0) (status code 0)
_site/en/lessons/crowdsourced-data-normalization-with-pandas.html,1530,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/lessons/crowdsourced-data-normalization-with-pandas.html,1540,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/crowdsourced-data-normalization-with-pandas.md failed (status code 429)
-_site/en/lessons/data-mining-the-internet-archive.html,569,"External link http://archive.org/stream/lettertowilliaml00doug/39999066767938#page/n0/mode/2up failed: http://archive.org/stream/lettertowilliaml00doug/39999066767938 exists, but the hash 'page/n0/mode/2up' does not (status code 200)"
-_site/en/lessons/data-mining-the-internet-archive.html,622,External link http://internetarchive.readthedocs.io/en/latest/quickstart.html#searching failed (status code 404)
-_site/en/lessons/data-mining-the-internet-archive.html,653,External link http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading failed (status code 404)
-_site/en/lessons/data-mining-the-internet-archive.html,794,External link http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading failed (status code 404)
+_site/en/lessons/data-mining-the-internet-archive.html,569,"External link https://archive.org/stream/lettertowilliaml00doug/39999066767938#page/n0/mode/2up failed: https://archive.org/stream/lettertowilliaml00doug/39999066767938 exists, but the hash 'page/n0/mode/2up' does not (status code 200)"
+_site/en/lessons/data-mining-the-internet-archive.html,622,External link https://internetarchive.readthedocs.io/en/latest/quickstart.html#searching failed (status code 404)
+_site/en/lessons/data-mining-the-internet-archive.html,653,External link https://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading failed (status code 404)
+_site/en/lessons/data-mining-the-internet-archive.html,794,External link https://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading failed (status code 404)
_site/en/lessons/data-mining-the-internet-archive.html,802,"External link https://archive.org/about/faqs.php#140 failed: https://archive.org/about/faqs.php exists, but the hash '140' does not (status code 200)"
-_site/en/lessons/data-mining-the-internet-archive.html,973,External link https://github.com/edsu/pymarc/blob/master/pymarc/marcxml.py failed (status code 429)
_site/en/lessons/data-mining-the-internet-archive.html,1625,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/lessons/data-mining-the-internet-archive.html,1635,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/data-mining-the-internet-archive.md failed (status code 429)
_site/en/lessons/data-wrangling-and-management-in-r.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/60 failed (status code 429)
_site/en/lessons/data-wrangling-and-management-in-r.html,1695,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/lessons/data-wrangling-and-management-in-r.html,1705,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/data-wrangling-and-management-in-r.md failed (status code 429)
_site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/87 failed (status code 429)
-_site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html,985,External link http://localhost:7474/browser/ failed with something very wrong.
-_site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html,1048,External link http://localhost:7474 failed with something very wrong.
+_site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html,985,External link https://localhost:7474/browser/ failed with something very wrong.
+_site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html,1048,External link https://localhost:7474 failed with something very wrong.
_site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html,1647,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.html,1657,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/dealing-with-big-data-and-network-analysis-using-neo4j.md failed (status code 429) _site/en/lessons/designing-a-timeline-tabletop-simulator.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/553 failed (status code 429) @@ -4536,24 +2469,26 @@ _site/en/lessons/designing-a-timeline-tabletop-simulator.html,1913,External link _site/en/lessons/designing-a-timeline-tabletop-simulator.html,1923,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/designing-a-timeline-tabletop-simulator.md failed (status code 429) _site/en/lessons/detecting-text-reuse-with-passim.html,335,External link https://github.com/programminghistorian/ph-submissions/issues/305 failed (status code 429) _site/en/lessons/detecting-text-reuse-with-passim.html,573,External link https://www.java.com/fr/download/ failed (status code 403) -_site/en/lessons/detecting-text-reuse-with-passim.html,1226,External link https://github.com/impresso/PH-Passim-tutorial/blob/master/eebo/code/main.py failed (status code 429) -_site/en/lessons/detecting-text-reuse-with-passim.html,1269,External link https://github.com/impresso/impresso-pycommons/blob/master/impresso_commons/text/rebuilder.py failed (status code 429) -_site/en/lessons/detecting-text-reuse-with-passim.html,1411,External link https://github.com/impresso/PH-passim-tutorial/blob/master/explore-passim-output.ipynb failed (status code 429) -_site/en/lessons/detecting-text-reuse-with-passim.html,1488,External link http://dx.doi.org/10.1093/alh/ajv028 failed (status code 403) -_site/en/lessons/detecting-text-reuse-with-passim.html,1489,External link http://dx.doi.org/10.1080/1461670x.2020.1761865 failed (status code 403) -_site/en/lessons/detecting-text-reuse-with-passim.html,1495,External link http://dx.doi.org/10.1145/2682571.2797068 failed (status code 403) +_site/en/lessons/detecting-text-reuse-with-passim.html,1487,External link https://dx.doi.org/10.1093/alh/ajv029 failed (status code 403) +_site/en/lessons/detecting-text-reuse-with-passim.html,1488,External link https://dx.doi.org/10.1093/alh/ajv028 failed (status code 403) +_site/en/lessons/detecting-text-reuse-with-passim.html,1489,External link https://dx.doi.org/10.1080/1461670x.2020.1761865 failed (status code 403) +_site/en/lessons/detecting-text-reuse-with-passim.html,1495,External link https://dx.doi.org/10.1145/2682571.2797068 failed (status code 403) +_site/en/lessons/detecting-text-reuse-with-passim.html,1496,External link https://doi.org/10.18653/v1/D17-1290 failed (status code 409) _site/en/lessons/detecting-text-reuse-with-passim.html,2470,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/detecting-text-reuse-with-passim.html,2480,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/detecting-text-reuse-with-passim.md failed (status code 429) _site/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.html,335,External link https://github.com/programminghistorian/ph-submissions/issues/349 failed (status code 429) 
_site/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.html,1834,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.html,1844,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/displaying-georeferenced-map-knightlab-storymap-js.md failed (status code 429) -_site/en/lessons/downloading-multiple-records-using-query-strings.html,1660,External link http://stackoverflow.com/questions/273192/python-best-way-to-create-directory-if-it-doesnt-exist-for-file-write failed (status code 403) +_site/en/lessons/downloading-multiple-records-using-query-strings.html,1660,External link https://stackoverflow.com/questions/273192/python-best-way-to-create-directory-if-it-doesnt-exist-for-file-write failed (status code 403) _site/en/lessons/downloading-multiple-records-using-query-strings.html,2197,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/downloading-multiple-records-using-query-strings.html,2207,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/downloading-multiple-records-using-query-strings.md failed (status code 429) _site/en/lessons/editing-audio-with-audacity.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/15 failed (status code 429) +_site/en/lessons/editing-audio-with-audacity.html,525,External link https://web.archive.org/web/20161119231053/https://www.indiana.edu:80/~emusic/acoustics/amplitude.htm failed (status code 404) _site/en/lessons/editing-audio-with-audacity.html,1358,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/editing-audio-with-audacity.html,1368,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/editing-audio-with-audacity.md failed (status code 429) _site/en/lessons/exploring-and-analyzing-network-data-with-python.html,343,External link https://github.com/programminghistorian/ph-submissions/issues/92 failed (status code 429) +_site/en/lessons/exploring-and-analyzing-network-data-with-python.html,580,External link https://www.sixdegreesoffrancisbacon.com failed with something very wrong. +_site/en/lessons/exploring-and-analyzing-network-data-with-python.html,852,External link https://sixdegreesoffrancisbacon.com/ failed with something very wrong. _site/en/lessons/exploring-and-analyzing-network-data-with-python.html,2951,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/exploring-and-analyzing-network-data-with-python.html,2961,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/exploring-and-analyzing-network-data-with-python.md failed (status code 429) _site/en/lessons/extracting-illustrated-pages.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/193 failed (status code 429) @@ -4561,9 +2496,9 @@ _site/en/lessons/extracting-illustrated-pages.html,670,External link https://ana _site/en/lessons/extracting-illustrated-pages.html,989,External link https://iiif.archivelab.org/iiif/documentation failed with something very wrong. 
_site/en/lessons/extracting-illustrated-pages.html,1545,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/extracting-illustrated-pages.html,1555,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/extracting-illustrated-pages.md failed (status code 429) -_site/en/lessons/extracting-keywords.html,603,External link http://stackoverflow.com/questions/3056740/gedit-adds-line-at-end-of-file failed (status code 403) -_site/en/lessons/extracting-keywords.html,660,External link http://stackoverflow.com/questions/11497376/new-line-python failed (status code 403) -_site/en/lessons/extracting-keywords.html,985,External link http://stackoverflow.com/questions/17315635/csv-new-line-character-seen-in-unquoted-field-error failed (status code 403) +_site/en/lessons/extracting-keywords.html,603,External link https://stackoverflow.com/questions/3056740/gedit-adds-line-at-end-of-file failed (status code 403) +_site/en/lessons/extracting-keywords.html,660,External link https://stackoverflow.com/questions/11497376/new-line-python failed (status code 403) +_site/en/lessons/extracting-keywords.html,985,External link https://stackoverflow.com/questions/17315635/csv-new-line-character-seen-in-unquoted-field-error failed (status code 403) _site/en/lessons/extracting-keywords.html,1643,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/extracting-keywords.html,1653,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/extracting-keywords.md failed (status code 429) _site/en/lessons/facial-recognition-ai-python.html,335,External link https://github.com/programminghistorian/ph-submissions/issues/552 failed (status code 429) @@ -4573,7 +2508,7 @@ _site/en/lessons/facial-recognition-ai-python.html,1123,"External link https://i _site/en/lessons/facial-recognition-ai-python.html,2125,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/facial-recognition-ai-python.html,2135,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/facial-recognition-ai-python.md failed (status code 429) _site/en/lessons/fetch-and-parse-data-with-openrefine.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/69 failed (status code 429) -_site/en/lessons/fetch-and-parse-data-with-openrefine.html,570,External link http://www.gutenberg.org/wiki/Gutenberg:Feeds failed (status code 404) +_site/en/lessons/fetch-and-parse-data-with-openrefine.html,570,External link https://www.gutenberg.org/wiki/Gutenberg:Feeds failed (status code 404) _site/en/lessons/fetch-and-parse-data-with-openrefine.html,1086,"External link https://chroniclingamerica.loc.gov/#tab=tab_advanced_search failed: https://chroniclingamerica.loc.gov/ exists, but the hash 'tab=tab_advanced_search' does not (status code 200)" _site/en/lessons/fetch-and-parse-data-with-openrefine.html,2028,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/fetch-and-parse-data-with-openrefine.html,2038,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/fetch-and-parse-data-with-openrefine.md failed (status code 429) @@ -4592,21 +2527,23 @@ _site/en/lessons/geocoding-qgis.html,1499,External link https://github.com/progr 
_site/en/lessons/geocoding-qgis.html,1509,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/geocoding-qgis.md failed (status code 429) _site/en/lessons/geoparsing-text-with-edinburgh.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/26 failed (status code 429) _site/en/lessons/geoparsing-text-with-edinburgh.html,659,"External link https://en.wikipedia.org/wiki/Lexical_analysis#Tokenization_ failed: https://en.wikipedia.org/wiki/Lexical_analysis exists, but the hash 'Tokenization_' does not (status code 200)" -_site/en/lessons/geoparsing-text-with-edinburgh.html,982,External link http://palimpsest.blogs.edina.ac.uk/ failed with something very wrong. -_site/en/lessons/geoparsing-text-with-edinburgh.html,982,External link http://litlong.org/ failed with something very wrong. -_site/en/lessons/geoparsing-text-with-edinburgh.html,985,External link http://tradingconsequences.blogs.edina.ac.uk/ failed with something very wrong. -_site/en/lessons/geoparsing-text-with-edinburgh.html,995,External link http://www.euppublishing.com/doi/pdfplus/10.3366/ijhac.2015.0136 failed (status code 403) +_site/en/lessons/geoparsing-text-with-edinburgh.html,982,External link https://palimpsest.blogs.edina.ac.uk/ failed with something very wrong. +_site/en/lessons/geoparsing-text-with-edinburgh.html,982,External link https://litlong.org/ failed with something very wrong. +_site/en/lessons/geoparsing-text-with-edinburgh.html,985,External link https://tradingconsequences.blogs.edina.ac.uk/ failed with something very wrong. +_site/en/lessons/geoparsing-text-with-edinburgh.html,993,External link https://www.lrec-conf.org/proceedings/lrec2016/pdf/129_Paper.pdf failed with something very wrong. +_site/en/lessons/geoparsing-text-with-edinburgh.html,995,External link https://www.euppublishing.com/doi/pdfplus/10.3366/ijhac.2015.0136 failed (status code 403) +_site/en/lessons/geoparsing-text-with-edinburgh.html,1001,External link https://doi.org/10.3115/v1/W14-0617 failed (status code 409) _site/en/lessons/geoparsing-text-with-edinburgh.html,1005,External link https://direct.mit.edu/leon/article/45/1/82/46956/GAP-A-NeoGeo-Approach-to-Classical-Resources#.U48IuXWx15Q failed (status code 403) _site/en/lessons/geoparsing-text-with-edinburgh.html,1540,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/geoparsing-text-with-edinburgh.html,1550,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/geoparsing-text-with-edinburgh.md failed (status code 429) -_site/en/lessons/georeferencing-qgis.html,605,External link http://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP failed with something very wrong. 
+_site/en/lessons/georeferencing-qgis.html,605,External link https://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP failed: Found (status code 302) _site/en/lessons/georeferencing-qgis.html,723,External link https://islandimagined.ca/islandora/object/imagined:208687 failed (status code 403) _site/en/lessons/georeferencing-qgis.html,958,External link https://islandimagined.ca/islandora/object/imagined:208687 failed (status code 403) _site/en/lessons/georeferencing-qgis.html,2541,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/georeferencing-qgis.html,2551,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/georeferencing-qgis.md failed (status code 429) _site/en/lessons/geospatial-data-analysis.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/102 failed (status code 429) -_site/en/lessons/geospatial-data-analysis.html,495,External link http://www.ats.ucla.edu/stat/r/default.htm failed: got a time out (response code 0) (status code 0) -_site/en/lessons/geospatial-data-analysis.html,775,External link http://www.sciencedirect.com/science/article/pii/S0031405608000073 failed: Forbidden (status code 403) +_site/en/lessons/geospatial-data-analysis.html,495,External link https://www.ats.ucla.edu/stat/r/default.htm failed: got a time out (response code 0) (status code 0) +_site/en/lessons/geospatial-data-analysis.html,775,External link https://www.sciencedirect.com/science/article/pii/S0031405608000073 failed (status code 403) _site/en/lessons/geospatial-data-analysis.html,1316,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/geospatial-data-analysis.html,1326,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/geospatial-data-analysis.md failed (status code 429) _site/en/lessons/getting-started-with-markdown.html,1366,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -4615,8 +2552,7 @@ _site/en/lessons/getting-started-with-mysql-using-r.html,333,External link https _site/en/lessons/getting-started-with-mysql-using-r.html,611,"External link https://www.rstudio.com/products/rstudio/#Desktop failed: https://www.rstudio.com/products/rstudio/ exists, but the hash 'Desktop' does not (status code 200)" _site/en/lessons/getting-started-with-mysql-using-r.html,804,External link https://stackoverflow.com/a/37524283 failed (status code 403) _site/en/lessons/getting-started-with-mysql-using-r.html,1038,External link https://stackoverflow.com/questions/49194719/authentication-plugin-caching-sha2-password-cannot-be-loaded failed (status code 403) -_site/en/lessons/getting-started-with-mysql-using-r.html,1686,External link http://www.jeffblackadar.ca/graham_fellowship/corpus_entities_equity/ failed (status code 301) -_site/en/lessons/getting-started-with-mysql-using-r.html,1706,External link https://github.com/jeffblackadar/getting-started-with-mysql/blob/master/newspaper-search-and-store.R failed (status code 429) +_site/en/lessons/getting-started-with-mysql-using-r.html,1686,External link https://www.jeffblackadar.ca/graham_fellowship/corpus_entities_equity/ failed (status code 301) _site/en/lessons/getting-started-with-mysql-using-r.html,2254,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) 
_site/en/lessons/getting-started-with-mysql-using-r.html,2264,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/getting-started-with-mysql-using-r.md failed (status code 429) _site/en/lessons/googlemaps-googleearth.html,1284,External link https://github.com/programminghistorian/jekyll/issues/2456 failed (status code 429) @@ -4636,11 +2572,9 @@ _site/en/lessons/image-classification-neural-networks.html,1589,External link ht _site/en/lessons/index.html,4519,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/index.html,4529,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons.md failed (status code 429) _site/en/lessons/installing-omeka.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/6 failed (status code 429) -_site/en/lessons/installing-omeka.html,484,External link http://www.hostgator.com/ failed: Forbidden (status code 403) -_site/en/lessons/installing-omeka.html,500,External link http://support.hostgator.com/articles/hosting-guide/lets-get-started/how-do-i-get-and-use-ssh-access failed (status code 403) _site/en/lessons/installing-omeka.html,1269,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/installing-omeka.html,1279,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/installing-omeka.md failed (status code 429) -_site/en/lessons/installing-python-modules-pip.html,578,External link http://stackoverflow.com/questions/4750806/how-to-install-pip-on-windows failed (status code 403) +_site/en/lessons/installing-python-modules-pip.html,578,External link https://stackoverflow.com/questions/4750806/how-to-install-pip-on-windows failed (status code 403) _site/en/lessons/installing-python-modules-pip.html,1140,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/installing-python-modules-pip.html,1150,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/installing-python-modules-pip.md failed (status code 429) _site/en/lessons/interactive-data-visualization-dashboard.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/609 failed (status code 429) @@ -4655,6 +2589,8 @@ _site/en/lessons/interactive-data-visualization-dashboard.html,1328,External lin _site/en/lessons/interactive-data-visualization-dashboard.html,1866,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/interactive-data-visualization-dashboard.html,1876,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/interactive-data-visualization-dashboard.md failed (status code 429) _site/en/lessons/interactive-text-games-using-twine.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/348 failed (status code 429) +_site/en/lessons/interactive-text-games-using-twine.html,576,External link https://www.depressionquest.com/ failed with something very wrong. +_site/en/lessons/interactive-text-games-using-twine.html,1076,External link https://www.depressionquest.com/ failed with something very wrong. 
_site/en/lessons/interactive-text-games-using-twine.html,1117,External link https://doi.org/10.1177/1461444811410394 failed (status code 403) _site/en/lessons/interactive-text-games-using-twine.html,1661,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/interactive-text-games-using-twine.html,1671,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/interactive-text-games-using-twine.md failed (status code 429) @@ -4662,29 +2598,36 @@ _site/en/lessons/interactive-visualization-with-plotly.html,333,External link ht _site/en/lessons/interactive-visualization-with-plotly.html,2142,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/interactive-visualization-with-plotly.html,2152,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/interactive-visualization-with-plotly.md failed (status code 429) _site/en/lessons/interrogating-national-narrative-gpt.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/418 failed (status code 429) +_site/en/lessons/interrogating-national-narrative-gpt.html,922,External link https://doi.org/10.18653/v1/2020.acl-main.463 failed (status code 409) _site/en/lessons/interrogating-national-narrative-gpt.html,931,External link https://doi.org/10.1080/01419870.2017.1361544 failed (status code 403) _site/en/lessons/interrogating-national-narrative-gpt.html,934,External link https://doi.org/10.1093/pa/gsaa008 failed (status code 403) _site/en/lessons/interrogating-national-narrative-gpt.html,955,External link https://doi.org/10.1145/3442188.3445922 failed (status code 403) _site/en/lessons/interrogating-national-narrative-gpt.html,1502,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/interrogating-national-narrative-gpt.html,1512,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/interrogating-national-narrative-gpt.md failed (status code 429) +_site/en/lessons/intro-to-bash.html,738,External link https://www.viemu.com/a-why-vi-vim.html failed with something very wrong. _site/en/lessons/intro-to-bash.html,2011,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/intro-to-bash.html,2021,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/intro-to-bash.md failed (status code 429) _site/en/lessons/intro-to-linked-data.html,334,External link https://github.com/programminghistorian/ph-submissions/issues/33 failed (status code 429) -_site/en/lessons/intro-to-linked-data.html,707,External link http://semanticweb.org/wiki/Main_Page.html failed: got a time out (response code 0) (status code 0) -_site/en/lessons/intro-to-linked-data.html,1038,External link http://linkeddata.org/guides-and-tutorials failed: Internal Server Error (status code 500) +_site/en/lessons/intro-to-linked-data.html,531,External link https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ failed with something very wrong. 
+_site/en/lessons/intro-to-linked-data.html,673,External link https://www.history.ac.uk/projects/digital/tobias failed (status code 403) +_site/en/lessons/intro-to-linked-data.html,707,External link https://semanticweb.org/wiki/Main_Page.html failed: got a time out (response code 0) (status code 0) +_site/en/lessons/intro-to-linked-data.html,862,External link https://www.history.ac.uk/projects/digital/tobias failed (status code 403) +_site/en/lessons/intro-to-linked-data.html,1038,External link https://linkeddata.org/guides-and-tutorials failed with something very wrong. +_site/en/lessons/intro-to-linked-data.html,1040,External link https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ failed with something very wrong. +_site/en/lessons/intro-to-linked-data.html,1046,External link https://www.history.ac.uk/projects/digital/tobias failed (status code 403) _site/en/lessons/intro-to-linked-data.html,1581,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/intro-to-linked-data.html,1591,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/intro-to-linked-data.md failed (status code 429) _site/en/lessons/intro-to-powershell.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/18 failed (status code 429) _site/en/lessons/intro-to-powershell.html,1730,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/intro-to-powershell.html,1740,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/intro-to-powershell.md failed (status code 429) _site/en/lessons/intro-to-twitterbots.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/75 failed (status code 429) -_site/en/lessons/intro-to-twitterbots.html,521,External link http://www.sciencedirect.com/science/article/pii/S0747563213003129 failed: Forbidden (status code 403) +_site/en/lessons/intro-to-twitterbots.html,521,External link https://www.sciencedirect.com/science/article/pii/S0747563213003129 failed (status code 403) _site/en/lessons/intro-to-twitterbots.html,537,External link https://twitter.com/Every3Minutes failed (status code 400) -_site/en/lessons/intro-to-twitterbots.html,593,External link http://twitter.com/tinyarchae failed (status code 400) -_site/en/lessons/intro-to-twitterbots.html,593,External link https://twitter.com/botarchaeo failed (status code 400) _site/en/lessons/intro-to-twitterbots.html,593,External link https://twitter.com/archaeoglitch failed (status code 400) +_site/en/lessons/intro-to-twitterbots.html,593,External link https://twitter.com/botarchaeo failed (status code 400) +_site/en/lessons/intro-to-twitterbots.html,593,External link https://twitter.com/tinyarchae failed (status code 400) _site/en/lessons/intro-to-twitterbots.html,603,External link https://twitter.com/galaxykate failed (status code 400) -_site/en/lessons/intro-to-twitterbots.html,845,External link http://unicode.org/emoji/charts/full-emoji-list.html failed with something very wrong. +_site/en/lessons/intro-to-twitterbots.html,845,External link https://unicode.org/emoji/charts/full-emoji-list.html failed with something very wrong. 
_site/en/lessons/intro-to-twitterbots.html,923,External link https://twitter.com/TinyAdv failed (status code 400) _site/en/lessons/intro-to-twitterbots.html,954,External link https://twitter.com/GalaxyKate/lists/tracery-bots failed (status code 400) _site/en/lessons/intro-to-twitterbots.html,1492,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -4697,7 +2640,6 @@ _site/en/lessons/introduction-map-warper.html,1920,External link https://github. _site/en/lessons/introduction-to-ffmpeg.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/186 failed (status code 429) _site/en/lessons/introduction-to-ffmpeg.html,541,External link https://twitter.com/FFmpeg failed (status code 400) _site/en/lessons/introduction-to-ffmpeg.html,557,"External link https://training.ashleyblewer.com/presentations/ffmpeg.html#10 failed: https://training.ashleyblewer.com/presentations/ffmpeg.html exists, but the hash '10' does not (status code 200)" -_site/en/lessons/introduction-to-ffmpeg.html,1124,External link https://github.com/privatezero/NDSR/blob/master/Demystifying_FFmpeg_Slides.pdf failed (status code 429) _site/en/lessons/introduction-to-ffmpeg.html,1687,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/introduction-to-ffmpeg.html,1697,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/introduction-to-ffmpeg.md failed (status code 429) _site/en/lessons/introduction-to-populating-a-website-with-api-data.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/196 failed (status code 429) @@ -4706,13 +2648,12 @@ _site/en/lessons/introduction-to-populating-a-website-with-api-data.html,575,Ext _site/en/lessons/introduction-to-populating-a-website-with-api-data.html,579,External link https://pro.europeana.eu/resources/apis failed (status code 403) _site/en/lessons/introduction-to-populating-a-website-with-api-data.html,585,External link https://pro.europeana.eu/get-api failed (status code 403) _site/en/lessons/introduction-to-populating-a-website-with-api-data.html,617,External link https://www.europeana.eu failed (status code 403) -_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,732,External link http://www.europeana.eu/portal/record/90402/RP_P_OB_84_508.html failed: Forbidden (status code 403) +_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,732,External link https://www.europeana.eu/portal/record/90402/RP_P_OB_84_508.html failed (status code 403) _site/en/lessons/introduction-to-populating-a-website-with-api-data.html,748,External link https://pro.europeana.eu/page/edm-documentation failed (status code 403) _site/en/lessons/introduction-to-populating-a-website-with-api-data.html,750,External link https://pro.europeana.eu/page/europeana-rest-api#console failed (status code 403) _site/en/lessons/introduction-to-populating-a-website-with-api-data.html,800,"External link https://windowsreport.com/xampp-port-80-443-in-use-skype-fix/#.XDM6XGlCfIU failed: https://windowsreport.com/xampp-port-80-443-in-use-skype-fix/ exists, but the hash '.XDM6XGlCfIU' does not (status code 200)" -_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,821,External link http://localhost/dashboard failed with something very wrong. 
-_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,859,External link http://localhost/helloworld.php failed with something very wrong.
-_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,1251,External link http://museum-api.pbworks.com/w/page/21933420/Museum%C2%A0APIs failed: got a time out (response code 0) (status code 0)
+_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,821,External link https://localhost/dashboard failed with something very wrong.
+_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,859,External link https://localhost/helloworld.php failed with something very wrong.
_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,1794,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/lessons/introduction-to-populating-a-website-with-api-data.html,1804,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/introduction-to-populating-a-website-with-api-data.md failed (status code 429)
_site/en/lessons/introduction-to-stylometry-with-python.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/147 failed (status code 429)
@@ -4721,14 +2662,14 @@ _site/en/lessons/introduction-to-stylometry-with-python.html,1863,External link
_site/en/lessons/introduction-to-stylometry-with-python.html,1873,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/introduction-to-stylometry-with-python.md failed (status code 429)
_site/en/lessons/json-and-jq.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/23 failed (status code 429)
_site/en/lessons/json-and-jq.html,615,"External link https://stedolan.github.io/jq/manual/#Invokingjq failed: https://stedolan.github.io/jq/manual/ exists, but the hash 'Invokingjq' does not (status code 200)"
-_site/en/lessons/json-and-jq.html,731,External link http://stackoverflow.com/questions/3135325/why-do-vector-indices-in-r-start-with-1-instead-of-0 failed (status code 403)
+_site/en/lessons/json-and-jq.html,731,External link https://stackoverflow.com/questions/3135325/why-do-vector-indices-in-r-start-with-1-instead-of-0 failed (status code 403)
_site/en/lessons/json-and-jq.html,814,"External link https://stedolan.github.io/jq/manual/#ConditionalsandComparisons failed: https://stedolan.github.io/jq/manual/ exists, but the hash 'ConditionalsandComparisons' does not (status code 200)"
_site/en/lessons/json-and-jq.html,1053,"External link https://stedolan.github.io/jq/manual/#join(str) failed: https://stedolan.github.io/jq/manual/#join(str) exists, but the hash 'join(str)' does not (status code 200)"
_site/en/lessons/json-and-jq.html,1462,"External link https://stedolan.github.io/jq/manual/#Math failed: https://stedolan.github.io/jq/manual/ exists, but the hash 'Math' does not (status code 200)"
_site/en/lessons/json-and-jq.html,1466,"External link https://stedolan.github.io/jq/manual/#if-then-else failed: https://stedolan.github.io/jq/manual/ exists, but the hash 'if-then-else' does not (status code 200)"
-_site/en/lessons/json-and-jq.html,1466,"External link https://stedolan.github.io/jq/manual/#Reduce failed: https://stedolan.github.io/jq/manual/ exists, but the hash 'Reduce' does not (status code 200)"
_site/en/lessons/json-and-jq.html,1466,"External link https://stedolan.github.io/jq/manual/#Recursion failed: https://stedolan.github.io/jq/manual/ exists, but the hash 'Recursion' does not (status code 200)"
+_site/en/lessons/json-and-jq.html,1466,"External link https://stedolan.github.io/jq/manual/#Reduce failed: https://stedolan.github.io/jq/manual/ exists, but the hash 'Reduce' does not (status code 200)"
+_site/en/lessons/json-and-jq.html,1467,External link https://stackoverflow.com/questions/tagged/jq failed (status code 403)
_site/en/lessons/json-and-jq.html,2003,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/lessons/json-and-jq.html,2013,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/json-and-jq.md failed (status code 429)
_site/en/lessons/jupyter-notebooks.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/251 failed (status code 429)
@@ -4753,20 +2694,22 @@ _site/en/lessons/mac-installation.html,1628,External link https://github.com/pro
_site/en/lessons/manipulating-strings-in-python.html,1741,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/en/lessons/manipulating-strings-in-python.html,1751,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/manipulating-strings-in-python.md failed (status code 429)
_site/en/lessons/mapping-with-python-leaflet.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/85 failed (status code 429)
-_site/en/lessons/mapping-with-python-leaflet.html,511,"External link http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe failed: http://pandas.pydata.org/pandas-docs/stable/dsintro.html exists, but the hash 'dataframe' does not (status code 200)"
-_site/en/lessons/mapping-with-python-leaflet.html,535,External link http://data.london.gov.uk/dataset/historic-census-population failed: Forbidden (status code 403)
-_site/en/lessons/mapping-with-python-leaflet.html,555,"External link http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe failed: http://pandas.pydata.org/pandas-docs/stable/dsintro.html exists, but the hash 'dataframe' does not (status code 200)"
-_site/en/lessons/mapping-with-python-leaflet.html,573,"External link http://pandas.pydata.org/pandas-docs/stable/install.html#dependencies failed: http://pandas.pydata.org/pandas-docs/stable/install.html exists, but the hash 'dependencies' does not (status code 200)"
+_site/en/lessons/mapping-with-python-leaflet.html,511,"External link https://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe failed: https://pandas.pydata.org/pandas-docs/stable/dsintro.html exists, but the hash 'dataframe' does not (status code 200)"
+_site/en/lessons/mapping-with-python-leaflet.html,535,External link https://data.london.gov.uk/dataset/historic-census-population failed (status code 403)
+_site/en/lessons/mapping-with-python-leaflet.html,555,"External link https://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe failed: https://pandas.pydata.org/pandas-docs/stable/dsintro.html exists, but the hash 'dataframe' does not (status code 200)"
+_site/en/lessons/mapping-with-python-leaflet.html,573,"External link https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies failed: https://pandas.pydata.org/pandas-docs/stable/install.html exists, but the hash 'dependencies' does not (status code 200)"
_site/en/lessons/mapping-with-python-leaflet.html,661,External link https://github.com/geopy/geopy/issues/90 failed (status code 429) _site/en/lessons/mapping-with-python-leaflet.html,1087,"External link https://leafletjs.com/reference.html#map-set-methods failed: https://leafletjs.com/reference.html exists, but the hash 'map-set-methods' does not (status code 200)" -_site/en/lessons/mapping-with-python-leaflet.html,1185,External link http://leafletjs.com/SlavaUkraini/reference-1.2.0.html#geojson-oneachfeature failed (status code 404) +_site/en/lessons/mapping-with-python-leaflet.html,1185,External link https://leafletjs.com/SlavaUkraini/reference-1.2.0.html#geojson-oneachfeature failed (status code 404) _site/en/lessons/mapping-with-python-leaflet.html,1960,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/mapping-with-python-leaflet.html,1970,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/mapping-with-python-leaflet.md failed (status code 429) -_site/en/lessons/naive-bayesian.html,915,"External link http://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introbayes_sect004.htm failed: http://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm exists, but the hash 'statug_introbayes_sect004.htm' does not (status code 200)" +_site/en/lessons/naive-bayesian.html,915,"External link https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introbayes_sect004.htm failed: https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm exists, but the hash 'statug_introbayes_sect004.htm' does not (status code 200)" +_site/en/lessons/naive-bayesian.html,2014,External link https://snowball.tartarus.org/ failed with something very wrong. 
_site/en/lessons/naive-bayesian.html,2590,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/naive-bayesian.html,2600,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/naive-bayesian.md failed (status code 429) _site/en/lessons/normalizing-data.html,1714,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/normalizing-data.html,1724,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/normalizing-data.md failed (status code 429) +_site/en/lessons/ocr-with-google-vision-and-tesseract.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/457 failed (status code 429) _site/en/lessons/ocr-with-google-vision-and-tesseract.html,2037,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/ocr-with-google-vision-and-tesseract.html,2047,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/ocr-with-google-vision-and-tesseract.md failed (status code 429) _site/en/lessons/output-data-as-html-file.html,1755,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -4776,20 +2719,17 @@ _site/en/lessons/output-keywords-in-context-in-html-file.html,1803,External link _site/en/lessons/preserving-your-research-data.html,575,External link https://twitter.com/Girlinthe/status/387166944094199809 failed (status code 400) _site/en/lessons/preserving-your-research-data.html,1463,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/preserving-your-research-data.html,1473,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/preserving-your-research-data.md failed (status code 429) -_site/en/lessons/qgis-layers.html,609,External link http://www.gov.pe.ca/gis/download.php3?name=coastline&file_format=SHP failed with something very wrong. -_site/en/lessons/qgis-layers.html,642,External link http://www.gov.pe.ca/gis/index.php3?number=77865&lang=E failed with something very wrong. 
_site/en/lessons/qgis-layers.html,2876,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/qgis-layers.html,2886,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/qgis-layers.md failed (status code 429) _site/en/lessons/r-basics-with-tabular-data.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/19 failed (status code 429) _site/en/lessons/r-basics-with-tabular-data.html,1569,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/r-basics-with-tabular-data.html,1579,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/r-basics-with-tabular-data.md failed (status code 429) -_site/en/lessons/research-data-with-unix.html,510,External link https://www.worldcat.org/title/unix-and-linux/oclc/308171076&referer=brief_results failed (status code 403) _site/en/lessons/research-data-with-unix.html,1575,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/research-data-with-unix.html,1585,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/research-data-with-unix.md failed (status code 429) _site/en/lessons/retired/OCR-and-Machine-Translation.html,303,External link https://github.com/programminghistorian/ph-submissions/issues/285 failed (status code 429) _site/en/lessons/retired/OCR-and-Machine-Translation.html,493,External link https://digitalarchive.wilsoncenter.org/document/120500 failed (status code 403) _site/en/lessons/retired/OCR-and-Machine-Translation.html,493,External link https://digitalarchive.wilsoncenter.org/document/119105 failed (status code 403) -_site/en/lessons/retired/OCR-and-Machine-Translation.html,537,External link https://github.com/tesseract-ocr/tesseract/blob/master/doc/tesseract.1.asc failed (status code 429) +_site/en/lessons/retired/OCR-and-Machine-Translation.html,523,External link https://www.fmwconcepts.com/imagemagick/textcleaner/index.php failed with something very wrong. 
_site/en/lessons/retired/OCR-and-Machine-Translation.html,540,"External link https://www.apertium.org/index.eng.html?dir=arg-cat#translation failed: https://www.apertium.org/index.eng.html?dir=arg-cat exists, but the hash 'translation' does not (status code 200)" _site/en/lessons/retired/OCR-and-Machine-Translation.html,1320,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/retired/OCR-and-Machine-Translation.html,1330,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/retired/OCR-and-Machine-Translation.md failed (status code 429) @@ -4807,32 +2747,32 @@ _site/en/lessons/retired/getting-started-with-github-desktop.html,595,External l _site/en/lessons/retired/getting-started-with-github-desktop.html,1057,External link https://www.hastac.org/blogs/harrisonm/2013/10/12/github-academia-and-collaborative-writing failed (status code 404) _site/en/lessons/retired/getting-started-with-github-desktop.html,1594,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/retired/getting-started-with-github-desktop.html,1604,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/retired/getting-started-with-github-desktop.md failed (status code 429) -_site/en/lessons/retired/graph-databases-and-SPARQL.html,514,External link http://labs.europeana.eu/api/linked-open-data-introduction failed with something very wrong. -_site/en/lessons/retired/graph-databases-and-SPARQL.html,514,External link http://collection.britishmuseum.org failed: got a time out (response code 0) (status code 0) -_site/en/lessons/retired/graph-databases-and-SPARQL.html,590,External link http://palladio.designhumanities.org/ failed with something very wrong. -_site/en/lessons/retired/graph-databases-and-SPARQL.html,794,External link http://collection.britishmuseum.org/sparql failed: got a time out (response code 0) (status code 0) -_site/en/lessons/retired/graph-databases-and-SPARQL.html,816,External link http://collection.britishmuseum.org/id/object/PPA82633 failed: got a time out (response code 0) (status code 0) -_site/en/lessons/retired/graph-databases-and-SPARQL.html,830,External link http://collection.britishmuseum.org/sparql?query=SELECT+*%0D%0AWHERE+%7B%0D%0A++%3Chttp://collection.britishmuseum.org/id/object/PPA82633%3E+?p+?o+.%0D%0A++%7D&_implicit=false&_equivalent=false&_form=/sparql failed: got a time out (response code 0) (status code 0) +_site/en/lessons/retired/graph-databases-and-SPARQL.html,514,External link https://labs.europeana.eu/api/linked-open-data-introduction failed with something very wrong. +_site/en/lessons/retired/graph-databases-and-SPARQL.html,514,External link https://collection.britishmuseum.org failed: got a time out (response code 0) (status code 0) +_site/en/lessons/retired/graph-databases-and-SPARQL.html,590,External link https://palladio.designhumanities.org/ failed with something very wrong. 
+_site/en/lessons/retired/graph-databases-and-SPARQL.html,794,External link https://collection.britishmuseum.org/sparql failed: got a time out (response code 0) (status code 0) +_site/en/lessons/retired/graph-databases-and-SPARQL.html,816,External link https://collection.britishmuseum.org/id/object/PPA82633 failed: got a time out (response code 0) (status code 0) +_site/en/lessons/retired/graph-databases-and-SPARQL.html,830,External link https://collection.britishmuseum.org/sparql?query=SELECT+*%0D%0AWHERE+%7B%0D%0A++%3Chttp://collection.britishmuseum.org/id/object/PPA82633%3E+?p+?o+.%0D%0A++%7D&_implicit=false&_equivalent=false&_form=/sparql failed: got a time out (response code 0) (status code 0) _site/en/lessons/retired/graph-databases-and-SPARQL.html,928,External link https://collection.britishmuseum.org/sparql#query=PREFIX+bmo:+%3Chttp://www.researchspace.org/ontology/%3E%0APREFIX+skos:+%3Chttp://www.w3.org/2004/02/skos/core%23%3E%0A%0ASELECT+?object%0AWHERE+%7B%0A%0A++%23+Search+for+all+values+of+?object+that+have+a+given+%22object+type%22%0A++?object+bmo:PX_object_type+?object_type+.%0A%0A++%23+That+object+type+should+have+the+label+%22print%22%0A++?object_type+skos:prefLabel+%22print%22+.%0A%7D%0ALIMIT+10 failed: got a time out (response code 0) (status code 0) _site/en/lessons/retired/graph-databases-and-SPARQL.html,1017,"External link https://collection.britishmuseum.org/sparql#query=PREFIX+bmo:+%3Chttp://www.researchspace.org/ontology/%3E%0APREFIX+skos:+%3Chttp://www.w3.org/2004/02/skos/core%23%3E%0APREFIX+ecrm:+%3Chttp://www.cidoc-crm.org/cidoc-crm/%3E%0APREFIX+xsd:+%3Chttp://www.w3.org/2001/XMLSchema%23%3E%0A%0A%23+Return+object+links+and+creation+date%0ASELECT+?object+?date%0AWHERE+%7B%0A%0A++%23+We'll+use+our+previous+command+to+search+only+for%0A++%23+objects+of+type+%22print%22%0A++?object+bmo:PX_object_type+?object_type+.%0A++?object_type+skos:prefLabel+%22print%22+.%0A%0A++%23+We+need+to+link+though+several+nodes+to+find+the%0A++%23+creation+date+associated+with+an+object%0A++?object+ecrm:P108i_was_produced_by+?production+.%0A++?production+ecrm:P9_consists_of+?date_node+.%0A++?date_node+ecrm:P4_has_time-span+?timespan+.%0A++?timespan+ecrm:P82a_begin_of_the_begin+?date+.%0A%0A++%23+As+you+can+see,+we+need+to+connect+quite+a+few+dots%0A++%23+to+get+to+the+date+node!+Now+that+we+have+it,+we+can%0A++%23+filter+our+results.+Because+we+are+filtering+by+date,%0A++%23+we+must+attach+the+tag+%5E%5Exsd:date+after+our+date+strings.%0A++%23+This+tag+tells+the+database+to+interpret+the+string%0A++%23+%221580-01-01%22+as+the+date+1+January+1580.%0A%0A++FILTER(?date+%3E=+%221580-01-01%22%5E%5Exsd:date+&&%0A+++++++++?date+%3C=+%221600-01-01%22%5E%5Exsd:date)%0A%7D failed: got a time out (response code 0) (status code 0)" _site/en/lessons/retired/graph-databases-and-SPARQL.html,1074,"External link 
https://collection.britishmuseum.org/sparql#query=PREFIX+bmo:+%3Chttp://www.researchspace.org/ontology/%3E%0APREFIX+skos:+%3Chttp://www.w3.org/2004/02/skos/core%23%3E%0APREFIX+ecrm:+%3Chttp://www.cidoc-crm.org/cidoc-crm/%3E%0APREFIX+xsd:+%3Chttp://www.w3.org/2001/XMLSchema%23%3E%0A%0ASELECT+?type+(COUNT(?type)+as+?n)%0AWHERE+%7B%0A++%23+We+still+need+to+indicate+the+?object_type+variable,%0A++%23+however+we+will+not+require+it+to+match+%22print%22+this+time%0A%0A++?object+bmo:PX_object_type+?object_type+.%0A++?object_type+skos:prefLabel+?type+.%0A%0A++%23+Once+again,+we+will+also+filter+by+date%0A++?object+ecrm:P108i_was_produced_by+?production+.%0A++?production+ecrm:P9_consists_of+?date_node+.%0A++?date_node+ecrm:P4_has_time-span+?timespan+.%0A++?timespan+ecrm:P82a_begin_of_the_begin+?date+.%0A++FILTER(?date+%3E=+%221580-01-01%22%5E%5Exsd:date+&&%0A+++++++++?date+%3C=+%221600-01-01%22%5E%5Exsd:date)%0A%7D%0A%23+The+GROUP+BY+command+designates+the+variable+to+tally+by,%0A%23+and+the+ORDER+BY+DESC()+command+sorts+the+results+by%0A%23+descending+number.%0AGROUP+BY+?type%0AORDER+BY+DESC(?n) failed: got a time out (response code 0) (status code 0)" -_site/en/lessons/retired/graph-databases-and-SPARQL.html,1200,External link http://palladio.designhumanities.org/ failed with something very wrong. +_site/en/lessons/retired/graph-databases-and-SPARQL.html,1200,External link https://palladio.designhumanities.org/ failed with something very wrong. _site/en/lessons/retired/graph-databases-and-SPARQL.html,1230,"External link https://collection.britishmuseum.org/sparql?query=%23+Return+object+links+and+creation+date%0D%0APREFIX+bmo:+%3Chttp://collection.britishmuseum.org/id/ontology/%3E%0D%0APREFIX+skos:+%3Chttp://www.w3.org/2004/02/skos/core%23%3E%0D%0APREFIX+ecrm:+%3Chttp://erlangen-crm.org/current/%3E%0D%0APREFIX+xsd:+%3Chttp://www.w3.org/2001/XMLSchema%23%3E%0D%0ASELECT+DISTINCT+?object+?date+?image%0D%0AWHERE+%7B%0D%0A%0D%0A++%23+We'll+use+our+previous+command+to+search+only+for+objects+of+type+%22print%22%0D%0A++?object+bmo:PX_object_type+?object_type+.%0D%0A++?object_type+skos:prefLabel+%22print%22+.%0D%0A%0D%0A++%23+We+need+to+link+though+several+nodes+to+find+the+creation+date+associated%0D%0A++%23+with+an+object%0D%0A++?object+ecrm:P108i_was_produced_by+?production+.%0D%0A++?production+ecrm:P9_consists_of+?date_node+.%0D%0A++?date_node+ecrm:P4_has_time-span+?timespan+.%0D%0A++?timespan+ecrm:P82a_begin_of_the_begin+?date+.%0D%0A%0D%0A++%23+Yes,+we+need+to+connect+quite+a+few+dots+to+get+to+the+date+node!+Now+that%0D%0A++%23+we+have+it,+we+can+filter+our+results.+Because+we+are+filtering+a+date,+we%0D%0A++%23+must+attach+the+xsd:date+tag+to+our+date+strings+so+that+SPARQL+knows+how+to%0D%0A++%23+parse+them.%0D%0A%0D%0A++FILTER(?date+%3E=+%221580-01-01%22%5E%5Exsd:date+%26%26+?date+%3C=+%221600-01-01%22%5E%5Exsd:date)%0D%0A++%0D%0A++?object+bmo:PX_has_main_representation+?image+.%0D%0A%7D%0D%0ALIMIT+100#query=%23+Return+object+links+and+creation+date%0APREFIX+bmo:+%3Chttp://www.researchspace.org/ontology/%3E%0APREFIX+skos:+%3Chttp://www.w3.org/2004/02/skos/core%23%3E%0APREFIX+xsd:+%3Chttp://www.w3.org/2001/XMLSchema%23%3E%0APREFIX+ecrm:+%3Chttp://www.cidoc-crm.org/cidoc-crm/%3E%0ASELECT+DISTINCT+?object+?date+?image%0AWHERE+%7B%0A++%0A++%23+We'll+use+our+previous+command+to+search+only+for+objects+of+type+%22print%22%0A++?object+bmo:PX_object_type+?object_type+.%0A++?object_type+skos:prefLabel+%22print%22+.%0A%0A++%23+We+need+to+link+though+several+nodes+to+find+the+creation+date+associated%0A++%
23+with+an+object%0A++?object+ecrm:P108i_was_produced_by+?production+.%0A++?production+ecrm:P9_consists_of+?date_node+.%0A++?date_node+ecrm:P4_has_time-span+?timespan+.%0A++?timespan+ecrm:P82a_begin_of_the_begin+?date+.%0A%0A++%0A++%23+Yes,+we+need+to+connect+quite+a+few+dots+to+get+to+the+date+node!+Now+that%0A++%23+we+have+it,+we+can+filter+our+results.+Because+we+are+filtering+a+date,+we%0A++%23+must+attach+the+xsd:date+tag+to+our+date+strings+so+that+SPARQL+knows+how+to%0A++%23+parse+them.%0A%0A++FILTER(?date+%3E=+%221580-01-01%22%5E%5Exsd:date+&&+?date+%3C=+%221600-01-01%22%5E%5Exsd:date)%0A++%0A++?object+bmo:PX_has_main_representation+?image+.%0A%7D%0ALIMIT+100 failed: got a time out (response code 0) (status code 0)" -_site/en/lessons/retired/graph-databases-and-SPARQL.html,1281,External link http://labs.europeana.eu/api/linked-open-data-SPARQL-endpoint failed with something very wrong. +_site/en/lessons/retired/graph-databases-and-SPARQL.html,1281,External link https://labs.europeana.eu/api/linked-open-data-SPARQL-endpoint failed with something very wrong. _site/en/lessons/retired/graph-databases-and-SPARQL.html,1818,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/retired/graph-databases-and-SPARQL.html,1828,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/retired/graph-databases-and-SPARQL.md failed (status code 429) _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,303,External link https://github.com/programminghistorian/ph-submissions/issues/17 failed (status code 429) _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,437,External link https://github.com/programminghistorian/jekyll/issues/717 failed (status code 429) -_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,490,"External link http://www.digi-capital.com/news/2015/04/augmentedvirtual-reality-to-hit-150-billion-disrupting-mobile-by-2020/#.VbetCU1VhHw failed: http://www.digi-capital.com/news/2015/04/augmentedvirtual-reality-to-hit-150-billion-disrupting-mobile-by-2020/ exists, but the hash '.VbetCU1VhHw' does not (status code 200)" +_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,490,"External link https://www.digi-capital.com/news/2015/04/augmentedvirtual-reality-to-hit-150-billion-disrupting-mobile-by-2020/#.VbetCU1VhHw failed: https://www.digi-capital.com/news/2015/04/augmentedvirtual-reality-to-hit-150-billion-disrupting-mobile-by-2020/ exists, but the hash '.VbetCU1VhHw' does not (status code 200)" _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,510,External link https://play.google.com/store/apps/details?id=com.Trace.Dollars&hl=en failed (status code 404) -_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,516,External link http://docs.unity3d.com/Manual/LearningtheInterface.html failed (status code 404) +_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,516,External link https://docs.unity3d.com/Manual/LearningtheInterface.html failed (status code 404) _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,520,External link https://www.aurasma.com/ failed with something very wrong. _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,571,External link https://www.aurasma.com/ failed with something very wrong. 
_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,678,"External link https://developer.android.com/sdk/index.html#Other failed: https://developer.android.com/sdk/index.html exists, but the hash 'Other' does not (status code 200)" -_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,1335,External link http://docs.unity3d.com/Manual/Transforms.html failed (status code 404) +_site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,1335,External link https://docs.unity3d.com/Manual/Transforms.html failed (status code 404) _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,2160,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/retired/intro-to-augmented-reality-with-unity.html,2170,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/retired/intro-to-augmented-reality-with-unity.md failed (status code 429) -_site/en/lessons/retired/intro-to-beautiful-soup.html,609,External link http://bioguide.congress.gov/biosearch/biosearch.asp failed: Forbidden (status code 403) +_site/en/lessons/retired/intro-to-beautiful-soup.html,609,External link https://bioguide.congress.gov/biosearch/biosearch.asp failed (status code 403) _site/en/lessons/retired/intro-to-beautiful-soup.html,1720,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/retired/intro-to-beautiful-soup.html,1730,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/retired/intro-to-beautiful-soup.md failed (status code 429) _site/en/lessons/retired/intro-to-the-zotero-api.html,415,External link https://github.com/programminghistorian/jekyll/issues/225 failed (status code 429) @@ -4845,55 +2785,56 @@ _site/en/lessons/sentiment-analysis-syuzhet.html,337,External link https://githu _site/en/lessons/sentiment-analysis-syuzhet.html,1847,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/sentiment-analysis-syuzhet.html,1857,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/sentiment-analysis-syuzhet.md failed (status code 429) _site/en/lessons/sentiment-analysis.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/108 failed (status code 429) -_site/en/lessons/sentiment-analysis.html,549,External link http://journals.sagepub.com/doi/abs/10.1177/1749975514542486 failed (status code 403) -_site/en/lessons/sentiment-analysis.html,622,External link https://github.com/cjhutto/vaderSentiment/blob/master/vaderSentiment/vaderSentiment.py failed (status code 429) +_site/en/lessons/sentiment-analysis.html,549,External link https://journals.sagepub.com/doi/abs/10.1177/1749975514542486 failed (status code 403) _site/en/lessons/sentiment-analysis.html,1443,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/sentiment-analysis.html,1453,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/sentiment-analysis.md failed (status code 429) _site/en/lessons/shiny-leaflet-newspaper-map-tutorial.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/416 failed (status code 429) _site/en/lessons/shiny-leaflet-newspaper-map-tutorial.html,1635,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status 
code 429) _site/en/lessons/shiny-leaflet-newspaper-map-tutorial.html,1645,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/shiny-leaflet-newspaper-map-tutorial.md failed (status code 429) _site/en/lessons/simulating-historical-communication-networks-python.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/605 failed (status code 429) -_site/en/lessons/simulating-historical-communication-networks-python.html,1107,External link https://github.com/projectmesa/mesa/blob/2.4.x-maintenance/mesa/datacollection.py failed (status code 429) -_site/en/lessons/simulating-historical-communication-networks-python.html,1521,External link https://doi.org/10.52842/conf.ecaade.2016.2.485 failed (status code 302) _site/en/lessons/simulating-historical-communication-networks-python.html,1551,External link https://doi.org/10.1177/1059712320922915 failed (status code 403) _site/en/lessons/simulating-historical-communication-networks-python.html,1557,External link https://doi.org/10.1093/oso/9780192857828.001.0001 failed (status code 403) _site/en/lessons/simulating-historical-communication-networks-python.html,1575,External link https://doi.org/10.1098/rsif.2014.0881 failed (status code 403) _site/en/lessons/simulating-historical-communication-networks-python.html,3427,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/simulating-historical-communication-networks-python.html,3437,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/simulating-historical-communication-networks-python.md failed (status code 429) _site/en/lessons/sonification.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/4 failed (status code 429) -_site/en/lessons/sonification.html,521,External link http://blog.taracopplestone.co.uk/making-things-photobashing-as-archaeological-remediation/ failed with something very wrong. -_site/en/lessons/sonification.html,548,External link http://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. -_site/en/lessons/sonification.html,625,External link http://www.lynda.com/GarageBand-tutorials/Importing-audio-tracks/156620/164050-4.html failed (status code 404) -_site/en/lessons/sonification.html,997,External link http://puffin.creighton.edu/jesuit/relations/ failed with something very wrong. -_site/en/lessons/sonification.html,1083,External link http://www.jstor.org/stable/734136 failed (status code 403) -_site/en/lessons/sonification.html,1085,External link http://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. +_site/en/lessons/sonification.html,521,External link https://blog.taracopplestone.co.uk/making-things-photobashing-as-archaeological-remediation/ failed with something very wrong. +_site/en/lessons/sonification.html,548,External link https://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. +_site/en/lessons/sonification.html,625,External link https://www.lynda.com/GarageBand-tutorials/Importing-audio-tracks/156620/164050-4.html failed (status code 404) +_site/en/lessons/sonification.html,836,External link https://abcnotation.com/wiki/abc:standard:v2.1 failed: Not Found (status code 404) +_site/en/lessons/sonification.html,997,External link https://puffin.creighton.edu/jesuit/relations/ failed with something very wrong. 
+_site/en/lessons/sonification.html,1060,External link https://www.lilypond.org/ failed with something very wrong. +_site/en/lessons/sonification.html,1083,External link https://www.jstor.org/stable/734136 failed (status code 403) +_site/en/lessons/sonification.html,1085,External link https://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. _site/en/lessons/sonification.html,1624,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/sonification.html,1634,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/sonification.md failed (status code 429) _site/en/lessons/space-place-gazetteers.html,335,External link https://github.com/programminghistorian/ph-submissions/issues/580 failed (status code 429) -_site/en/lessons/space-place-gazetteers.html,547,"External link http://bombsight.org/#17/51.50595/-0.10680 failed: http://bombsight.org/ exists, but the hash '17/51.50595/-0.10680' does not (status code 200)" +_site/en/lessons/space-place-gazetteers.html,547,"External link https://bombsight.org/#17/51.50595/-0.10680 failed: https://bombsight.org/ exists, but the hash '17/51.50595/-0.10680' does not (status code 200)" _site/en/lessons/space-place-gazetteers.html,1451,External link https://doi.org/10.1145/3485447.3512026 failed (status code 403) _site/en/lessons/space-place-gazetteers.html,1463,External link https://doi.org/10.1111/j.1467-8306.2005.00481.x failed (status code 403) _site/en/lessons/space-place-gazetteers.html,2438,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/space-place-gazetteers.html,2448,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/space-place-gazetteers.md failed (status code 429) -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,737,External link https://github.com/dhcolumbia/pandoc-workflow/blob/master/pandoctut.bib failed (status code 429) _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1204,"External link https://groups.google.com/forum/#!forum/pandoc-discuss failed: https://groups.google.com/forum/ exists, but the hash '!forum/pandoc-discuss' does not (status code 200)" -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1207,External link http://stackoverflow.com/questions/tagged/pandoc failed (status code 403) -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1218,External link http://mouapp.com/ failed with something very wrong. -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1223,External link http://www.authorea.com failed: Forbidden (status code 403) -_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1224,External link http://www.draftin.com failed: Service Unavailable (status code 503) +_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1207,External link https://stackoverflow.com/questions/tagged/pandoc failed (status code 403) +_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1218,External link https://mouapp.com/ failed with something very wrong. +_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1224,External link https://www.draftin.com failed with something very wrong. 
_site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,1269,External link https://github.com/programminghistorian/jekyll/issues/46#issuecomment-59219906 failed (status code 429) _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,2246,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.html,2256,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown.md failed (status code 429) _site/en/lessons/temporal-network-analysis-with-r.html,338,External link https://github.com/programminghistorian/ph-submissions/issues/179 failed (status code 429) _site/en/lessons/temporal-network-analysis-with-r.html,1777,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/temporal-network-analysis-with-r.html,1787,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/temporal-network-analysis-with-r.md failed (status code 429) -_site/en/lessons/text-mining-with-extracted-features.html,630,External link http://stackoverflow.com/a/19350234/233577 failed (status code 403) -_site/en/lessons/text-mining-with-extracted-features.html,1800,External link https://github.com/htrc/htrc-feature-reader/blob/master/README.ipynb failed (status code 429) -_site/en/lessons/text-mining-with-extracted-features.html,1804,External link https://github.com/htrc/htrc-feature-reader/blob/master/examples/Within-Book%20Sentiment%20Trends.ipynb failed (status code 429) -_site/en/lessons/text-mining-with-extracted-features.html,1854,External link https://github.com/htrc/htrc-feature-reader/blob/master/examples/ID_to_Rsync_Link.ipynb failed (status code 429) +_site/en/lessons/text-mining-with-extracted-features.html,352,External link https://github.com/programminghistorian/ph-submissions/issues/29 failed (status code 429) +_site/en/lessons/text-mining-with-extracted-features.html,630,External link https://stackoverflow.com/a/19350234/233577 failed (status code 403) _site/en/lessons/text-mining-with-extracted-features.html,1854,External link https://wiki.ucop.edu/display/Curation/PairTree failed with something very wrong. _site/en/lessons/text-mining-with-extracted-features.html,2838,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/text-mining-with-extracted-features.html,2848,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/text-mining-with-extracted-features.md failed (status code 429) +_site/en/lessons/text-mining-youtube-comments.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/374 failed (status code 429) +_site/en/lessons/text-mining-youtube-comments.html,524,External link https://www.allsides.com failed (status code 403) +_site/en/lessons/text-mining-youtube-comments.html,876,External link https://www.wordfish.org/ failed with something very wrong. +_site/en/lessons/text-mining-youtube-comments.html,876,External link https://www.wordfish.org/software.html failed with something very wrong. +_site/en/lessons/text-mining-youtube-comments.html,906,External link https://www.wordfish.org/ failed with something very wrong. 
+_site/en/lessons/text-mining-youtube-comments.html,906,External link https://www.wordfish.org/software.html failed with something very wrong. _site/en/lessons/text-mining-youtube-comments.html,988,External link https://doi.org/10.1111/j.1540-5907.2008.00338.x failed (status code 403) _site/en/lessons/text-mining-youtube-comments.html,1181,External link https://doi.org/10.1080/14799855.2012.669207 failed (status code 403) _site/en/lessons/text-mining-youtube-comments.html,1181,External link https://doi.org/10.1073/pnas.2101967118 failed (status code 403) @@ -4904,49 +2845,56 @@ _site/en/lessons/text-mining-youtube-comments.html,1187,External link https://do _site/en/lessons/text-mining-youtube-comments.html,1202,External link https://doi.org/10.1111/j.1540-5907.2008.00338.x failed (status code 403) _site/en/lessons/text-mining-youtube-comments.html,2614,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/text-mining-youtube-comments.html,2624,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/text-mining-youtube-comments.md failed (status code 429) -_site/en/lessons/topic-modeling-and-mallet.html,545,External link http://www.worldcat.org/title/reading-machines-toward-an-algorithmic-criticism/oclc/708761605&referer=brief_results failed: Forbidden (status code 403) -_site/en/lessons/topic-modeling-and-mallet.html,1228,External link http://dl.acm.org/citation.cfm?id=944937 failed (status code 403) +_site/en/lessons/topic-modeling-and-mallet.html,1225,External link https://web.archive.org/web/20160704150726/https://www.lisarhody.com:80/some-assembly-required/ failed (status code 404) +_site/en/lessons/topic-modeling-and-mallet.html,1228,External link https://dl.acm.org/citation.cfm?id=944937 failed (status code 403) _site/en/lessons/topic-modeling-and-mallet.html,2649,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/topic-modeling-and-mallet.html,2659,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/topic-modeling-and-mallet.md failed (status code 429) +_site/en/lessons/transcribing-handwritten-text-with-python-and-azure.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/511 failed (status code 429) _site/en/lessons/transcribing-handwritten-text-with-python-and-azure.html,1664,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/transcribing-handwritten-text-with-python-and-azure.html,1674,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/transcribing-handwritten-text-with-python-and-azure.md failed (status code 429) +_site/en/lessons/transforming-xml-with-xsl.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/11 failed (status code 429) _site/en/lessons/transforming-xml-with-xsl.html,535,External link https://irt.kcl.ac.uk/irt2009/ failed with something very wrong. _site/en/lessons/transforming-xml-with-xsl.html,685,External link https://www.java.com/en/download/ failed (status code 403) +_site/en/lessons/transforming-xml-with-xsl.html,728,External link https://scissors-and-paste.net failed with something very wrong. 
_site/en/lessons/transforming-xml-with-xsl.html,1274,External link https://stackoverflow.com/questions/16811332/cannot-run-java-from-the-windows-powershell-command-prompt failed (status code 403) -_site/en/lessons/transforming-xml-with-xsl.html,1279,External link https://www.computerhope.com/issues/ch000549.htm failed (status code 403) _site/en/lessons/transforming-xml-with-xsl.html,1280,External link https://stackoverflow.com/questions/22465332/setting-path-environment-variable-in-osx-permanently failed (status code 403) _site/en/lessons/transforming-xml-with-xsl.html,1835,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/transforming-xml-with-xsl.html,1845,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/transforming-xml-with-xsl.md failed (status code 429) _site/en/lessons/transliterating.html,1426,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/transliterating.html,1436,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/transliterating.md failed (status code 429) +_site/en/lessons/understanding-creating-word-embeddings.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/555 failed (status code 429) _site/en/lessons/understanding-creating-word-embeddings.html,989,External link https://doi.org/10.1080/01615440.2020.1760157 failed (status code 403) _site/en/lessons/understanding-creating-word-embeddings.html,2404,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/understanding-creating-word-embeddings.html,2414,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/understanding-creating-word-embeddings.md failed (status code 429) -_site/en/lessons/understanding-regular-expressions.html,628,"External link http://archive.org/stream/jstor-4560629/4560629#page/n0/mode/2up failed: http://archive.org/stream/jstor-4560629/4560629 exists, but the hash 'page/n0/mode/2up' does not (status code 200)" +_site/en/lessons/understanding-regular-expressions.html,628,"External link https://archive.org/stream/jstor-4560629/4560629#page/n0/mode/2up failed: https://archive.org/stream/jstor-4560629/4560629 exists, but the hash 'page/n0/mode/2up' does not (status code 200)" +_site/en/lessons/understanding-regular-expressions.html,1430,External link https://dh.obdurodon.org/regex.html failed with something very wrong. 
_site/en/lessons/understanding-regular-expressions.html,1967,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/understanding-regular-expressions.html,1977,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/understanding-regular-expressions.md failed (status code 429) _site/en/lessons/up-and-running-with-omeka.html,1442,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/up-and-running-with-omeka.html,1452,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/up-and-running-with-omeka.md failed (status code 429) +_site/en/lessons/urban-demographic-data-r-ggplot2.html,335,External link https://github.com/programminghistorian/ph-submissions/issues/606 failed (status code 429) _site/en/lessons/urban-demographic-data-r-ggplot2.html,1203,External link https://doi.org/10.1198/jcgs.2009.07098 failed (status code 403) +_site/en/lessons/urban-demographic-data-r-ggplot2.html,1227,External link https://www.cookbook-r.com/Graphs/ failed with something very wrong. _site/en/lessons/urban-demographic-data-r-ggplot2.html,2207,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/urban-demographic-data-r-ggplot2.html,2217,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/urban-demographic-data-r-ggplot2.md failed (status code 429) -_site/en/lessons/using-javascript-to-create-maps.html,744,External link http://stackoverflow.com/questions/16151018/npm-throws-error-without-sudo/24404451#24404451 failed (status code 403) +_site/en/lessons/using-javascript-to-create-maps.html,340,External link https://github.com/programminghistorian/ph-submissions/issues/32 failed (status code 429) +_site/en/lessons/using-javascript-to-create-maps.html,744,External link https://stackoverflow.com/questions/16151018/npm-throws-error-without-sudo/24404451#24404451 failed (status code 403) _site/en/lessons/using-javascript-to-create-maps.html,1725,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/using-javascript-to-create-maps.html,1735,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/using-javascript-to-create-maps.md failed (status code 429) _site/en/lessons/vector-layers-qgis.html,2646,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/vector-layers-qgis.html,2656,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/vector-layers-qgis.md failed (status code 429) _site/en/lessons/viewing-html-files.html,1609,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/viewing-html-files.html,1619,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/viewing-html-files.md failed (status code 429) -_site/en/lessons/visualizing-with-bokeh.html,612,External link https://github.com/programminghistorian/ph-submissions/tree/gh-pages/assets/visualizing-with-bokeh/visualizing-with-bokeh.ipynb failed (status code 429) -_site/en/lessons/visualizing-with-bokeh.html,626,External link https://github.com/programminghistorian/ph-submissions/tree/gh-pages/assets/visualizing-with-bokeh/visualizing-with-bokeh.ipynb failed (status code 
429) +_site/en/lessons/visualizing-with-bokeh.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/152 failed (status code 429) _site/en/lessons/visualizing-with-bokeh.html,723,"External link https://pandas.pydata.org/pandas-docs/stable/tutorials.html#lessons-for-new-pandas-users failed: https://pandas.pydata.org/pandas-docs/stable/tutorials.html exists, but the hash 'lessons-for-new-pandas-users' does not (status code 200)" _site/en/lessons/visualizing-with-bokeh.html,739,"External link https://pandas.pydata.org/pandas-docs/stable/api.html#input-output failed: https://pandas.pydata.org/pandas-docs/stable/api.html exists, but the hash 'input-output' does not (status code 200)" -_site/en/lessons/visualizing-with-bokeh.html,1101,"External link http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases failed: http://pandas.pydata.org/pandas-docs/stable/timeseries.html exists, but the hash 'offset-aliases' does not (status code 200)" -_site/en/lessons/visualizing-with-bokeh.html,1190,"External link http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases failed: http://pandas.pydata.org/pandas-docs/stable/timeseries.html exists, but the hash 'offset-aliases' does not (status code 200)" +_site/en/lessons/visualizing-with-bokeh.html,1101,"External link https://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases failed: https://pandas.pydata.org/pandas-docs/stable/timeseries.html exists, but the hash 'offset-aliases' does not (status code 200)" +_site/en/lessons/visualizing-with-bokeh.html,1190,"External link https://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases failed: https://pandas.pydata.org/pandas-docs/stable/timeseries.html exists, but the hash 'offset-aliases' does not (status code 200)" _site/en/lessons/visualizing-with-bokeh.html,1890,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/visualizing-with-bokeh.html,1900,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/visualizing-with-bokeh.md failed (status code 429) _site/en/lessons/windows-installation.html,1719,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/windows-installation.html,1729,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/windows-installation.md failed (status code 429) +_site/en/lessons/working-with-batches-of-pdf-files.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/258 failed (status code 429) _site/en/lessons/working-with-batches-of-pdf-files.html,707,"External link https://manpages.ubuntu.com/manpages/bionic/en/man1/grep.1.html#regular%20expressions failed: https://manpages.ubuntu.com/manpages/bionic/en/man1/grep.1.html exists, but the hash 'regular%20expressions' does not (status code 200)" _site/en/lessons/working-with-batches-of-pdf-files.html,1414,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/lessons/working-with-batches-of-pdf-files.html,1424,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/working-with-batches-of-pdf-files.md failed (status code 429) @@ -4956,39 +2904,37 @@ _site/en/lessons/working-with-web-pages.html,1731,External link https://github.c _site/en/lessons/working-with-web-pages.html,1741,External link 
https://github.com/programminghistorian/jekyll/commits/gh-pages/en/lessons/working-with-web-pages.md failed (status code 429) _site/en/privacy-policy.html,353,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/privacy-policy.html,363,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/privacy-policy.md failed (status code 429) -_site/en/project-team.html,308,External link http://twitter.com/maxcarlons failed (status code 400) -_site/en/project-team.html,510,External link http://twitter.com/cosovschi failed (status code 400) +_site/en/project-team.html,308,External link https://twitter.com/maxcarlons failed (status code 400) +_site/en/project-team.html,510,External link https://twitter.com/cosovschi failed (status code 400) _site/en/project-team.html,1270,External link https://www.nabeelsiddiqui.net/ failed with something very wrong. -_site/en/project-team.html,1276,External link http://twitter.com/nabsiddiqui failed (status code 400) -_site/en/project-team.html,1629,External link http://twitter.com/giulia_taurino failed (status code 400) -_site/en/project-team.html,1808,External link http://twitter.com/alexwermercolan failed (status code 400) -_site/en/project-team.html,2057,External link http://www.mariajoseafanador.com failed: Moved Permanently (status code 301) -_site/en/project-team.html,2063,External link http://twitter.com/mariajoafana failed (status code 400) -_site/en/project-team.html,2532,External link http://twitter.com/IsaGribomont failed (status code 400) -_site/en/project-team.html,2743,External link http://twitter.com/espejolento failed (status code 400) -_site/en/project-team.html,3034,External link http://twitter.com/jenniferisve failed (status code 400) -_site/en/project-team.html,3359,External link http://twitter.com/enetreseles failed (status code 400) -_site/en/project-team.html,3566,External link http://twitter.com/jgob failed (status code 400) -_site/en/project-team.html,3861,External link http://twitter.com/rivaquiroga failed (status code 400) -_site/en/project-team.html,4802,External link http://twitter.com/superHH failed (status code 400) -_site/en/project-team.html,5188,External link http://twitter.com/emilienschultz failed (status code 400) -_site/en/project-team.html,5315,External link http://twitter.com/davvalent failed (status code 400) -_site/en/project-team.html,5840,External link http://twitter.com/danielalvesfcsh failed (status code 400) -_site/en/project-team.html,6105,External link http://twitter.com/ericbrasiln failed (status code 400) -_site/en/project-team.html,6541,External link http://twitter.com/jimmy_medeiros failed (status code 400) -_site/en/project-team.html,7025,External link http://twitter.com/araceletorres failed (status code 400) -_site/en/project-team.html,7284,External link http://twitter.com/j_w_baker failed (status code 400) -_site/en/project-team.html,7725,External link http://twitter.com/Adam_Crymble failed (status code 400) -_site/en/project-team.html,8256,External link http://twitter.com/jenniferisve failed (status code 400) -_site/en/project-team.html,8587,External link http://twitter.com/rivaquiroga failed (status code 400) -_site/en/project-team.html,8876,External link http://twitter.com/amsichani failed (status code 400) -_site/en/project-team.html,9219,External link http://twitter.com/AnisaHawes failed (status code 400) +_site/en/project-team.html,1276,External link https://twitter.com/nabsiddiqui failed (status code 400) 
+_site/en/project-team.html,1629,External link https://twitter.com/giulia_taurino failed (status code 400) +_site/en/project-team.html,1808,External link https://twitter.com/alexwermercolan failed (status code 400) +_site/en/project-team.html,2057,External link https://www.mariajoseafanador.com failed with something very wrong. +_site/en/project-team.html,2063,External link https://twitter.com/mariajoafana failed (status code 400) +_site/en/project-team.html,2532,External link https://twitter.com/IsaGribomont failed (status code 400) +_site/en/project-team.html,2743,External link https://twitter.com/espejolento failed (status code 400) +_site/en/project-team.html,3034,External link https://twitter.com/jenniferisve failed (status code 400) +_site/en/project-team.html,3359,External link https://twitter.com/enetreseles failed (status code 400) +_site/en/project-team.html,3566,External link https://twitter.com/jgob failed (status code 400) +_site/en/project-team.html,3861,External link https://twitter.com/rivaquiroga failed (status code 400) +_site/en/project-team.html,4802,External link https://twitter.com/superHH failed (status code 400) +_site/en/project-team.html,5188,External link https://twitter.com/emilienschultz failed (status code 400) +_site/en/project-team.html,5315,External link https://twitter.com/davvalent failed (status code 400) +_site/en/project-team.html,5840,External link https://twitter.com/danielalvesfcsh failed (status code 400) +_site/en/project-team.html,6105,External link https://twitter.com/ericbrasiln failed (status code 400) +_site/en/project-team.html,6541,External link https://twitter.com/jimmy_medeiros failed (status code 400) +_site/en/project-team.html,7025,External link https://twitter.com/araceletorres failed (status code 400) +_site/en/project-team.html,7284,External link https://twitter.com/j_w_baker failed (status code 400) +_site/en/project-team.html,7725,External link https://twitter.com/Adam_Crymble failed (status code 400) +_site/en/project-team.html,8256,External link https://twitter.com/jenniferisve failed (status code 400) +_site/en/project-team.html,8587,External link https://twitter.com/rivaquiroga failed (status code 400) +_site/en/project-team.html,8876,External link https://twitter.com/amsichani failed (status code 400) +_site/en/project-team.html,9219,External link https://twitter.com/AnisaHawes failed (status code 400) _site/en/project-team.html,10039,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/project-team.html,10049,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/project-team.md failed (status code 429) _site/en/research.html,264,External link https://academic.oup.com/jah/article-abstract/103/1/299/1751315 failed (status code 403) -_site/en/research.html,265,External link http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ failed: got a time out (response code 301) (status code 301) -_site/en/research.html,280,External link https://novaresearch.unl.pt/files/32228034/Ensinar_Humanidades_Digitais.pdf failed (status code 403) -_site/en/research.html,283,External link http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) +_site/en/research.html,283,External link https://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) _site/en/research.html,327,External link 
https://www.history.ac.uk/our-century/centenary-events/training-teacher-giving-your-first-digital-history-workshop failed (status code 403) _site/en/research.html,363,External link https://www.caurj.gov.br/seminario-solare-reune-desenvolvedores-internacionais-de-software-livre-para-arquitetura-e-urbanismo/ failed (status code 403) _site/en/research.html,370,External link https://www.jisc.ac.uk/events/digifest-2023/programme failed (status code 404) @@ -4996,87 +2942,88 @@ _site/en/research.html,376,External link https://dcdcconference.com/ failed with _site/en/research.html,387,External link https://openpublishingfest.org/calendar.html#event-69/ failed: got a time out (response code 0) (status code 0) _site/en/research.html,391,External link https://2021.dhbenelux.org/schedule/ failed with something very wrong. _site/en/research.html,393,"External link https://msuglobaldh.org/abstracts/#programming-historian failed: https://msuglobaldh.org/abstracts/ exists, but the hash 'programming-historian' does not (status code 200)" +_site/en/research.html,395,External link https://ixa2.si.ehu.eus/intele/?q=webinars failed with something very wrong. _site/en/research.html,483,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/research.html,493,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/research.md failed (status code 429) _site/en/reviewer-guidelines.html,403,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/reviewer-guidelines.html,413,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/reviewer-guidelines.md failed (status code 429) +_site/en/supporters.html,280,External link https://www.sas.ac.uk/ failed (status code 403) _site/en/supporters.html,292,External link https://www.tilburguniversity.edu/ failed (status code 403) +_site/en/supporters.html,304,External link https://www.history.ac.uk/library-digital failed (status code 403) _site/en/supporters.html,334,"External link https://www.sussex.ac.uk/collaborate/business/public-funds#:~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies failed: https://www.sussex.ac.uk/collaborate/business/public-funds exists, but the hash ':~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies' does not (status code 200)" -_site/en/supporters.html,335,External link https://www.thebritishacademy.ac.uk/projects/writing-workshops-2018-digital-humanities/ failed (status code 403) _site/en/supporters.html,434,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/supporters.html,444,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/supporters.md failed (status code 429) _site/en/translator-guidelines.html,356,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/translator-guidelines.html,366,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/translator-guidelines.md failed (status code 429) _site/en/vacancies.html,290,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/en/vacancies.html,300,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/vacancies.md failed 
(status code 429) +_site/es/acerca-de.html,269,External link https://dhawards.org/dhawards2022/results/ failed with something very wrong. +_site/es/acerca-de.html,269,External link https://dhawards.org/dhawards2016/results/ failed with something very wrong. +_site/es/acerca-de.html,269,External link https://dhawards.org/dhawards2017/results/ failed with something very wrong. _site/es/acerca-de.html,269,External link https://openpublishingawards.org/results/2021/index.html failed with something very wrong. _site/es/acerca-de.html,326,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/acerca-de.html,336,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/acerca-de.md failed (status code 429) +_site/es/colaboradores.html,281,External link https://www.sas.ac.uk/ failed (status code 403) _site/es/colaboradores.html,293,External link https://www.tilburguniversity.edu/ failed (status code 403) +_site/es/colaboradores.html,305,External link https://www.history.ac.uk/library-digital failed (status code 403) _site/es/colaboradores.html,335,"External link https://www.sussex.ac.uk/collaborate/business/public-funds#:~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies failed: https://www.sussex.ac.uk/collaborate/business/public-funds exists, but the hash ':~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies' does not (status code 200)" -_site/es/colaboradores.html,336,External link https://www.thebritishacademy.ac.uk/projects/writing-workshops-2018-digital-humanities/ failed (status code 403) _site/es/colaboradores.html,435,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/colaboradores.html,445,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/colaboradores.md failed (status code 429) -_site/es/contribuciones.html,258,External link http://www.mecd.gob.es/planes-nacionales/dam/jcr:f20a4ba1-0ed2-445d-9be9-b8b0382562ea/mex-glosario-interpares-total0112.pdf failed with something very wrong. +_site/es/contribuciones.html,258,External link https://www.mecd.gob.es/planes-nacionales/dam/jcr:f20a4ba1-0ed2-445d-9be9-b8b0382562ea/mex-glosario-interpares-total0112.pdf failed with something very wrong. 
+_site/es/contribuciones.html,303,External link https://www.worldcat.org/title/programming-historian-en-espanol/oclc/1061292935&referer=brief_results failed (status code 403) _site/es/contribuciones.html,357,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/contribuciones.html,367,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/contribuciones.md failed (status code 429) _site/es/donaciones.html,265,External link https://www.patreon.com/join/theprogramminghistorian failed (status code 403) _site/es/donaciones.html,321,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/donaciones.html,331,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/donaciones.md failed (status code 429) -_site/es/equipo-de-proyecto.html,306,External link http://twitter.com/maxcarlons failed (status code 400) -_site/es/equipo-de-proyecto.html,508,External link http://twitter.com/cosovschi failed (status code 400) +_site/es/equipo-de-proyecto.html,306,External link https://twitter.com/maxcarlons failed (status code 400) +_site/es/equipo-de-proyecto.html,508,External link https://twitter.com/cosovschi failed (status code 400) _site/es/equipo-de-proyecto.html,1268,External link https://www.nabeelsiddiqui.net/ failed with something very wrong. -_site/es/equipo-de-proyecto.html,1274,External link http://twitter.com/nabsiddiqui failed (status code 400) -_site/es/equipo-de-proyecto.html,1627,External link http://twitter.com/giulia_taurino failed (status code 400) -_site/es/equipo-de-proyecto.html,1806,External link http://twitter.com/alexwermercolan failed (status code 400) -_site/es/equipo-de-proyecto.html,2055,External link http://www.mariajoseafanador.com failed: Moved Permanently (status code 301) -_site/es/equipo-de-proyecto.html,2061,External link http://twitter.com/mariajoafana failed (status code 400) -_site/es/equipo-de-proyecto.html,2530,External link http://twitter.com/IsaGribomont failed (status code 400) -_site/es/equipo-de-proyecto.html,2741,External link http://twitter.com/espejolento failed (status code 400) -_site/es/equipo-de-proyecto.html,3032,External link http://twitter.com/jenniferisve failed (status code 400) -_site/es/equipo-de-proyecto.html,3357,External link http://twitter.com/enetreseles failed (status code 400) -_site/es/equipo-de-proyecto.html,3564,External link http://twitter.com/jgob failed (status code 400) -_site/es/equipo-de-proyecto.html,3859,External link http://twitter.com/rivaquiroga failed (status code 400) -_site/es/equipo-de-proyecto.html,4800,External link http://twitter.com/superHH failed (status code 400) -_site/es/equipo-de-proyecto.html,5186,External link http://twitter.com/emilienschultz failed (status code 400) -_site/es/equipo-de-proyecto.html,5313,External link http://twitter.com/davvalent failed (status code 400) -_site/es/equipo-de-proyecto.html,5838,External link http://twitter.com/danielalvesfcsh failed (status code 400) -_site/es/equipo-de-proyecto.html,6103,External link http://twitter.com/ericbrasiln failed (status code 400) -_site/es/equipo-de-proyecto.html,6539,External link http://twitter.com/jimmy_medeiros failed (status code 400) -_site/es/equipo-de-proyecto.html,7023,External link http://twitter.com/araceletorres failed (status code 400) -_site/es/equipo-de-proyecto.html,7282,External link http://twitter.com/j_w_baker failed (status code 400) 
-_site/es/equipo-de-proyecto.html,7723,External link http://twitter.com/Adam_Crymble failed (status code 400) -_site/es/equipo-de-proyecto.html,8254,External link http://twitter.com/jenniferisve failed (status code 400) -_site/es/equipo-de-proyecto.html,8585,External link http://twitter.com/rivaquiroga failed (status code 400) -_site/es/equipo-de-proyecto.html,8874,External link http://twitter.com/amsichani failed (status code 400) -_site/es/equipo-de-proyecto.html,9217,External link http://twitter.com/AnisaHawes failed (status code 400) +_site/es/equipo-de-proyecto.html,1274,External link https://twitter.com/nabsiddiqui failed (status code 400) +_site/es/equipo-de-proyecto.html,1627,External link https://twitter.com/giulia_taurino failed (status code 400) +_site/es/equipo-de-proyecto.html,1806,External link https://twitter.com/alexwermercolan failed (status code 400) +_site/es/equipo-de-proyecto.html,2055,External link https://www.mariajoseafanador.com failed with something very wrong. +_site/es/equipo-de-proyecto.html,2061,External link https://twitter.com/mariajoafana failed (status code 400) +_site/es/equipo-de-proyecto.html,2530,External link https://twitter.com/IsaGribomont failed (status code 400) +_site/es/equipo-de-proyecto.html,2741,External link https://twitter.com/espejolento failed (status code 400) +_site/es/equipo-de-proyecto.html,3032,External link https://twitter.com/jenniferisve failed (status code 400) +_site/es/equipo-de-proyecto.html,3357,External link https://twitter.com/enetreseles failed (status code 400) +_site/es/equipo-de-proyecto.html,3564,External link https://twitter.com/jgob failed (status code 400) +_site/es/equipo-de-proyecto.html,3859,External link https://twitter.com/rivaquiroga failed (status code 400) +_site/es/equipo-de-proyecto.html,4800,External link https://twitter.com/superHH failed (status code 400) +_site/es/equipo-de-proyecto.html,5186,External link https://twitter.com/emilienschultz failed (status code 400) +_site/es/equipo-de-proyecto.html,5313,External link https://twitter.com/davvalent failed (status code 400) +_site/es/equipo-de-proyecto.html,5838,External link https://twitter.com/danielalvesfcsh failed (status code 400) +_site/es/equipo-de-proyecto.html,6103,External link https://twitter.com/ericbrasiln failed (status code 400) +_site/es/equipo-de-proyecto.html,6539,External link https://twitter.com/jimmy_medeiros failed (status code 400) +_site/es/equipo-de-proyecto.html,7023,External link https://twitter.com/araceletorres failed (status code 400) +_site/es/equipo-de-proyecto.html,7282,External link https://twitter.com/j_w_baker failed (status code 400) +_site/es/equipo-de-proyecto.html,7723,External link https://twitter.com/Adam_Crymble failed (status code 400) +_site/es/equipo-de-proyecto.html,8254,External link https://twitter.com/jenniferisve failed (status code 400) +_site/es/equipo-de-proyecto.html,8585,External link https://twitter.com/rivaquiroga failed (status code 400) +_site/es/equipo-de-proyecto.html,8874,External link https://twitter.com/amsichani failed (status code 400) +_site/es/equipo-de-proyecto.html,9217,External link https://twitter.com/AnisaHawes failed (status code 400) _site/es/equipo-de-proyecto.html,10037,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/equipo-de-proyecto.html,10047,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/equipo-de-proyecto.md failed (status code 429) _site/es/eventos.html,296,External link 
https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/eventos.html,306,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/eventos.md failed (status code 429) _site/es/guia-editor.html,347,External link https://github.com/programminghistorian/ph-submissions/commits/gh-pages failed (status code 429) -_site/es/guia-editor.html,365,External link https://github.com/programminghistorian/ph-submissions/blob/gh-pages/es/PLANTILLA-TRADUCCION.md failed (status code 429) -_site/es/guia-editor.html,386,External link https://github.com/programminghistorian/ph-submissions/blob/gh-pages/es/PLANTILLA-LECCION.md failed (status code 429) _site/es/guia-editor.html,505,"External link https://zenodo.org/record/49873#.V0lazGaGa7o failed: https://zenodo.org/record/49873 exists, but the hash '.V0lazGaGa7o' does not (status code 200)" -_site/es/guia-editor.html,617,External link http://www.europeana.eu/portal/en failed: Forbidden (status code 403) +_site/es/guia-editor.html,617,External link https://www.europeana.eu/portal/en failed (status code 403) _site/es/guia-editor.html,645,"External link https://docs.google.com/spreadsheets/d/1o-C-3WwfcEYWipIFb112tkuM-XOI8pVVpA9_sag9Ph8/edit#gid=904817529 failed: https://docs.google.com/spreadsheets/d/1o-C-3WwfcEYWipIFb112tkuM-XOI8pVVpA9_sag9Ph8/edit exists, but the hash 'gid=904817529' does not (status code 200)" _site/es/guia-editor.html,798,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/guia-editor.html,808,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/guia-editor.md failed (status code 429) _site/es/guia-para-autores.html,265,"External link https://docs.google.com/spreadsheets/d/1vrvZTygZLfQRoQildD667Xcgzhf_reQC8Nq4OD-BRIA/edit#gid=0 failed: https://docs.google.com/spreadsheets/d/1vrvZTygZLfQRoQildD667Xcgzhf_reQC8Nq4OD-BRIA/edit exists, but the hash 'gid=0' does not (status code 200)" _site/es/guia-para-autores.html,683,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/guia-para-autores.html,693,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/guia-para-autores.md failed (status code 429) -_site/es/guia-para-revisores.html,336,External link http://www.mecd.gob.es/planes-nacionales/dam/jcr:f20a4ba1-0ed2-445d-9be9-b8b0382562ea/mex-glosario-interpares-total0112.pdf failed with something very wrong. +_site/es/guia-para-revisores.html,336,External link https://www.mecd.gob.es/planes-nacionales/dam/jcr:f20a4ba1-0ed2-445d-9be9-b8b0382562ea/mex-glosario-interpares-total0112.pdf failed with something very wrong. 
_site/es/guia-para-revisores.html,444,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/guia-para-revisores.html,454,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/guia-para-revisores.md failed (status code 429) -_site/es/guia-para-traductores.html,260,External link https://github.com/programminghistorian/ph-submissions/blob/gh-pages/es/lista-de-traducciones.md failed (status code 429) _site/es/guia-para-traductores.html,638,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/guia-para-traductores.html,648,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/guia-para-traductores.md failed (status code 429) _site/es/index.html,329,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/index.html,339,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/index.md failed (status code 429) -_site/es/investigacion.html,264,External link http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian failed: got a time out (response code 301) (status code 301) -_site/es/investigacion.html,265,External link http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian failed: got a time out (response code 301) (status code 301) -_site/es/investigacion.html,280,External link https://novaresearch.unl.pt/files/32228034/Ensinar_Humanidades_Digitais.pdf failed (status code 403) -_site/es/investigacion.html,283,External link http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) +_site/es/investigacion.html,283,External link https://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) _site/es/investigacion.html,326,External link https://www.history.ac.uk/our-century/centenary-events/training-teacher-giving-your-first-digital-history-workshop failed (status code 403) _site/es/investigacion.html,362,External link https://www.caurj.gov.br/seminario-solare-reune-desenvolvedores-internacionais-de-software-livre-para-arquitetura-e-urbanismo/ failed (status code 403) _site/es/investigacion.html,369,External link https://www.jisc.ac.uk/events/digifest-2023/programme failed (status code 404) @@ -5084,63 +3031,54 @@ _site/es/investigacion.html,375,External link https://dcdcconference.com/ failed _site/es/investigacion.html,386,External link https://openpublishingfest.org/calendar.html#event-69/ failed: got a time out (response code 0) (status code 0) _site/es/investigacion.html,390,External link https://2021.dhbenelux.org/schedule/ failed with something very wrong. _site/es/investigacion.html,392,"External link https://msuglobaldh.org/abstracts/#programming-historian failed: https://msuglobaldh.org/abstracts/ exists, but the hash 'programming-historian' does not (status code 200)" +_site/es/investigacion.html,394,External link https://ixa2.si.ehu.eus/intele/?q=webinars failed with something very wrong. 
_site/es/investigacion.html,485,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/investigacion.html,495,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/investigacion.md failed (status code 429) _site/es/jisc-tna-colaboracion.html,336,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/jisc-tna-colaboracion.html,346,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/jisc-tna-colaboracion.md failed (status code 429) -_site/es/lecciones/administracion-de-datos-en-r.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/199 failed (status code 429) _site/es/lecciones/administracion-de-datos-en-r.html,1604,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/administracion-de-datos-en-r.html,1614,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/administracion-de-datos-en-r.md failed (status code 429) -_site/es/lecciones/analisis-de-corpus-con-antconc.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/170 failed (status code 429) _site/es/lecciones/analisis-de-corpus-con-antconc.html,630,External link https://academic.oup.com/dsh/article-abstract/8/4/243/928942 failed (status code 403) +_site/es/lecciones/analisis-de-corpus-con-antconc.html,1132,External link https://elies.rediris.es/elies18/ failed with something very wrong. _site/es/lecciones/analisis-de-corpus-con-antconc.html,1675,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/analisis-de-corpus-con-antconc.html,1685,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/analisis-de-corpus-con-antconc.md failed (status code 429) -_site/es/lecciones/analisis-de-correspondencia-en-r.html,358,External link https://github.com/programminghistorian/ph-submissions/issues/331 failed (status code 429) _site/es/lecciones/analisis-de-correspondencia-en-r.html,1705,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/analisis-de-correspondencia-en-r.html,1715,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/analisis-de-correspondencia-en-r.md failed (status code 429) -_site/es/lecciones/analisis-de-sentimientos-r.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/286 failed (status code 429) _site/es/lecciones/analisis-de-sentimientos-r.html,1544,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/analisis-de-sentimientos-r.html,1554,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/analisis-de-sentimientos-r.md failed (status code 429) -_site/es/lecciones/analisis-redes-sociales-teatro-1.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/517 failed (status code 429) _site/es/lecciones/analisis-redes-sociales-teatro-1.html,1448,External link https://doi.org/10.5944/rhd.vol.4.2019.25187 failed: got a time out (response code 302) (status code 302) _site/es/lecciones/analisis-redes-sociales-teatro-1.html,1468,External link https://doi.org/10.5944/rhd.vol.3.2019.23144 failed: got a time out (response code 302) 
(status code 302) _site/es/lecciones/analisis-redes-sociales-teatro-1.html,2075,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/analisis-redes-sociales-teatro-1.html,2085,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/analisis-redes-sociales-teatro-1.md failed (status code 429) -_site/es/lecciones/analisis-redes-sociales-teatro-2.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/547 failed (status code 429) _site/es/lecciones/analisis-redes-sociales-teatro-2.html,511,"External link https://gephi.org/plugins/#/ failed: https://gephi.org/plugins/ exists, but the hash '/' does not (status code 200)" +_site/es/lecciones/analisis-redes-sociales-teatro-2.html,1101,External link https://doi.org/10.1093/llc/fqaa015 failed (status code 403) _site/es/lecciones/analisis-redes-sociales-teatro-2.html,1639,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/analisis-redes-sociales-teatro-2.html,1649,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/analisis-redes-sociales-teatro-2.md failed (status code 429) -_site/es/lecciones/analisis-temporal-red.html,341,External link https://github.com/programminghistorian/ph-submissions/issues/218 failed (status code 429) _site/es/lecciones/analisis-temporal-red.html,1764,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/analisis-temporal-red.html,1774,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/analisis-temporal-red.md failed (status code 429) -_site/es/lecciones/analisis-voyant-tools.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/211 failed (status code 429) -_site/es/lecciones/analisis-voyant-tools.html,670,External link https://github.com/corpusenespanol/discursos-presidenciales/blob/master/mexico/2007_mx_calderon.txt failed (status code 429) _site/es/lecciones/analisis-voyant-tools.html,1086,External link https://twitter.com/VoyantTools/status/1025458748574326784 failed (status code 400) _site/es/lecciones/analisis-voyant-tools.html,1272,External link https://twitter.com/madvivacious failed (status code 400) _site/es/lecciones/analisis-voyant-tools.html,1275,"External link https://voyant-tools.org/docs/#!/guide/corpuscreator failed: https://voyant-tools.org/docs/ exists, but the hash '!/guide/corpuscreator' does not (status code 200)" _site/es/lecciones/analisis-voyant-tools.html,1816,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/analisis-voyant-tools.html,1826,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/analisis-voyant-tools.md failed (status code 429) -_site/es/lecciones/construir-repositorio-de-fuentes.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/188 failed (status code 429) -_site/es/lecciones/construir-repositorio-de-fuentes.html,592,External link http://localhost/ failed with something very wrong. -_site/es/lecciones/construir-repositorio-de-fuentes.html,592,External link http://127.0.0.1 failed with something very wrong. -_site/es/lecciones/construir-repositorio-de-fuentes.html,622,External link http://localhost/phpmyadmin/ failed with something very wrong. 
-_site/es/lecciones/construir-repositorio-de-fuentes.html,692,External link http://localhost/phpmyadmin failed with something very wrong. -_site/es/lecciones/construir-repositorio-de-fuentes.html,914,"External link http://omeka.org/codex/Plugin_Writing_Best_Practices#Plugin_Directory_Structure failed: http://omeka.org/codex/Plugin_Writing_Best_Practices exists, but the hash 'Plugin_Directory_Structure' does not (status code 200)" -_site/es/lecciones/construir-repositorio-de-fuentes.html,1880,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) -_site/es/lecciones/construir-repositorio-de-fuentes.html,1890,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/construir-repositorio-de-fuentes.md failed (status code 429) +_site/es/lecciones/construir-repositorio-de-fuentes.html,623,External link https://localhost/phpmyadmin/ failed with something very wrong. +_site/es/lecciones/construir-repositorio-de-fuentes.html,693,External link https://localhost/phpmyadmin failed with something very wrong. +_site/es/lecciones/construir-repositorio-de-fuentes.html,915,"External link https://omeka.org/codex/Plugin_Writing_Best_Practices#Plugin_Directory_Structure failed: https://omeka.org/codex/Plugin_Writing_Best_Practices exists, but the hash 'Plugin_Directory_Structure' does not (status code 200)" +_site/es/lecciones/construir-repositorio-de-fuentes.html,1881,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) +_site/es/lecciones/construir-repositorio-de-fuentes.html,1891,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/construir-repositorio-de-fuentes.md failed (status code 429) _site/es/lecciones/contar-frecuencias.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/47 failed (status code 429) _site/es/lecciones/contar-frecuencias.html,1946,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/contar-frecuencias.html,1956,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/contar-frecuencias.md failed (status code 429) _site/es/lecciones/corpus-paralelo-lfaligner.html,338,External link https://github.com/programminghistorian/ph-submissions/issues/197 failed (status code 429) +_site/es/lecciones/corpus-paralelo-lfaligner.html,501,External link https://utils.mucattu.com/iso_639-1.html failed with something very wrong. 
_site/es/lecciones/corpus-paralelo-lfaligner.html,1657,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/corpus-paralelo-lfaligner.html,1667,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/corpus-paralelo-lfaligner.md failed (status code 429) _site/es/lecciones/creacion-de-aplicacion-shiny.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/542 failed (status code 429) _site/es/lecciones/creacion-de-aplicacion-shiny.html,1690,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/creacion-de-aplicacion-shiny.html,1700,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/creacion-de-aplicacion-shiny.md failed (status code 429) _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/58 failed (status code 429) -_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,562,External link http://martenduering.com/research/covert-networks-during-the-holocaust/ failed: Moved Permanently (status code 301) -_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,993,External link http://www.cambridge.org/us/academic/subjects/sociology/research-methods-sociology-and-criminology/exploratory-social-network-analysis-pajek-2nd-edition failed (status code 403) -_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,993,External link http://pajek.imfm.si/doku.php failed: got a time out (response code 0) (status code 0) +_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,562,External link https://martenduering.com/research/covert-networks-during-the-holocaust/ failed with something very wrong. 
+_site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,993,External link https://pajek.imfm.si/doku.php failed: got a time out (response code 0) (status code 0) _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,1533,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.html,1543,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/creando-diagramas-de-redes-desde-fuentes-historicas.md failed (status code 429) _site/es/lecciones/crear-exposicion-con-omeka.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/156 failed (status code 429) @@ -5153,7 +3091,7 @@ _site/es/lecciones/datos-abiertos-enlazados-wikidata.html,335,External link http _site/es/lecciones/datos-abiertos-enlazados-wikidata.html,1966,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/datos-abiertos-enlazados-wikidata.html,1976,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/datos-abiertos-enlazados-wikidata.md failed (status code 429) _site/es/lecciones/datos-de-investigacion-con-unix.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/138 failed (status code 429) -_site/es/lecciones/datos-de-investigacion-con-unix.html,560,External link https://www.worldcat.org/title/unix-and-linux/oclc/308171076&referer=brief_results failed (status code 403) +_site/es/lecciones/datos-de-investigacion-con-unix.html,560,External link https://www.worldcat.org/title/unix-y-linux-gua-prctica/oclc/970524006&referer=brief_results failed (status code 403) _site/es/lecciones/datos-de-investigacion-con-unix.html,1621,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/datos-de-investigacion-con-unix.html,1631,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/datos-de-investigacion-con-unix.md failed (status code 429) _site/es/lecciones/datos-tabulares-en-r.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/164 failed (status code 429) @@ -5171,28 +3109,26 @@ _site/es/lecciones/descarga-multiples-registros-usando-cadenas-de-consulta.html, _site/es/lecciones/descarga-multiples-registros-usando-cadenas-de-consulta.html,2260,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/descarga-multiples-registros-usando-cadenas-de-consulta.html,2270,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/descarga-multiples-registros-usando-cadenas-de-consulta.md failed (status code 429) _site/es/lecciones/editar-audio-con-audacity.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/134 failed (status code 429) +_site/es/lecciones/editar-audio-con-audacity.html,555,External link https://web.archive.org/web/20161119231053/https://www.indiana.edu:80/~emusic/acoustics/amplitude.htm failed (status code 404) _site/es/lecciones/editar-audio-con-audacity.html,1228,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/editar-audio-con-audacity.html,1238,External link 
https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/editar-audio-con-audacity.md failed (status code 429) _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/57 failed (status code 429) _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,577,External link https://atoms.io/ failed with something very wrong. -_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,627,External link https://github.com/dhcolumbia/pandoc-workflow/blob/master/pandoctut.bib failed (status code 429) _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,887,"External link https://groups.google.com/forum/#!forum/pandoc-discuss failed: https://groups.google.com/forum/ exists, but the hash '!forum/pandoc-discuss' does not (status code 200)" -_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,887,External link http://stackoverflow.com/questions/tagged/pandoc failed (status code 403) -_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,889,External link http://mouapp.com/ failed with something very wrong. -_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,889,External link http://www.authorea.com failed: Forbidden (status code 403) -_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,889,External link http://www.draftin.com failed: Service Unavailable (status code 503) +_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,887,External link https://stackoverflow.com/questions/tagged/pandoc failed (status code 403) +_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,889,External link https://www.draftin.com failed with something very wrong. +_site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,889,External link https://mouapp.com/ failed with something very wrong. _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,919,External link https://github.com/programminghistorian/jekyll/issues/46#issuecomment-59219906 failed (status code 429) _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,1896,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.html,1906,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/escritura-sostenible-usando-pandoc-y-markdown.md failed (status code 429) _site/es/lecciones/exhibicion-con-collection-builder.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/407 failed (status code 429) +_site/es/lecciones/exhibicion-con-collection-builder.html,584,External link https://dna.nust.na/heritage_week/ failed: got a time out (response code 0) (status code 0) +_site/es/lecciones/exhibicion-con-collection-builder.html,592,External link https://www.gimp.org.es/descargar-gimp.html failed with something very wrong. 
_site/es/lecciones/exhibicion-con-collection-builder.html,611,"External link https://docs.google.com/spreadsheets/d/1Uv9ytll0hysMOH1j-VL1lZx6PWvc1zf3L35sK_4IuzI/edit#gid=0 failed: https://docs.google.com/spreadsheets/d/1Uv9ytll0hysMOH1j-VL1lZx6PWvc1zf3L35sK_4IuzI/edit exists, but the hash 'gid=0' does not (status code 200)" _site/es/lecciones/exhibicion-con-collection-builder.html,1868,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/exhibicion-con-collection-builder.html,1878,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/exhibicion-con-collection-builder.md failed (status code 429) _site/es/lecciones/generadores-aventura.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/509 failed (status code 429) -_site/es/lecciones/generadores-aventura.html,488,External link https://github.com/srsergiorodriguez/aventura/blob/master/README_es.md failed (status code 429) -_site/es/lecciones/generadores-aventura.html,586,External link https://github.com/srsergiorodriguez/aventura/blob/master/README_es.md failed (status code 429) -_site/es/lecciones/generadores-aventura.html,608,External link https://github.com/srsergiorodriguez/aventura/blob/master/README.md failed (status code 429) -_site/es/lecciones/generadores-aventura.html,608,External link https://github.com/srsergiorodriguez/aventura/blob/master/README_es.md failed (status code 429) +_site/es/lecciones/generadores-aventura.html,534,External link https://www.spoonbill.org/n+7/ failed with something very wrong. _site/es/lecciones/generadores-aventura.html,1572,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/generadores-aventura.html,1582,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/generadores-aventura.md failed (status code 429) _site/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.html,335,External link https://github.com/programminghistorian/ph-submissions/issues/570 failed (status code 429) @@ -5201,7 +3137,9 @@ _site/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.html,1 _site/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.html,2011,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.html,2021,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/georreferenciacion-visualizacion-con-recogito-y-visone.md failed (status code 429) _site/es/lecciones/georreferenciar-qgis.html,345,External link https://github.com/programminghistorian/ph-submissions/issues/163 failed (status code 429) -_site/es/lecciones/georreferenciar-qgis.html,617,External link http://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP failed with something very wrong. 
+_site/es/lecciones/georreferenciar-qgis.html,617,External link https://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP failed: Found (status code 302) +_site/es/lecciones/georreferenciar-qgis.html,732,External link https://web.archive.org/web/20180922004858/https://www.islandimagined.ca:80/fedora/repository/imagined:208687 failed (status code 404) +_site/es/lecciones/georreferenciar-qgis.html,909,External link https://web.archive.org/web/20180922004858/https://www.islandimagined.ca:80/fedora/repository/imagined:208687 failed (status code 404) _site/es/lecciones/georreferenciar-qgis.html,2474,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/georreferenciar-qgis.html,2484,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/georreferenciar-qgis.md failed (status code 429) _site/es/lecciones/index.html,2840,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -5216,7 +3154,7 @@ _site/es/lecciones/instalacion-windows.html,339,External link https://github.com _site/es/lecciones/instalacion-windows.html,1722,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/instalacion-windows.html,1732,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/instalacion-windows.md failed (status code 429) _site/es/lecciones/instalar-modulos-python-pip.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/63 failed (status code 429) -_site/es/lecciones/instalar-modulos-python-pip.html,587,External link http://stackoverflow.com/questions/4750806/how-to-install-pip-on-windows failed (status code 403) +_site/es/lecciones/instalar-modulos-python-pip.html,587,External link https://stackoverflow.com/questions/4750806/how-to-install-pip-on-windows failed (status code 403) _site/es/lecciones/instalar-modulos-python-pip.html,1141,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/instalar-modulos-python-pip.html,1151,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/instalar-modulos-python-pip.md failed (status code 429) _site/es/lecciones/intro-a-google-maps-y-google-earth.html,345,External link https://github.com/programminghistorian/ph-submissions/issues/130 failed (status code 429) @@ -5224,12 +3162,12 @@ _site/es/lecciones/intro-a-google-maps-y-google-earth.html,1355,External link ht _site/es/lecciones/intro-a-google-maps-y-google-earth.html,3097,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/intro-a-google-maps-y-google-earth.html,3107,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/intro-a-google-maps-y-google-earth.md failed (status code 429) _site/es/lecciones/introduccion-a-bash.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/62 failed (status code 429) +_site/es/lecciones/introduccion-a-bash.html,779,External link https://www.viemu.com/a-why-vi-vim.html failed with something very wrong. 
_site/es/lecciones/introduccion-a-bash.html,2054,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/introduccion-a-bash.html,2064,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/introduccion-a-bash.md failed (status code 429) _site/es/lecciones/introduccion-a-ffmpeg.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/302 failed (status code 429) _site/es/lecciones/introduccion-a-ffmpeg.html,590,External link https://twitter.com/FFmpeg failed (status code 400) _site/es/lecciones/introduccion-a-ffmpeg.html,605,"External link https://training.ashleyblewer.com/presentations/ffmpeg.html#10 failed: https://training.ashleyblewer.com/presentations/ffmpeg.html exists, but the hash '10' does not (status code 200)" -_site/es/lecciones/introduccion-a-ffmpeg.html,1171,External link https://github.com/privatezero/NDSR/blob/master/Demystifying_FFmpeg_Slides.pdf failed (status code 429) _site/es/lecciones/introduccion-a-ffmpeg.html,1731,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/introduccion-a-ffmpeg.html,1741,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/introduccion-a-ffmpeg.md failed (status code 429) _site/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.html,338,External link https://github.com/programminghistorian/ph-submissions/issues/254 failed (status code 429) @@ -5239,7 +3177,6 @@ _site/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.html _site/es/lecciones/introduccion-a-markdown.html,341,External link https://github.com/programminghistorian/ph-submissions/issues/56 failed (status code 429) _site/es/lecciones/introduccion-a-markdown.html,1393,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/introduccion-a-markdown.html,1403,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/introduccion-a-markdown.md failed (status code 429) -_site/es/lecciones/introduccion-a-powershell.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/146 failed (status code 429) _site/es/lecciones/introduccion-a-powershell.html,1769,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/introduccion-a-powershell.html,1779,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/introduccion-a-powershell.md failed (status code 429) _site/es/lecciones/introduccion-a-tei-1.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/366 failed (status code 429) @@ -5252,8 +3189,13 @@ _site/es/lecciones/introduccion-al-web-scraping-usando-r.html,333,External link _site/es/lecciones/introduccion-al-web-scraping-usando-r.html,1600,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/introduccion-al-web-scraping-usando-r.html,1610,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/introduccion-al-web-scraping-usando-r.md failed (status code 429) _site/es/lecciones/introduccion-datos-abiertos-enlazados.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/142 failed (status code 429) 
-_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,733,External link http://semanticweb.org/wiki/Main_Page.html failed: got a time out (response code 0) (status code 0) -_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,1077,External link http://linkeddata.org/guides-and-tutorials failed: Internal Server Error (status code 500) +_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,556,External link https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ failed with something very wrong. +_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,701,External link https://www.history.ac.uk/projects/digital/tobias failed (status code 403) +_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,733,External link https://semanticweb.org/wiki/Main_Page.html failed: got a time out (response code 0) (status code 0) +_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,887,External link https://www.history.ac.uk/projects/digital/tobias failed (status code 403) +_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,1077,External link https://linkeddata.org/guides-and-tutorials failed with something very wrong. +_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,1083,External link https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ failed with something very wrong. +_site/es/lecciones/introduccion-datos-abiertos-enlazados.html,1098,External link https://www.history.ac.uk/projects/digital/tobias failed (status code 403) _site/es/lecciones/introduccion-datos-abiertos-enlazados.html,1635,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/introduccion-datos-abiertos-enlazados.html,1645,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/introduccion-datos-abiertos-enlazados.md failed (status code 429) _site/es/lecciones/introduccion-e-instalacion.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/35 failed (status code 429) @@ -5268,24 +3210,24 @@ _site/es/lecciones/lectura-escalable-de-datos-estructurados.html,3402,External l _site/es/lecciones/lectura-escalable-de-datos-estructurados.html,3412,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/lectura-escalable-de-datos-estructurados.md failed (status code 429) _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html,341,External link https://github.com/programminghistorian/ph-submissions/issues/73 failed (status code 429) _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html,579,External link https://powerhouse.com.au/ failed (status code 429) +_site/es/lecciones/limpieza-de-datos-con-OpenRefine.html,591,External link https://vis.stanford.edu/papers/wrangler/ failed with something very wrong. 
_site/es/lecciones/limpieza-de-datos-con-OpenRefine.html,597,External link https://powerhouse.com.au/ failed (status code 429) _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html,2144,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/limpieza-de-datos-con-OpenRefine.html,2154,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/limpieza-de-datos-con-OpenRefine.md failed (status code 429) +_site/es/lecciones/manipular-cadenas-de-caracteres-en-python.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/43 failed (status code 429) _site/es/lecciones/manipular-cadenas-de-caracteres-en-python.html,1722,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/manipular-cadenas-de-caracteres-en-python.html,1732,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/manipular-cadenas-de-caracteres-en-python.md failed (status code 429) _site/es/lecciones/mineria-de-datos-en-internet-archive.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/59 failed (status code 429) -_site/es/lecciones/mineria-de-datos-en-internet-archive.html,570,"External link http://archive.org/stream/lettertowilliaml00doug/39999066767938#page/n0/mode/2up failed: http://archive.org/stream/lettertowilliaml00doug/39999066767938 exists, but the hash 'page/n0/mode/2up' does not (status code 200)" -_site/es/lecciones/mineria-de-datos-en-internet-archive.html,595,External link http://internetarchive.readthedocs.io/en/latest/quickstart.html#searching failed (status code 404) -_site/es/lecciones/mineria-de-datos-en-internet-archive.html,613,External link http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading failed (status code 404) -_site/es/lecciones/mineria-de-datos-en-internet-archive.html,698,External link http://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading failed (status code 404) +_site/es/lecciones/mineria-de-datos-en-internet-archive.html,570,"External link https://archive.org/stream/lettertowilliaml00doug/39999066767938#page/n0/mode/2up failed: https://archive.org/stream/lettertowilliaml00doug/39999066767938 exists, but the hash 'page/n0/mode/2up' does not (status code 200)" +_site/es/lecciones/mineria-de-datos-en-internet-archive.html,595,External link https://internetarchive.readthedocs.io/en/latest/quickstart.html#searching failed (status code 404) +_site/es/lecciones/mineria-de-datos-en-internet-archive.html,613,External link https://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading failed (status code 404) +_site/es/lecciones/mineria-de-datos-en-internet-archive.html,698,External link https://internetarchive.readthedocs.io/en/latest/quickstart.html#downloading failed (status code 404) _site/es/lecciones/mineria-de-datos-en-internet-archive.html,705,"External link https://archive.org/about/faqs.php#140 failed: https://archive.org/about/faqs.php exists, but the hash '140' does not (status code 200)" -_site/es/lecciones/mineria-de-datos-en-internet-archive.html,798,External link https://github.com/edsu/pymarc/blob/master/pymarc/marcxml.py failed (status code 429) _site/es/lecciones/mineria-de-datos-en-internet-archive.html,1410,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) 
_site/es/lecciones/mineria-de-datos-en-internet-archive.html,1420,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/mineria-de-datos-en-internet-archive.md failed (status code 429) _site/es/lecciones/normalizar-datos.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/46 failed (status code 429) _site/es/lecciones/normalizar-datos.html,1671,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/normalizar-datos.html,1681,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/normalizar-datos.md failed (status code 429) -_site/es/lecciones/palabras-clave-en-contexto-n-grams.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/50 failed (status code 429) _site/es/lecciones/palabras-clave-en-contexto-n-grams.html,1661,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/palabras-clave-en-contexto-n-grams.html,1671,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/palabras-clave-en-contexto-n-grams.md failed (status code 429) _site/es/lecciones/poniendo-omeka-a-funcionar.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/61 failed (status code 429) @@ -5309,18 +3251,17 @@ _site/es/lecciones/retirada/introduccion-control-versiones-github-desktop.html,1 _site/es/lecciones/retirada/introduccion-control-versiones-github-desktop.html,1612,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/retirada/introduccion-control-versiones-github-desktop.html,1622,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/retirada/introduccion-control-versiones-github-desktop.md failed (status code 429) _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,309,External link https://github.com/programminghistorian/ph-submissions/issues/67 failed (status code 429) -_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,545,External link http://labs.europeana.eu/api/linked-open-data-introduction failed with something very wrong. -_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,545,External link http://collection.britishmuseum.org/ failed: got a time out (response code 0) (status code 0) -_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,754,External link http://collection.britishmuseum.org/sparql failed: got a time out (response code 0) (status code 0) -_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,772,External link http://collection.britishmuseum.org/resource?uri=http://collection.britishmuseum.org/id/object/PPA82633 failed: got a time out (response code 0) (status code 0) -_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,782,External link http://collection.britishmuseum.org/sparql?query=SELECT+*%0D%0AWHERE+%7B%0D%0A++%3Chttp://collection.britishmuseum.org/id/object/PPA82633%3E+?p+?o+.%0D%0A++%7D&_implicit=false&_equivalent=false&_form=/sparql failed: got a time out (response code 0) (status code 0) +_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,545,External link https://labs.europeana.eu/api/linked-open-data-introduction failed with something very wrong. 
+_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,545,External link https://collection.britishmuseum.org/ failed: got a time out (response code 0) (status code 0) +_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,754,External link https://collection.britishmuseum.org/sparql failed: got a time out (response code 0) (status code 0) +_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,772,External link https://collection.britishmuseum.org/resource?uri=https://collection.britishmuseum.org/id/object/PPA82633 failed: got a time out (response code 0) (status code 0) +_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,782,External link https://collection.britishmuseum.org/sparql?query=SELECT+*%0D%0AWHERE+%7B%0D%0A++%3Chttp://collection.britishmuseum.org/id/object/PPA82633%3E+?p+?o+.%0D%0A++%7D&_implicit=false&_equivalent=false&_form=/sparql failed: got a time out (response code 0) (status code 0) _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,859,External link https://collection.britishmuseum.org/sparql#query=PREFIX+bmo:+%3Chttp://www.researchspace.org/ontology/%3E%0APREFIX+skos:+%3Chttp://www.w3.org/2004/02/skos/core%23%3E%0A%0ASELECT+?object%0AWHERE+%7B%0A%0A++%23+Search+for+all+values+of+?object+that+have+a+given+%22object+type%22%0A++?object+bmo:PX_object_type+?object_type+.%0A%0A++%23+That+object+type+should+have+the+label+%22print%22%0A++?object_type+skos:prefLabel+%22print%22+.%0A%7D%0ALIMIT+10 failed: got a time out (response code 0) (status code 0) _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,936,"External link https://collection.britishmuseum.org/sparql#query=PREFIX+bmo:+%3Chttp://www.researchspace.org/ontology/%3E%0APREFIX+skos:+%3Chttp://www.w3.org/2004/02/skos/core%23%3E%0APREFIX+ecrm:+%3Chttp://www.cidoc-crm.org/cidoc-crm/%3E%0APREFIX+xsd:+%3Chttp://www.w3.org/2001/XMLSchema%23%3E%0A%0A%23+Return+object+links+and+creation+date%0ASELECT+?object+?date%0AWHERE+%7B%0A%0A++%23+We'll+use+our+previous+command+to+search+only+for%0A++%23+objects+of+type+%22print%22%0A++?object+bmo:PX_object_type+?object_type+.%0A++?object_type+skos:prefLabel+%22print%22+.%0A%0A++%23+We+need+to+link+though+several+nodes+to+find+the%0A++%23+creation+date+associated+with+an+object%0A++?object+ecrm:P108i_was_produced_by+?production+.%0A++?production+ecrm:P9_consists_of+?date_node+.%0A++?date_node+ecrm:P4_has_time-span+?timespan+.%0A++?timespan+ecrm:P82a_begin_of_the_begin+?date+.%0A%0A++%23+As+you+can+see,+we+need+to+connect+quite+a+few+dots%0A++%23+to+get+to+the+date+node!+Now+that+we+have+it,+we+can%0A++%23+filter+our+results.+Because+we+are+filtering+by+date,%0A++%23+we+must+attach+the+tag+%5E%5Exsd:date+after+our+date+strings.%0A++%23+This+tag+tells+the+database+to+interpret+the+string%0A++%23+%221580-01-01%22+as+the+date+1+January+1580.%0A%0A++FILTER(?date+%3E=+%221580-01-01%22%5E%5Exsd:date+&&%0A+++++++++?date+%3C=+%221600-01-01%22%5E%5Exsd:date)%0A%7D failed: got a time out (response code 0) (status code 0)" _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,989,"External link 
https://collection.britishmuseum.org/sparql#query=PREFIX+bmo:+%3Chttp://www.researchspace.org/ontology/%3E%0APREFIX+skos:+%3Chttp://www.w3.org/2004/02/skos/core%23%3E%0APREFIX+ecrm:+%3Chttp://www.cidoc-crm.org/cidoc-crm/%3E%0APREFIX+xsd:+%3Chttp://www.w3.org/2001/XMLSchema%23%3E%0A%0ASELECT+?type+(COUNT(?type)+as+?n)%0AWHERE+%7B%0A++%23+We+still+need+to+indicate+the+?object_type+variable,%0A++%23+however+we+will+not+require+it+to+match+%22print%22+this+time%0A%0A++?object+bmo:PX_object_type+?object_type+.%0A++?object_type+skos:prefLabel+?type+.%0A%0A++%23+Once+again,+we+will+also+filter+by+date%0A++?object+ecrm:P108i_was_produced_by+?production+.%0A++?production+ecrm:P9_consists_of+?date_node+.%0A++?date_node+ecrm:P4_has_time-span+?timespan+.%0A++?timespan+ecrm:P82a_begin_of_the_begin+?date+.%0A++FILTER(?date+%3E=+%221580-01-01%22%5E%5Exsd:date+&&%0A+++++++++?date+%3C=+%221600-01-01%22%5E%5Exsd:date)%0A%7D%0A%23+The+GROUP+BY+command+designates+the+variable+to+tally+by,%0A%23+and+the+ORDER+BY+DESC()+command+sorts+the+results+by%0A%23+descending+number.%0AGROUP+BY+?type%0AORDER+BY+DESC(?n) failed: got a time out (response code 0) (status code 0)" -_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,1064,External link http://www.getty.edu/research/ failed: Permanent Redirect (status code 308) _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,1099,"External link https://collection.britishmuseum.org/sparql?query=%23+Return+object+links+and+creation+date%0D%0APREFIX+bmo:+%3Chttp://collection.britishmuseum.org/id/ontology/%3E%0D%0APREFIX+skos:+%3Chttp://www.w3.org/2004/02/skos/core%23%3E%0D%0APREFIX+ecrm:+%3Chttp://erlangen-crm.org/current/%3E%0D%0APREFIX+xsd:+%3Chttp://www.w3.org/2001/XMLSchema%23%3E%0D%0ASELECT+DISTINCT+?object+?date+?image%0D%0AWHERE+%7B%0D%0A%0D%0A++%23+We'll+use+our+previous+command+to+search+only+for+objects+of+type+%22print%22%0D%0A++?object+bmo:PX_object_type+?object_type+.%0D%0A++?object_type+skos:prefLabel+%22print%22+.%0D%0A%0D%0A++%23+We+need+to+link+though+several+nodes+to+find+the+creation+date+associated%0D%0A++%23+with+an+object%0D%0A++?object+ecrm:P108i_was_produced_by+?production+.%0D%0A++?production+ecrm:P9_consists_of+?date_node+.%0D%0A++?date_node+ecrm:P4_has_time-span+?timespan+.%0D%0A++?timespan+ecrm:P82a_begin_of_the_begin+?date+.%0D%0A%0D%0A++%23+Yes,+we+need+to+connect+quite+a+few+dots+to+get+to+the+date+node!+Now+that%0D%0A++%23+we+have+it,+we+can+filter+our+results.+Because+we+are+filtering+a+date,+we%0D%0A++%23+must+attach+the+xsd:date+tag+to+our+date+strings+so+that+SPARQL+knows+how+to%0D%0A++%23+parse+them.%0D%0A%0D%0A++FILTER(?date+%3E=+%221580-01-01%22%5E%5Exsd:date+%26%26+?date+%3C=+%221600-01-01%22%5E%5Exsd:date)%0D%0A++%0D%0A++?object+bmo:PX_has_main_representation+?image+.%0D%0A%7D%0D%0ALIMIT+100#query=%23+Return+object+links+and+creation+date%0APREFIX+bmo:+%3Chttp://www.researchspace.org/ontology/%3E%0APREFIX+skos:+%3Chttp://www.w3.org/2004/02/skos/core%23%3E%0APREFIX+xsd:+%3Chttp://www.w3.org/2001/XMLSchema%23%3E%0APREFIX+ecrm:+%3Chttp://www.cidoc-crm.org/cidoc-crm/%3E%0ASELECT+DISTINCT+?object+?date+?image%0AWHERE+%7B%0A++%0A++%23+We'll+use+our+previous+command+to+search+only+for+objects+of+type+%22print%22%0A++?object+bmo:PX_object_type+?object_type+.%0A++?object_type+skos:prefLabel+%22print%22+.%0A%0A++%23+We+need+to+link+though+several+nodes+to+find+the+creation+date+associated%0A++%23+with+an+object%0A++?object+ecrm:P108i_was_produced_by+?production+.%0A++?production+ecrm:P9_consists_of+?date_node+.%0A++?date_
node+ecrm:P4_has_time-span+?timespan+.%0A++?timespan+ecrm:P82a_begin_of_the_begin+?date+.%0A%0A++%0A++%23+Yes,+we+need+to+connect+quite+a+few+dots+to+get+to+the+date+node!+Now+that%0A++%23+we+have+it,+we+can+filter+our+results.+Because+we+are+filtering+a+date,+we%0A++%23+must+attach+the+xsd:date+tag+to+our+date+strings+so+that+SPARQL+knows+how+to%0A++%23+parse+them.%0A%0A++FILTER(?date+%3E=+%221580-01-01%22%5E%5Exsd:date+&&+?date+%3C=+%221600-01-01%22%5E%5Exsd:date)%0A++%0A++?object+bmo:PX_has_main_representation+?image+.%0A%7D%0ALIMIT+100 failed: got a time out (response code 0) (status code 0)" -_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,1133,External link http://labs.europeana.eu/api/linked-open-data-SPARQL-endpoint failed with something very wrong. -_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,1134,"External link http://vocab.getty.edu/queries#Finding_Subjects failed: http://vocab.getty.edu/queries exists, but the hash 'Finding_Subjects' does not (status code 200)" +_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,1133,External link https://labs.europeana.eu/api/linked-open-data-SPARQL-endpoint failed with something very wrong. +_site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,1134,"External link https://vocab.getty.edu/queries#Finding_Subjects failed: https://vocab.getty.edu/queries exists, but the hash 'Finding_Subjects' does not (status code 200)" _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,1672,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/retirada/sparql-datos-abiertos-enlazados.html,1682,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/retirada/sparql-datos-abiertos-enlazados.md failed (status code 429) _site/es/lecciones/reutilizacion-de-codigo-y-modularidad.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/41 failed (status code 429) @@ -5328,7 +3269,7 @@ _site/es/lecciones/reutilizacion-de-codigo-y-modularidad.html,616,External link _site/es/lecciones/reutilizacion-de-codigo-y-modularidad.html,1592,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/reutilizacion-de-codigo-y-modularidad.html,1602,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/reutilizacion-de-codigo-y-modularidad.md failed (status code 429) _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/313 failed (status code 429) -_site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,497,External link http://labs.bl.uk failed with something very wrong. +_site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,497,External link https://labs.bl.uk failed with something very wrong. 
_site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,567,External link https://pro.europeana.eu/page/iiif failed (status code 403) _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,776,External link https://datos.gob.es/es/catalogo/e00123904-bibliografia-espanola-de-cartografia-2017 failed: Not Found (status code 404) _site/es/lecciones/reutilizando-colecciones-digitales-glam-labs.html,782,"External link https://es.wikipedia.org/wiki/Datos_enlazados#Proyecto_de_comunidad_para_la_inter-conexi%C3%B3n_de_datos_abiertos failed: https://es.wikipedia.org/wiki/Datos_enlazados exists, but the hash 'Proyecto_de_comunidad_para_la_inter-conexi%C3%B3n_de_datos_abiertos' does not (status code 200)" @@ -5344,12 +3285,13 @@ _site/es/lecciones/salida-palabras-clave-contexto-ngrams.html,339,External link _site/es/lecciones/salida-palabras-clave-contexto-ngrams.html,1741,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/es/lecciones/salida-palabras-clave-contexto-ngrams.html,1751,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/salida-palabras-clave-contexto-ngrams.md failed (status code 429) _site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/303 failed (status code 429) -_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1755,External link http://jekyll-windows.juthilo.com/ failed with something very wrong. +_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1690,External link https://jekyllthemes.org/ failed with something very wrong. +_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,1755,External link https://jekyll-windows.juthilo.com/ failed with something very wrong. 
_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,2300,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html,2310,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.md failed (status code 429)
_site/es/lecciones/topic-modeling-y-mallet.html,341,External link https://github.com/programminghistorian/ph-submissions/issues/191#issuecomment-432826840 failed (status code 429)
-_site/es/lecciones/topic-modeling-y-mallet.html,571,External link http://www.worldcat.org/title/reading-machines-toward-an-algorithmic-criticism/oclc/708761605&referer=brief_results failed: Forbidden (status code 403)
-_site/es/lecciones/topic-modeling-y-mallet.html,1011,External link http://dl.acm.org/citation.cfm?id=944937 failed (status code 403)
+_site/es/lecciones/topic-modeling-y-mallet.html,1009,External link https://web.archive.org/web/20160704150726/https://www.lisarhody.com:80/some-assembly-required/ failed (status code 404)
+_site/es/lecciones/topic-modeling-y-mallet.html,1011,External link https://dl.acm.org/citation.cfm?id=944937 failed (status code 403)
_site/es/lecciones/topic-modeling-y-mallet.html,2469,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/es/lecciones/topic-modeling-y-mallet.html,2479,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/topic-modeling-y-mallet.md failed (status code 429)
_site/es/lecciones/trabajar-con-archivos-de-texto.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/40 failed (status code 429)
@@ -5361,7 +3303,7 @@ _site/es/lecciones/trabajar-con-paginas-web.html,1704,External link https://gith
_site/es/lecciones/transformacion-datos-xml-xsl.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/162 failed (status code 429)
_site/es/lecciones/transformacion-datos-xml-xsl.html,576,External link https://irt.kcl.ac.uk/irt2009/ failed with something very wrong.
_site/es/lecciones/transformacion-datos-xml-xsl.html,736,External link https://stackoverflow.com/questions/16811332/cannot-run-java-from-the-windows-powershell-command-prompt failed (status code 403)
-_site/es/lecciones/transformacion-datos-xml-xsl.html,1338,External link https://www.computerhope.com/issues/ch000549.htm failed (status code 403)
+_site/es/lecciones/transformacion-datos-xml-xsl.html,777,External link https://scissors-and-paste.net failed with something very wrong.
_site/es/lecciones/transformacion-datos-xml-xsl.html,1338,External link https://stackoverflow.com/questions/22465332/setting-path-environment-variable-in-osx-permanently failed (status code 403)
_site/es/lecciones/transformacion-datos-xml-xsl.html,1901,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/es/lecciones/transformacion-datos-xml-xsl.html,1911,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/transformacion-datos-xml-xsl.md failed (status code 429)
@@ -5375,7 +3317,6 @@ _site/es/lecciones/ver-archivos-html.html,1630,External link https://github.com/
_site/es/lecciones/visualizacion-y-animacion-de-tablas-historicas-con-R.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/330 failed (status code 429)
_site/es/lecciones/visualizacion-y-animacion-de-tablas-historicas-con-R.html,1490,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/es/lecciones/visualizacion-y-animacion-de-tablas-historicas-con-R.html,1500,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/visualizacion-y-animacion-de-tablas-historicas-con-R.md failed (status code 429)
-_site/es/pia.html,271,External link https://www.oecd.org/en/topics/sub-issues/oda-eligibility-and-conditions/dac-list-of-oda-recipients.html failed (status code 403)
_site/es/pia.html,422,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/es/pia.html,432,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/pia.md failed (status code 429)
_site/es/politica-de-privacidad.html,348,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
@@ -5388,6 +3329,9 @@ _site/es/retroalimentacion.html,337,External link https://github.com/programming
_site/es/vacantes.html,292,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/es/vacantes.html,302,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/vacantes.md failed (status code 429)
_site/fr/apropos.html,259,"External link https://fr.wikipedia.org/wiki/Libre_acc%C3%A8s_(%C3%A9dition_scientifique)#La_voie_diamant failed: https://fr.wikipedia.org/wiki/Libre_acc%C3%A8s_(%C3%A9dition_scientifique) exists, but the hash 'La_voie_diamant' does not (status code 200)"
+_site/fr/apropos.html,266,External link https://dhawards.org/dhawards2017/results/ failed with something very wrong.
+_site/fr/apropos.html,266,External link https://dhawards.org/dhawards2016/results/ failed with something very wrong.
+_site/fr/apropos.html,266,External link https://dhawards.org/dhawards2022/results/ failed with something very wrong.
_site/fr/apropos.html,266,External link https://openpublishingawards.org/results/2021/index.html failed with something very wrong.
_site/fr/apropos.html,323,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/apropos.html,333,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/apropos.md failed (status code 429)
@@ -5404,38 +3348,39 @@ _site/fr/consignes-redacteurs.html,757,External link https://github.com/programm
_site/fr/consignes-redacteurs.html,767,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/consignes-redacteurs.md failed (status code 429)
_site/fr/consignes-traducteurs.html,380,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/consignes-traducteurs.html,390,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/consignes-traducteurs.md failed (status code 429)
+_site/fr/contribuer.html,298,External link https://www.worldcat.org/title/programming-historian-en-espanol/oclc/1061292935&referer=brief_results failed (status code 403)
_site/fr/contribuer.html,353,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/contribuer.html,363,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/contribuer.md failed (status code 429)
_site/fr/dons.html,265,External link https://www.patreon.com/join/theprogramminghistorian failed (status code 403)
_site/fr/dons.html,322,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/dons.html,332,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/dons.md failed (status code 429)
-_site/fr/equipe-projet.html,310,External link http://twitter.com/maxcarlons failed (status code 400)
-_site/fr/equipe-projet.html,512,External link http://twitter.com/cosovschi failed (status code 400)
+_site/fr/equipe-projet.html,310,External link https://twitter.com/maxcarlons failed (status code 400)
+_site/fr/equipe-projet.html,512,External link https://twitter.com/cosovschi failed (status code 400)
_site/fr/equipe-projet.html,1272,External link https://www.nabeelsiddiqui.net/ failed with something very wrong.
-_site/fr/equipe-projet.html,1278,External link http://twitter.com/nabsiddiqui failed (status code 400)
-_site/fr/equipe-projet.html,1631,External link http://twitter.com/giulia_taurino failed (status code 400)
-_site/fr/equipe-projet.html,1810,External link http://twitter.com/alexwermercolan failed (status code 400)
-_site/fr/equipe-projet.html,2059,External link http://www.mariajoseafanador.com failed: Moved Permanently (status code 301)
-_site/fr/equipe-projet.html,2065,External link http://twitter.com/mariajoafana failed (status code 400)
-_site/fr/equipe-projet.html,2534,External link http://twitter.com/IsaGribomont failed (status code 400)
-_site/fr/equipe-projet.html,2745,External link http://twitter.com/espejolento failed (status code 400)
-_site/fr/equipe-projet.html,3036,External link http://twitter.com/jenniferisve failed (status code 400)
-_site/fr/equipe-projet.html,3361,External link http://twitter.com/enetreseles failed (status code 400)
-_site/fr/equipe-projet.html,3568,External link http://twitter.com/jgob failed (status code 400)
-_site/fr/equipe-projet.html,3863,External link http://twitter.com/rivaquiroga failed (status code 400)
-_site/fr/equipe-projet.html,4804,External link http://twitter.com/superHH failed (status code 400)
-_site/fr/equipe-projet.html,5190,External link http://twitter.com/emilienschultz failed (status code 400)
-_site/fr/equipe-projet.html,5317,External link http://twitter.com/davvalent failed (status code 400)
-_site/fr/equipe-projet.html,5842,External link http://twitter.com/danielalvesfcsh failed (status code 400)
-_site/fr/equipe-projet.html,6107,External link http://twitter.com/ericbrasiln failed (status code 400)
-_site/fr/equipe-projet.html,6543,External link http://twitter.com/jimmy_medeiros failed (status code 400)
-_site/fr/equipe-projet.html,7027,External link http://twitter.com/araceletorres failed (status code 400)
-_site/fr/equipe-projet.html,7286,External link http://twitter.com/j_w_baker failed (status code 400)
-_site/fr/equipe-projet.html,7727,External link http://twitter.com/Adam_Crymble failed (status code 400)
-_site/fr/equipe-projet.html,8258,External link http://twitter.com/jenniferisve failed (status code 400)
-_site/fr/equipe-projet.html,8589,External link http://twitter.com/rivaquiroga failed (status code 400)
-_site/fr/equipe-projet.html,8878,External link http://twitter.com/amsichani failed (status code 400)
-_site/fr/equipe-projet.html,9221,External link http://twitter.com/AnisaHawes failed (status code 400)
+_site/fr/equipe-projet.html,1278,External link https://twitter.com/nabsiddiqui failed (status code 400)
+_site/fr/equipe-projet.html,1631,External link https://twitter.com/giulia_taurino failed (status code 400)
+_site/fr/equipe-projet.html,1810,External link https://twitter.com/alexwermercolan failed (status code 400)
+_site/fr/equipe-projet.html,2059,External link https://www.mariajoseafanador.com failed with something very wrong.
+_site/fr/equipe-projet.html,2065,External link https://twitter.com/mariajoafana failed (status code 400)
+_site/fr/equipe-projet.html,2534,External link https://twitter.com/IsaGribomont failed (status code 400)
+_site/fr/equipe-projet.html,2745,External link https://twitter.com/espejolento failed (status code 400)
+_site/fr/equipe-projet.html,3036,External link https://twitter.com/jenniferisve failed (status code 400)
+_site/fr/equipe-projet.html,3361,External link https://twitter.com/enetreseles failed (status code 400)
+_site/fr/equipe-projet.html,3568,External link https://twitter.com/jgob failed (status code 400)
+_site/fr/equipe-projet.html,3863,External link https://twitter.com/rivaquiroga failed (status code 400)
+_site/fr/equipe-projet.html,4804,External link https://twitter.com/superHH failed (status code 400)
+_site/fr/equipe-projet.html,5190,External link https://twitter.com/emilienschultz failed (status code 400)
+_site/fr/equipe-projet.html,5317,External link https://twitter.com/davvalent failed (status code 400)
+_site/fr/equipe-projet.html,5842,External link https://twitter.com/danielalvesfcsh failed (status code 400)
+_site/fr/equipe-projet.html,6107,External link https://twitter.com/ericbrasiln failed (status code 400)
+_site/fr/equipe-projet.html,6543,External link https://twitter.com/jimmy_medeiros failed (status code 400)
+_site/fr/equipe-projet.html,7027,External link https://twitter.com/araceletorres failed (status code 400)
+_site/fr/equipe-projet.html,7286,External link https://twitter.com/j_w_baker failed (status code 400)
+_site/fr/equipe-projet.html,7727,External link https://twitter.com/Adam_Crymble failed (status code 400)
+_site/fr/equipe-projet.html,8258,External link https://twitter.com/jenniferisve failed (status code 400)
+_site/fr/equipe-projet.html,8589,External link https://twitter.com/rivaquiroga failed (status code 400)
+_site/fr/equipe-projet.html,8878,External link https://twitter.com/amsichani failed (status code 400)
+_site/fr/equipe-projet.html,9221,External link https://twitter.com/AnisaHawes failed (status code 400)
_site/fr/equipe-projet.html,10041,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/equipe-projet.html,10051,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/equipe-projet.md failed (status code 429)
_site/fr/evenements.html,296,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
@@ -5446,8 +3391,13 @@ _site/fr/jisc-tna-partenariat.html,336,External link https://github.com/programm
_site/fr/jisc-tna-partenariat.html,346,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/jisc-tna-partenariat.md failed (status code 429)
_site/fr/lecons/analyse-corpus-antconc.html,338,External link https://github.com/programminghistorian/ph-submissions/issues/240 failed (status code 429)
_site/fr/lecons/analyse-corpus-antconc.html,631,External link https://academic.oup.com/dsh/article-abstract/8/4/243/928942 failed (status code 403)
-_site/fr/lecons/analyse-corpus-antconc.html,1095,External link http://www.lexi-co.com/ failed: got a time out (response code 301) (status code 301)
-_site/fr/lecons/analyse-corpus-antconc.html,1095,External link http://ancilla.unice.fr/ failed: got a time out (response code 0) (status code 0)
+_site/fr/lecons/analyse-corpus-antconc.html,1092,External link https://explorationdecorpus.corpusecrits.huma-num.fr/antconc/ failed with something very wrong.
+_site/fr/lecons/analyse-corpus-antconc.html,1093,External link https://cid.ens-lyon.fr/ac_article.asp?fic=antconc.asp failed with something very wrong.
+_site/fr/lecons/analyse-corpus-antconc.html,1095,External link https://ancilla.unice.fr/ failed with something very wrong.
+_site/fr/lecons/analyse-corpus-antconc.html,1095,External link https://textometrie.ens-lyon.fr/?lang=fr failed with something very wrong.
+_site/fr/lecons/analyse-corpus-antconc.html,1095,External link https://iramuteq.org/ failed with something very wrong.
+_site/fr/lecons/analyse-corpus-antconc.html,1095,External link https://www.lexi-co.com/ failed with something very wrong.
+_site/fr/lecons/analyse-corpus-antconc.html,1099,External link https://lexicometrica.univ-paris3.fr/livre/st94/st94-tdm.html failed with something very wrong.
_site/fr/lecons/analyse-corpus-antconc.html,1639,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/lecons/analyse-corpus-antconc.html,1649,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/analyse-corpus-antconc.md failed (status code 429)
_site/fr/lecons/analyse-de-documents-avec-tfidf.html,353,External link https://github.com/programminghistorian/ph-submissions/issues/454 failed (status code 429)
@@ -5461,11 +3411,13 @@ _site/fr/lecons/analyse-reseau-python.html,343,External link https://github.com/
_site/fr/lecons/analyse-reseau-python.html,2984,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/lecons/analyse-reseau-python.html,2994,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/analyse-reseau-python.md failed (status code 429)
_site/fr/lecons/calibration-radiocarbone-avec-r.html,351,External link https://github.com/programminghistorian/ph-submissions/issues/329 failed (status code 429)
+_site/fr/lecons/calibration-radiocarbone-avec-r.html,709,External link https://calib.org failed: got a time out (response code 0) (status code 0)
_site/fr/lecons/calibration-radiocarbone-avec-r.html,715,"External link https://fr.wikipedia.org/wiki/Suaire_de_Turin#La_datation_par_le_carbone_14_(1988-1989) failed: https://fr.wikipedia.org/wiki/Suaire_de_Turin#La_datation_par_le_carbone_14_(1988-1989) exists, but the hash 'La_datation_par_le_carbone_14_(1988-1989)' does not (status code 200)"
_site/fr/lecons/calibration-radiocarbone-avec-r.html,2181,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/lecons/calibration-radiocarbone-avec-r.html,2191,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/calibration-radiocarbone-avec-r.md failed (status code 429)
_site/fr/lecons/comprendre-les-expressions-regulieres.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/264 failed (status code 429)
-_site/fr/lecons/comprendre-les-expressions-regulieres.html,668,"External link http://archive.org/stream/jstor-4560629/4560629#page/n0/mode/2up failed: http://archive.org/stream/jstor-4560629/4560629 exists, but the hash 'page/n0/mode/2up' does not (status code 200)"
+_site/fr/lecons/comprendre-les-expressions-regulieres.html,668,"External link https://archive.org/stream/jstor-4560629/4560629#page/n0/mode/2up failed: https://archive.org/stream/jstor-4560629/4560629 exists, but the hash 'page/n0/mode/2up' does not (status code 200)"
+_site/fr/lecons/comprendre-les-expressions-regulieres.html,1466,External link https://dh.obdurodon.org/regex.html failed with something very wrong.
_site/fr/lecons/comprendre-les-expressions-regulieres.html,2005,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/lecons/comprendre-les-expressions-regulieres.html,2015,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/comprendre-les-expressions-regulieres.md failed (status code 429)
_site/fr/lecons/comprendre-les-pages-web.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/228 failed (status code 429)
@@ -5486,12 +3438,11 @@ _site/fr/lecons/demarrer-avec-omeka-classic.html,1502,External link https://gith
_site/fr/lecons/demarrer-avec-omeka-classic.html,1512,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/demarrer-avec-omeka-classic.md failed (status code 429)
_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/372 failed (status code 429)
_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,613,External link https://www.java.com/fr/download/ failed (status code 403)
-_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1269,External link https://github.com/impresso/PH-Passim-tutorial/blob/master/eebo/code/main.py failed (status code 429)
-_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1312,External link https://github.com/impresso/impresso-pycommons/blob/master/impresso_commons/text/rebuilder.py failed (status code 429)
-_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1454,External link https://github.com/impresso/PH-passim-tutorial/blob/master/explore-passim-output.ipynb failed (status code 429)
-_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1531,External link http://dx.doi.org/10.1093/alh/ajv028 failed (status code 403)
-_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1532,External link http://dx.doi.org/10.1080/1461670x.2020.1761865 failed (status code 403)
-_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1538,External link http://dx.doi.org/10.1145/2682571.2797068 failed (status code 403)
+_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1530,External link https://dx.doi.org/10.1093/alh/ajv029 failed (status code 403)
+_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1531,External link https://dx.doi.org/10.1093/alh/ajv028 failed (status code 403)
+_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1532,External link https://dx.doi.org/10.1080/1461670x.2020.1761865 failed (status code 403)
+_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1538,External link https://dx.doi.org/10.1145/2682571.2797068 failed (status code 403)
+_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,1539,External link https://doi.org/10.18653/v1/D17-1290 failed (status code 409)
_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,2515,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.html,2525,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/detecter-la-reutilisation-de-texte-avec-passim.md failed (status code 429)
_site/fr/lecons/du-html-a-une-liste-de-mots-1.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/560 failed (status code 429)
@@ -5503,7 +3454,6 @@ _site/fr/lecons/du-html-a-une-liste-de-mots-2.html,1792,External link https://gi
_site/fr/lecons/generer-jeu-donnees-texte-ocr.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/362 failed (status code 429)
_site/fr/lecons/generer-jeu-donnees-texte-ocr.html,2172,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/lecons/generer-jeu-donnees-texte-ocr.html,2182,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/generer-jeu-donnees-texte-ocr.md failed (status code 429)
-_site/fr/lecons/gestion-manipulation-donnees-r.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/625 failed (status code 429)
_site/fr/lecons/gestion-manipulation-donnees-r.html,1593,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/lecons/gestion-manipulation-donnees-r.html,1603,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/gestion-manipulation-donnees-r.md failed (status code 429)
_site/fr/lecons/index.html,1722,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
@@ -5521,13 +3471,13 @@ _site/fr/lecons/intro-a-bash-et-zsh.html,339,External link https://github.com/pr
_site/fr/lecons/intro-a-bash-et-zsh.html,2114,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/lecons/intro-a-bash-et-zsh.html,2124,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/intro-a-bash-et-zsh.md failed (status code 429)
_site/fr/lecons/intro-aux-bots-twitter.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/256 failed (status code 429)
-_site/fr/lecons/intro-aux-bots-twitter.html,564,External link http://www.sciencedirect.com/science/article/pii/S0747563213003129 failed: Forbidden (status code 403)
+_site/fr/lecons/intro-aux-bots-twitter.html,564,External link https://www.sciencedirect.com/science/article/pii/S0747563213003129 failed (status code 403)
_site/fr/lecons/intro-aux-bots-twitter.html,587,External link https://twitter.com/Every3Minutes failed (status code 400)
-_site/fr/lecons/intro-aux-bots-twitter.html,655,External link http://twitter.com/tinyarchae failed (status code 400)
_site/fr/lecons/intro-aux-bots-twitter.html,655,External link https://twitter.com/archaeoglitch failed (status code 400)
_site/fr/lecons/intro-aux-bots-twitter.html,655,External link https://twitter.com/botarchaeo failed (status code 400)
+_site/fr/lecons/intro-aux-bots-twitter.html,655,External link https://twitter.com/tinyarchae failed (status code 400)
_site/fr/lecons/intro-aux-bots-twitter.html,665,External link https://twitter.com/galaxykate failed (status code 400)
-_site/fr/lecons/intro-aux-bots-twitter.html,907,External link http://unicode.org/emoji/charts/full-emoji-list.html failed with something very wrong.
+_site/fr/lecons/intro-aux-bots-twitter.html,907,External link https://unicode.org/emoji/charts/full-emoji-list.html failed with something very wrong.
_site/fr/lecons/intro-aux-bots-twitter.html,986,External link https://twitter.com/TinyAdv failed (status code 400)
_site/fr/lecons/intro-aux-bots-twitter.html,1032,External link https://twitter.com/envoisdeRomeBot failed (status code 400)
_site/fr/lecons/intro-aux-bots-twitter.html,1033,External link https://twitter.com/lebotde7lieux failed (status code 400)
@@ -5541,6 +3491,7 @@ _site/fr/lecons/introduction-a-heurist.html,580,External link https://heuristnet
_site/fr/lecons/introduction-a-heurist.html,2167,External link https://heuristnetwork.org/tutorials/ failed (status code 500)
_site/fr/lecons/introduction-a-heurist.html,2749,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/lecons/introduction-a-heurist.html,2759,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/introduction-a-heurist.md failed (status code 429)
+_site/fr/lecons/introduction-a-la-stylometrie-avec-python.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/231 failed (status code 429)
_site/fr/lecons/introduction-a-la-stylometrie-avec-python.html,1910,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/lecons/introduction-a-la-stylometrie-avec-python.html,1920,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/introduction-a-la-stylometrie-avec-python.md failed (status code 429)
_site/fr/lecons/introduction-aux-carnets-jupyter-notebooks.html,341,External link https://github.com/programminghistorian/ph-submissions/issues/309 failed (status code 429)
@@ -5553,8 +3504,8 @@ _site/fr/lecons/introduction-et-installation.html,1548,External link https://git
_site/fr/lecons/manipuler-chaines-caracteres-python.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/284 failed (status code 429)
_site/fr/lecons/manipuler-chaines-caracteres-python.html,1705,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/lecons/manipuler-chaines-caracteres-python.html,1715,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/manipuler-chaines-caracteres-python.md failed (status code 429)
-_site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html,341,External link https://github.com/programminghistorian/ph-submissions/issues/223 failed (status code 429)
_site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html,577,External link https://powerhouse.com.au/ failed (status code 429)
+_site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html,587,External link https://vis.stanford.edu/papers/wrangler/ failed with something very wrong.
_site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html,592,External link https://api.maas.museum/docs failed with something very wrong.
_site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html,2102,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/lecons/nettoyer-ses-donnees-avec-openrefine.html,2112,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/nettoyer-ses-donnees-avec-openrefine.md failed (status code 429)
@@ -5562,7 +3513,6 @@ _site/fr/lecons/normaliser-donnees-textuelles-python.html,339,External link http
_site/fr/lecons/normaliser-donnees-textuelles-python.html,1689,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/lecons/normaliser-donnees-textuelles-python.html,1699,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/normaliser-donnees-textuelles-python.md failed (status code 429)
_site/fr/lecons/preserver-logiciels-recherche.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/616 failed (status code 429)
-_site/fr/lecons/preserver-logiciels-recherche.html,789,External link https://github.com/torvalds/linux/blob/master/fs/ext4/resize.c failed (status code 429)
_site/fr/lecons/preserver-logiciels-recherche.html,967,External link https://gitlab.com/users/sign_in failed (status code 403)
_site/fr/lecons/preserver-logiciels-recherche.html,968,External link https://gitlab.com/projects/new failed (status code 403)
_site/fr/lecons/preserver-logiciels-recherche.html,1240,External link https://doi.org/10.1145/602421.602422 failed (status code 403)
@@ -5573,11 +3523,9 @@ _site/fr/lecons/preserver-ses-donnees-de-recherche.html,588,External link https:
_site/fr/lecons/preserver-ses-donnees-de-recherche.html,1334,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/lecons/preserver-ses-donnees-de-recherche.html,1344,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/preserver-ses-donnees-de-recherche.md failed (status code 429)
_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/307 failed (status code 429)
-_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,628,External link https://github.com/dhcolumbia/pandoc-workflow/blob/master/pandoctut.bib failed (status code 429)
-_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,894,External link http://stackoverflow.com/questions/tagged/pandoc failed (status code 403)
-_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,896,External link http://www.draftin.com failed: Service Unavailable (status code 503)
-_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,896,External link http://www.authorea.com failed: Forbidden (status code 403)
-_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,896,External link http://mouapp.com/ failed with something very wrong.
+_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,894,External link https://stackoverflow.com/questions/tagged/pandoc failed (status code 403)
+_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,896,External link https://mouapp.com/ failed with something very wrong.
+_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,896,External link https://www.draftin.com failed with something very wrong.
_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,936,External link https://github.com/programminghistorian/jekyll/issues/46#issuecomment-59219906 failed (status code 429)
_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,1913,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/lecons/redaction-durable-avec-pandoc-et-markdown.html,1923,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/redaction-durable-avec-pandoc-et-markdown.md failed (status code 429)
@@ -5594,12 +3542,12 @@ _site/fr/lecons/travailler-avec-des-fichiers-texte.html,339,External link https:
_site/fr/lecons/travailler-avec-des-fichiers-texte.html,563,"External link https://docs.python.org/fr/2.7/library/functions.html#prin failed: https://docs.python.org/fr/2.7/library/functions.html exists, but the hash 'prin' does not (status code 200)"
_site/fr/lecons/travailler-avec-des-fichiers-texte.html,1757,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/lecons/travailler-avec-des-fichiers-texte.html,1767,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/lecons/travailler-avec-des-fichiers-texte.md failed (status code 429)
+_site/fr/nos-soutiens.html,282,External link https://www.sas.ac.uk/ failed (status code 403)
_site/fr/nos-soutiens.html,294,External link https://www.tilburguniversity.edu/ failed (status code 403)
+_site/fr/nos-soutiens.html,306,External link https://www.history.ac.uk/library-digital failed (status code 403)
_site/fr/nos-soutiens.html,337,"External link https://www.sussex.ac.uk/collaborate/business/public-funds#:~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies failed: https://www.sussex.ac.uk/collaborate/business/public-funds exists, but the hash ':~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies' does not (status code 200)"
-_site/fr/nos-soutiens.html,338,External link https://www.thebritishacademy.ac.uk/projects/writing-workshops-2018-digital-humanities/ failed (status code 403)
_site/fr/nos-soutiens.html,438,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/nos-soutiens.html,448,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/nos-soutiens.md failed (status code 429)
-_site/fr/pi.html,271,External link https://www.oecd.org/fr/topics/sub-issues/oda-eligibility-and-conditions/dac-list-of-oda-recipients.html failed (status code 403)
_site/fr/pi.html,417,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/pi.html,427,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/pi.md failed (status code 429)
_site/fr/politique-retrait-lecons.html,355,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
@@ -5612,9 +3560,7 @@ _site/fr/reaction.html,256,External link https://github.com/orgs/programminghist
_site/fr/reaction.html,328,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/reaction.html,338,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/reaction.md failed (status code 429)
_site/fr/recherche.html,266,External link https://academic.oup.com/jah/article-abstract/103/1/299/1751315 failed (status code 403)
-_site/fr/recherche.html,267,External link http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ failed: got a time out (response code 301) (status code 301)
-_site/fr/recherche.html,282,External link https://novaresearch.unl.pt/files/32228034/Ensinar_Humanidades_Digitais.pdf failed (status code 403)
-_site/fr/recherche.html,285,External link http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403)
+_site/fr/recherche.html,285,External link https://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403)
_site/fr/recherche.html,328,External link https://www.history.ac.uk/our-century/centenary-events/training-teacher-giving-your-first-digital-history-workshop failed (status code 403)
_site/fr/recherche.html,364,External link https://www.caurj.gov.br/seminario-solare-reune-desenvolvedores-internacionais-de-software-livre-para-arquitetura-e-urbanismo/ failed (status code 403)
_site/fr/recherche.html,371,External link https://www.jisc.ac.uk/events/digifest-2023/programme failed (status code 404)
@@ -5622,6 +3568,7 @@ _site/fr/recherche.html,377,External link https://dcdcconference.com/ failed wit
_site/fr/recherche.html,388,External link https://openpublishingfest.org/calendar.html#event-69/ failed: got a time out (response code 0) (status code 0)
_site/fr/recherche.html,392,External link https://2021.dhbenelux.org/schedule/ failed with something very wrong.
_site/fr/recherche.html,394,"External link https://msuglobaldh.org/abstracts/#programming-historian failed: https://msuglobaldh.org/abstracts/ exists, but the hash 'programming-historian' does not (status code 200)"
+_site/fr/recherche.html,396,External link https://ixa2.si.ehu.eus/intele/?q=webinars failed with something very wrong.
_site/fr/recherche.html,482,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/fr/recherche.html,492,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/fr/recherche.md failed (status code 429)
_site/index.html,373,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
@@ -5629,19 +3576,19 @@ _site/index.html,383,External link https://github.com/programminghistorian/jekyl
_site/posts/2022-in-review.html,287,External link https://twitter.com/ProgHist failed (status code 400)
_site/posts/2022-in-review.html,783,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/2022-in-review.html,793,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2022-12-16-2022-in-review.md failed (status code 429)
+_site/posts/DH-Award-2017.html,281,External link https://dhawards.org/dhawards2017/results/ failed with something very wrong.
_site/posts/DH-Award-2017.html,783,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/DH-Award-2017.html,793,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2018-03-04-DH-Award-2017.md failed (status code 429)
-_site/posts/FR-team.html,285,External link http://www.iperionch.eu/ failed with something very wrong.
+_site/posts/FR-team.html,285,External link https://www.iperionch.eu/ failed with something very wrong.
_site/posts/FR-team.html,787,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/FR-team.html,797,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2018-07-13-FR-team.md failed (status code 429)
_site/posts/Open-Education-Week.html,1303,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/Open-Education-Week.html,1313,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2024-03-07-Open-Education-Week.md failed (status code 429)
_site/posts/PH-TNA-JISC-event-2-annoucement.html,783,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/PH-TNA-JISC-event-2-annoucement.html,793,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2022-04-12-PH-TNA-JISC-event-2-annoucement.md failed (status code 429)
-_site/posts/PH-commitment-to-diversity.html,283,External link http://www.aauw.org/research/why-so-few/ failed: Not Found (status code 404)
_site/posts/PH-commitment-to-diversity.html,283,External link https://www.surveymonkey.co.uk/r/SFSRHHD failed: Not Found (status code 404)
_site/posts/PH-commitment-to-diversity.html,283,External link https://github.com/programminghistorian/jekyll/issues/152 failed (status code 429)
-_site/posts/PH-commitment-to-diversity.html,377,External link http://femtechnet.org/ failed with something very wrong.
+_site/posts/PH-commitment-to-diversity.html,377,External link https://femtechnet.org/ failed with something very wrong.
_site/posts/PH-commitment-to-diversity.html,909,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/PH-commitment-to-diversity.html,919,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2016-05-10-PH-commitment-to-diversity.md failed (status code 429)
_site/posts/PH-contributors.html,802,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
@@ -5649,13 +3596,13 @@ _site/posts/PH-contributors.html,812,External link https://github.com/programmin
_site/posts/PH-espanol-in-DH2018.html,279,External link https://twitter.com/mariajoafana/status/1011761787417628673 failed (status code 400)
_site/posts/PH-espanol-in-DH2018.html,289,External link https://github.com/programminghistorian/jekyll/issues/651 failed (status code 429)
_site/posts/PH-espanol-in-DH2018.html,291,External link https://twitter.com/visceral_blot/status/1012453500595290112 failed (status code 400)
-_site/posts/PH-espanol-in-DH2018.html,297,External link https://github.com/programminghistorian/jekyll/issues?q=french+label:french failed (status code 429)
+_site/posts/PH-espanol-in-DH2018.html,297,External link https://github.com/programminghistorian/jekyll/issues?q=french+label:french failed (status code 404)
_site/posts/PH-espanol-in-DH2018.html,1232,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/PH-espanol-in-DH2018.html,1242,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2018-07-09-PH-espanol-in-DH2018.md failed (status code 429)
-_site/posts/Uses-Of-The-Programming-Historian.html,293,External link http://lj.libraryjournal.com/2014/09/opinion/not-dead-yet/connecting-researchers-to-new-digital-tools-not-dead-yet/#_ failed: Bad Gateway (status code 502)
-_site/posts/Uses-Of-The-Programming-Historian.html,295,External link http://journalofdigitalhumanities.org/2-1/dh-contribution-to-topic-modeling/ failed: Moved Permanently (status code 301)
-_site/posts/Uses-Of-The-Programming-Historian.html,342,External link http://digitalhumanities.rice.edu/fall-2013-syllabus/ failed (status code 404)
-_site/posts/Uses-Of-The-Programming-Historian.html,344,External link http://dh.chadblack.net/info/syllabus/ failed with something very wrong.
+_site/posts/Uses-Of-The-Programming-Historian.html,293,External link https://lj.libraryjournal.com/2014/09/opinion/not-dead-yet/connecting-researchers-to-new-digital-tools-not-dead-yet/#_ failed (status code 502)
+_site/posts/Uses-Of-The-Programming-Historian.html,295,External link https://journalofdigitalhumanities.org/2-1/dh-contribution-to-topic-modeling/ failed with something very wrong.
+_site/posts/Uses-Of-The-Programming-Historian.html,342,External link https://digitalhumanities.rice.edu/fall-2013-syllabus/ failed (status code 404)
+_site/posts/Uses-Of-The-Programming-Historian.html,344,External link https://dh.chadblack.net/info/syllabus/ failed with something very wrong.
_site/posts/Uses-Of-The-Programming-Historian.html,364,External link https://twitter.com/antimony27/status/730808295410311169 failed (status code 400)
_site/posts/Uses-Of-The-Programming-Historian.html,372,External link https://twitter.com/StewartVarner/status/722520696606298112 failed (status code 400)
_site/posts/Uses-Of-The-Programming-Historian.html,373,External link https://twitter.com/eric_loy/status/758039397539409921 failed (status code 400)
@@ -5665,22 +3612,22 @@ _site/posts/Uses-Of-The-Programming-Historian.html,870,External link https://git
_site/posts/Uses-Of-The-Programming-Historian.html,880,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2018-05-22-Uses-Of-The-Programming-Historian.md failed (status code 429)
_site/posts/ad-hoc-translation.html,793,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/ad-hoc-translation.html,803,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2018-11-30-ad-hoc-translation.md failed (status code 429)
-_site/posts/adding-to-library-catalogue.html,281,External link http://www.twitter.com/Literature_Geek failed (status code 400)
_site/posts/adding-to-library-catalogue.html,281,External link https://twitter.com/proghist failed (status code 400)
+_site/posts/adding-to-library-catalogue.html,281,External link https://www.twitter.com/Literature_Geek failed (status code 400)
_site/posts/adding-to-library-catalogue.html,790,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/adding-to-library-catalogue.html,800,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2016-06-10-adding-to-library-catalogue.md failed (status code 429)
_site/posts/anisa-welcome.html,787,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/anisa-welcome.html,797,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2021-07-07-anisa-welcome.md failed (status code 429)
_site/posts/anna-maria-sichani.html,783,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/anna-maria-sichani.html,793,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2018-05-24-anna-maria-sichani.md failed (status code 429)
-_site/posts/announcing-new-team-spanish-language-editors.html,287,External link https://en.neogranadina.org failed: got a time out (response code 0) (status code 0)
_site/posts/announcing-new-team-spanish-language-editors.html,287,External link https://twitter.com/mariajoafana failed (status code 400)
+_site/posts/announcing-new-team-spanish-language-editors.html,287,External link https://en.neogranadina.org failed: got a time out (response code 0) (status code 0)
_site/posts/announcing-new-team-spanish-language-editors.html,289,External link https://twitter.com/victor_gayol failed (status code 400)
_site/posts/announcing-new-team-spanish-language-editors.html,291,External link https://twitter.com/RojasCastroA failed (status code 400)
-_site/posts/announcing-new-team-spanish-language-editors.html,293,External link https://twitter.com/proghist failed (status code 400)
_site/posts/announcing-new-team-spanish-language-editors.html,293,External link https://twitter.com/mariajoafana failed (status code 400)
-_site/posts/announcing-new-team-spanish-language-editors.html,293,External link https://twitter.com/victor_gayol failed (status code 400)
+_site/posts/announcing-new-team-spanish-language-editors.html,293,External link https://twitter.com/proghist failed (status code 400)
_site/posts/announcing-new-team-spanish-language-editors.html,293,External link https://twitter.com/RojasCastroA failed (status code 400)
+_site/posts/announcing-new-team-spanish-language-editors.html,293,External link https://twitter.com/victor_gayol failed (status code 400)
_site/posts/announcing-new-team-spanish-language-editors.html,788,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/announcing-new-team-spanish-language-editors.html,798,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2016-08-22-announcing-new-team-spanish-language-editors.md failed (status code 429)
_site/posts/appel-a-propositions.html,1261,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
@@ -5689,8 +3636,10 @@ _site/posts/appel-a-traductions.html,1245,External link https://github.com/progr
_site/posts/appel-a-traductions.html,1255,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2025-02-28-appel-a-traductions.md failed (status code 429)
_site/posts/articles-selected-ph-jisc-tna.html,783,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/articles-selected-ph-jisc-tna.html,793,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2021-11-15-articles-selected-ph-jisc-tna.md failed (status code 429)
+_site/posts/bienvenue-ph-fr.html,283,External link https://github.com/programminghistorian/ph-submissions/issues?q=is:issue+is:open+label:French failed (status code 404)
_site/posts/bienvenue-ph-fr.html,283,External link https://github.com/programminghistorian/jekyll/issues/850 failed (status code 429)
-_site/posts/bienvenue-ph-fr.html,283,External link https://github.com/programminghistorian/ph-submissions/issues?q=is:issue+is:open+label:French failed (status code 429)
+_site/posts/bienvenue-ph-fr.html,285,External link https://dsharp.library.cmu.edu/ failed with something very wrong.
+_site/posts/bienvenue-ph-fr.html,289,External link https://archives.mundaneum.org/fr/versions-digitalisees/schema-de-paul-otlet-documentation-et-telecommunication failed with something very wrong.
_site/posts/bienvenue-ph-fr.html,783,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/bienvenue-ph-fr.html,793,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2019-04-08-bienvenue-ph-fr.md failed (status code 429)
_site/posts/bogota-workshop-report.html,1247,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
@@ -5724,8 +3673,9 @@ _site/posts/bulletin-de-information-oct20.html,841,External link https://github.
_site/posts/bulletin-de-information.html,303,External link https://cas.au.dk/en/cedhar/events/show/artikel/teaching-digital-history-workshop-a-one-day-seminar/ failed (status code 404)
_site/posts/bulletin-de-information.html,829,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/bulletin-de-information.html,839,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2019-12-30-bulletin-de-information.md failed (status code 429)
-_site/posts/bulletin-issue-01.html,377,External link https://www.linkedin.com/company/prog-hist/ failed (status code 999)
+_site/posts/bulletin-issue-01.html,287,External link https://dhawards.org/dhawards2022/results/ failed with something very wrong.
_site/posts/bulletin-issue-01.html,377,External link https://twitter.com/ProgHist failed (status code 400)
+_site/posts/bulletin-issue-01.html,377,External link https://www.linkedin.com/company/prog-hist/ failed (status code 999)
_site/posts/bulletin-issue-01.html,1310,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/bulletin-issue-01.html,1320,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2023-12-13-bulletin-issue-01.md failed (status code 429)
_site/posts/bulletin-issue-02.html,330,External link https://campus.dariah.eu/source/programming-historian/page/1 failed (status code 404)
@@ -5738,6 +3688,7 @@ _site/posts/bulletin-issue-03.html,361,External link https://www.linkedin.com/co
_site/posts/bulletin-issue-03.html,1294,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/bulletin-issue-03.html,1304,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2024-06-28-bulletin-issue-03.md failed (status code 429)
_site/posts/bulletin-issue-04.html,325,External link https://campus.dariah.eu/source/programming-historian/page/1 failed (status code 404)
+_site/posts/bulletin-issue-04.html,325,External link https://dhawards.org/dhawards2022/results/ failed with something very wrong.
_site/posts/bulletin-issue-04.html,351,External link https://twitter.com/ProgHist failed (status code 400)
_site/posts/bulletin-issue-04.html,351,External link https://www.linkedin.com/company/prog-hist/ failed (status code 999)
_site/posts/bulletin-issue-04.html,1284,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
@@ -5750,11 +3701,9 @@ _site/posts/bulletin-issue-06.html,367,External link https://www.linkedin.com/co
_site/posts/bulletin-issue-06.html,1300,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/bulletin-issue-06.html,1310,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2025-03-28-bulletin-issue-06.md failed (status code 429)
_site/posts/call-for-editors-en.html,285,External link https://academic.oup.com/jah/article-abstract/103/1/299/1751315 failed (status code 403)
-_site/posts/call-for-editors-en.html,288,External link http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ failed: got a time out (response code 301) (status code 301)
_site/posts/call-for-editors-en.html,817,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/call-for-editors-en.html,827,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2021-09-22-call-for-editors-en.md failed (status code 429)
_site/posts/call-for-editors.html,285,External link https://academic.oup.com/jah/article-abstract/103/1/299/1751315 failed (status code 403)
-_site/posts/call-for-editors.html,288,External link http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ failed: got a time out (response code 301) (status code 301)
_site/posts/call-for-editors.html,817,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/call-for-editors.html,827,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2020-05-04-call-for-editors.md failed (status code 429)
_site/posts/call-for-fr-members.html,790,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
@@ -5782,15 +3731,16 @@ _site/posts/corpus-linguistics-in-action.html,289,"External link https://books.g
_site/posts/corpus-linguistics-in-action.html,343,"External link https://books.google.co.uk/books?id=o9o4gLzrRPEC&lpg=PP1&pg=PA212#v=onepage&q&f=false failed: https://books.google.co.uk/books?id=o9o4gLzrRPEC&lpg=PP1&pg=PA212 exists, but the hash 'v=onepage&q&f=false' does not (status code 200)"
_site/posts/corpus-linguistics-in-action.html,343,"External link https://books.google.co.uk/books?id=v98rcxoYUbYC&lpg=PP1&dq=mahlberg%20corpus%20stylistics&pg=PA111#v=onepage&q&f=false failed: https://books.google.co.uk/books?id=v98rcxoYUbYC&lpg=PP1&dq=mahlberg%20corpus%20stylistics&pg=PA111 exists, but the hash 'v=onepage&q&f=false' does not (status code 200)"
_site/posts/corpus-linguistics-in-action.html,358,"External link https://books.google.co.uk/books?id=v98rcxoYUbYC&lpg=PP1&dq=mahlberg%20corpus%20stylistics&pg=PA111#v=onepage&q&f=false failed: https://books.google.co.uk/books?id=v98rcxoYUbYC&lpg=PP1&dq=mahlberg%20corpus%20stylistics&pg=PA111 exists, but the hash 'v=onepage&q&f=false' does not (status code 200)"
-_site/posts/corpus-linguistics-in-action.html,430,External link http://www.euppublishing.com/doi/full/10.3366/cor.2016.0102 failed (status code 403)
+_site/posts/corpus-linguistics-in-action.html,430,External link https://www.euppublishing.com/doi/full/10.3366/cor.2016.0102 failed (status code 403)
_site/posts/corpus-linguistics-in-action.html,1798,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/corpus-linguistics-in-action.html,1808,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2017-09-21-corpus-linguistics-in-action.md failed (status code 429)
_site/posts/december-newsletter.html,300,External link https://openpublishingfest.org/calendar.html#event-69/ failed: got a time out (response code 0) (status code 0)
_site/posts/december-newsletter.html,302,External link https://www.history.ac.uk/our-century/centenary-events/training-teacher-giving-your-first-digital-history-workshop failed (status code 403)
-_site/posts/december-newsletter.html,304,External link http://ahlist.org/wp-content/uploads/2021/11/AHLIST-2021-PROGRAM_Virtual_FINAL.pdf failed: Not Found (status code 404)
+_site/posts/december-newsletter.html,304,External link https://ahlist.org/wp-content/uploads/2021/11/AHLIST-2021-PROGRAM_Virtual_FINAL.pdf failed (status code 404)
_site/posts/december-newsletter.html,320,External link https://openpublishingawards.org/results/2021/index.html failed with something very wrong.
_site/posts/december-newsletter.html,1255,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429)
_site/posts/december-newsletter.html,1265,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2021-12-22-december-newsletter.md failed (status code 429)
+_site/posts/dh-award-2016.html,281,External link https://dhawards.org/dhawards2016/results/ failed with something very wrong.
_site/posts/dh-award-2016.html,795,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/dh-award-2016.html,805,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2017-03-02-dh-award-2016.md failed (status code 429) _site/posts/dh-publishing-assistant.html,826,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -5800,7 +3750,6 @@ _site/posts/digital-storytelling-immigrant-stories.html,797,External link https: _site/posts/distant-reading-in-the-undergraduate-classroom.html,812,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/distant-reading-in-the-undergraduate-classroom.html,822,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2016-09-19-distant-reading-in-the-undergraduate-classroom.md failed (status code 429) _site/posts/dois-for-ph.html,292,External link https://github.com/programminghistorian/jekyll/issues/1682 failed (status code 429) -_site/posts/dois-for-ph.html,294,External link https://github.com/programminghistorian/jekyll/blob/4c5201ceb456deab677866886255bbd54500a9de/_layouts/crossref.xml failed (status code 429) _site/posts/dois-for-ph.html,853,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/dois-for-ph.html,863,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2020-05-14-dois-for-ph.md failed (status code 429) _site/posts/edinburgh-workshop-2015.html,780,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -5811,7 +3760,6 @@ _site/posts/en-call-for-lessons.html,1281,External link https://github.com/progr _site/posts/en-call-for-lessons.html,1291,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2023-11-15-en-call-for-lessons.md failed (status code 429) _site/posts/en-call-for-proposals.html,1286,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/en-call-for-proposals.html,1296,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2024-11-15-en-call-for-proposals.md failed (status code 429) -_site/posts/es-buscamos-revisores.html,321,External link https://github.com/programminghistorian/jekyll/blob/gh-pages/CODE_OF_CONDUCT.md failed (status code 429) _site/posts/es-buscamos-revisores.html,1270,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/es-buscamos-revisores.html,1280,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2024-11-14-es-buscamos-revisores.md failed (status code 429) _site/posts/fd-laramee.html,784,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -5822,27 +3770,23 @@ _site/posts/full-text-search.html,838,External link https://github.com/programmi _site/posts/full-text-search.html,848,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2020-05-26-full-text-search.md failed (status code 429) _site/posts/gisele-welcome.html,779,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/gisele-welcome.html,789,External link 
https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2022-07-28-gisele-welcome.md failed (status code 429) -_site/posts/history-of-protest.html,285,External link http://politicalmeetingsmapper.co.uk failed with something very wrong. -_site/posts/history-of-protest.html,285,External link http://labs.bl.uk/British+Library+Labs+Competition failed with something very wrong. -_site/posts/history-of-protest.html,289,External link http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) +_site/posts/history-of-protest.html,285,External link https://labs.bl.uk/British+Library+Labs+Competition failed with something very wrong. +_site/posts/history-of-protest.html,285,External link https://politicalmeetingsmapper.co.uk failed with something very wrong. +_site/posts/history-of-protest.html,289,External link https://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) _site/posts/history-of-protest.html,783,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/history-of-protest.html,793,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2017-03-31-history-of-protest.md failed (status code 429) -_site/posts/how-we-moved-to-github.html,305,External link https://github.com/programminghistorian/oldsite/blob/master/original_html/data-mining-the-internet-archive.html failed (status code 429) -_site/posts/how-we-moved-to-github.html,338,External link https://github.com/programminghistorian/oldsite/blob/master/prep_for_pandoc.py failed (status code 429) -_site/posts/how-we-moved-to-github.html,347,External link https://github.com/programminghistorian/oldsite/blob/master/modified_html/data-mining-the-internet-archive.html failed (status code 429) _site/posts/how-we-moved-to-github.html,544,External link https://github.com/programminghistorian/jekyll/issues/2 failed (status code 429) -_site/posts/how-we-moved-to-github.html,551,External link https://github.com/programminghistorian/jekyll/blob/master/modified_html/data-mining-the-internet-archive.html#L50 failed (status code 429) +_site/posts/how-we-moved-to-github.html,551,External link https://github.com/programminghistorian/jekyll/blob/master/modified_html/data-mining-the-internet-archive.html#L50 failed (status code 404) _site/posts/how-we-moved-to-github.html,564,External link https://github.com/programminghistorian/jekyll/blob/master/modified_html/data-mining-the-internet-archive.html failed (status code 429) -_site/posts/how-we-moved-to-github.html,647,External link https://github.com/programminghistorian/jekyll/blob/master/process_with_pandoc.sh failed (status code 429) _site/posts/how-we-moved-to-github.html,647,External link https://github.com/programminghistorian/jekyll/commits/master/lessons failed (status code 429) +_site/posts/how-we-moved-to-github.html,647,External link https://github.com/programminghistorian/jekyll/blob/master/process_with_pandoc.sh failed (status code 429) _site/posts/how-we-moved-to-github.html,649,External link https://github.com/programminghistorian/jekyll/blob/master/pandoc_filter.py failed (status code 429) _site/posts/how-we-moved-to-github.html,651,External link https://github.com/programminghistorian/jekyll/issues/15 failed (status code 429) _site/posts/how-we-moved-to-github.html,651,External link https://github.com/programminghistorian/jekyll/issues/5 failed (status code 429) _site/posts/how-we-moved-to-github.html,1147,External link 
https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/how-we-moved-to-github.html,1157,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2014-11-05-how-we-moved-to-github.md failed (status code 429) _site/posts/infrastructure-at-ph.html,298,External link https://github.com/programminghistorian/jekyll/issues/390 failed (status code 429) -_site/posts/infrastructure-at-ph.html,304,External link https://github.com/programminghistorian/jekyll/blob/gh-pages/_build/build.sh#L15-L40 failed (status code 429) -_site/posts/infrastructure-at-ph.html,325,External link https://github.com/programminghistorian/jekyll/blob/gh-pages/_plugins/validate_yaml.rb failed (status code 429) +_site/posts/infrastructure-at-ph.html,304,"External link https://github.com/programminghistorian/jekyll/blob/gh-pages/_build/build.sh#L15-L40 failed: https://github.com/programminghistorian/jekyll/blob/gh-pages/_build/build.sh exists, but the hash 'L15-L40' does not (status code 200)" _site/posts/infrastructure-at-ph.html,341,External link https://github.com/programminghistorian/jekyll/blob/gh-pages/.travis.yml failed (status code 429) _site/posts/infrastructure-at-ph.html,342,"External link https://guides.github.com/activities/hello-world/#pr failed: https://guides.github.com/activities/hello-world/ exists, but the hash 'pr' does not (status code 200)" _site/posts/infrastructure-at-ph.html,352,External link https://github.com/programminghistorian/jekyll/issues/536 failed (status code 429) @@ -5888,20 +3832,23 @@ _site/posts/newsletter-april-21.html,291,External link https://twitter.com/ProgH _site/posts/newsletter-april-21.html,293,External link https://twitter.com/ProgHist/status/1369634644442939402 failed (status code 400) _site/posts/newsletter-april-21.html,293,External link https://twitter.com/cesta_stanford failed (status code 400) _site/posts/newsletter-april-21.html,293,External link https://twitter.com/UCLDH failed (status code 400) +_site/posts/newsletter-april-21.html,297,External link https://ixa2.si.ehu.eus/intele/?q=webinars failed with something very wrong. _site/posts/newsletter-april-21.html,328,"External link https://programminghistorian.org/en/support-us#institutional-partner-programme failed: https://programminghistorian.org/en/support-us exists, but the hash 'institutional-partner-programme' does not (status code 200)" +_site/posts/newsletter-april-21.html,367,External link https://dhawards.org/dhawards2020/results/ failed with something very wrong. 
_site/posts/newsletter-april-21.html,870,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/newsletter-april-21.html,880,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2021-04-01-newsletter-april-21.md failed (status code 429) _site/posts/newsletter-june20.html,301,"External link https://programminghistorian.org/en/support-us#institutional-partner-programme failed: https://programminghistorian.org/en/support-us exists, but the hash 'institutional-partner-programme' does not (status code 200)" _site/posts/newsletter-june20.html,892,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/newsletter-june20.html,902,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2020-07-01-newsletter-june20.md failed (status code 429) -_site/posts/newsletter-march20.html,314,External link https://twitter.com/KU_Leuven failed (status code 400) _site/posts/newsletter-march20.html,314,"External link https://programminghistorian.org/en/support-us#institutional-partner-programme failed: https://programminghistorian.org/en/support-us exists, but the hash 'institutional-partner-programme' does not (status code 200)" +_site/posts/newsletter-march20.html,314,External link https://twitter.com/KU_Leuven failed (status code 400) _site/posts/newsletter-march20.html,844,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/newsletter-march20.html,854,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2020-04-01-newsletter-march20.md failed (status code 429) _site/posts/newsletter-oct20.html,299,"External link https://programminghistorian.org/en/support-us#institutional-partner-programme failed: https://programminghistorian.org/en/support-us exists, but the hash 'institutional-partner-programme' does not (status code 200)" _site/posts/newsletter-oct20.html,829,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/newsletter-oct20.html,839,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2020-10-01-newsletter-oct20.md failed (status code 429) _site/posts/newsletter-year20.html,321,"External link https://programminghistorian.org/en/support-us#institutional-partner-programme failed: https://programminghistorian.org/en/support-us exists, but the hash 'institutional-partner-programme' does not (status code 200)" +_site/posts/newsletter-year20.html,325,External link https://www.history.ac.uk/library failed (status code 403) _site/posts/newsletter-year20.html,341,External link https://twitter.com/ProgHist failed (status code 400) _site/posts/newsletter-year20.html,901,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/newsletter-year20.html,911,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2021-01-05-newsletter-year20.md failed (status code 429) @@ -5921,10 +3868,10 @@ _site/posts/premio-hdh-2018.html,791,External link https://github.com/programmin _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html,287,External link https://twitter.com/mariajoafana failed (status code 400) _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html,289,External link https://twitter.com/victor_gayol failed 
(status code 400) _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html,291,External link https://twitter.com/RojasCastroA failed (status code 400) -_site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html,293,External link https://twitter.com/proghist failed (status code 400) -_site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html,293,External link https://twitter.com/RojasCastroA failed (status code 400) _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html,293,External link https://twitter.com/victor_gayol failed (status code 400) +_site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html,293,External link https://twitter.com/RojasCastroA failed (status code 400) _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html,293,External link https://twitter.com/mariajoafana failed (status code 400) +_site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html,293,External link https://twitter.com/proghist failed (status code 400) _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html,791,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.html,801,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2016-08-25-presentando-al-nuevo-equipo-de-editores-de-contenidos-en-espanol.md failed (status code 429) _site/posts/proghist-trustee-advert.html,820,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -5936,9 +3883,8 @@ _site/posts/programming-historian-india.html,1308,External link https://github.c _site/posts/programming-historian-india.html,1318,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2018-04-04-programming-historian-india.md failed (status code 429) _site/posts/programming-historian-live-london.html,788,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/programming-historian-live-london.html,798,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2015-06-02-programming-historian-live-london.md failed (status code 429) -_site/posts/promoting-digital-archives.html,279,External link https://dp.la/info/2016/02/01/color-our-collections/ failed (status code 404) _site/posts/promoting-digital-archives.html,279,External link https://twitter.com/search?q=%23colorourcollections&src=typd failed (status code 400) -_site/posts/promoting-digital-archives.html,302,External link https://www.canva.com/ failed (status code 403) +_site/posts/promoting-digital-archives.html,279,External link https://dp.la/info/2016/02/01/color-our-collections/ failed (status code 404) _site/posts/promoting-digital-archives.html,309,External link https://twitter.com/search?q=%23colorourcollections&src=typd failed (status code 400) _site/posts/promoting-digital-archives.html,803,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/promoting-digital-archives.html,813,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2016-10-18-promoting-digital-archives.md failed (status code 429) @@ -5954,16 +3900,15 @@ _site/posts/riva-quiroga-joshua-ortiz.html,786,External link https://github.com/ 
_site/posts/riva-quiroga-joshua-ortiz.html,796,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2019-03-16-riva-quiroga-joshua-ortiz.md failed (status code 429) _site/posts/roundup2017a.html,817,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/roundup2017a.html,827,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2017-06-12-roundup2017a.md failed (status code 429) -_site/posts/september-newsletter.html,301,External link https://novaresearch.unl.pt/files/32228034/Ensinar_Humanidades_Digitais.pdf failed (status code 403) _site/posts/september-newsletter.html,303,External link https://ach.org/blog/2021/09/13/fall-2021-programming-historian-book-club/ failed (status code 404) -_site/posts/september-newsletter.html,321,External link https://www.fct.pt/apoios/veraocomciencia/index.phtml.pt failed (status code 404) _site/posts/september-newsletter.html,321,External link https://www.fct.pt/fct.phtml.en failed (status code 404) +_site/posts/september-newsletter.html,321,External link https://www.fct.pt/apoios/veraocomciencia/index.phtml.pt failed (status code 404) _site/posts/september-newsletter.html,1254,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/september-newsletter.html,1264,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2021-09-24-september-newsletter.md failed (status code 429) _site/posts/sonic-word-clouds.html,787,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/sonic-word-clouds.html,797,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2017-06-18-sonic-word-clouds.md failed (status code 429) +_site/posts/spanish-editor.html,280,External link https://github.com/programminghistorian/jekyll/issues/246 failed (status code 429) _site/posts/spanish-editor.html,287,External link https://academic.oup.com/jah/article/103/1/299/1751315 failed (status code 403) -_site/posts/spanish-editor.html,288,External link http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ failed: got a time out (response code 301) (status code 301) _site/posts/spanish-editor.html,803,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/spanish-editor.html,813,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2016-07-05-spanish-editor.md failed (status code 429) _site/posts/subject-specialist-editor.html,299,External link https://academic.oup.com/jah/article/103/1/299/1751315/The-Programming-Historian failed (status code 403) @@ -5974,6 +3919,7 @@ _site/posts/twenty-sixteen-review.html,795,External link https://github.com/prog _site/posts/two-new-PH-editors.html,786,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/two-new-PH-editors.html,796,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2017-03-30-two-new-PH-editors.md failed (status code 429) _site/posts/ucl-placement-2021.html,285,External link https://twitter.com/BlondeHistorian failed (status code 400) +_site/posts/ucl-placement-2021.html,285,External link https://github.com/programminghistorian/jekyll/issues/2072 failed (status code 429) _site/posts/ucl-placement-2021.html,797,External link 
https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/ucl-placement-2021.html,807,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2021-06-26-ucl-placement-2021.md failed (status code 429) _site/posts/vote-dh-award.html,783,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -5982,19 +3928,23 @@ _site/posts/welcome-martin-grandjean.html,777,External link https://github.com/p _site/posts/welcome-martin-grandjean.html,787,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2019-09-26-welcome-martin-grandjean.md failed (status code 429) _site/posts/welcome-mc-boucher.html,779,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/welcome-mc-boucher.html,789,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2020-02-07-welcome-mc-boucher.md failed (status code 429) -_site/posts/welcome-ph-fr.html,283,External link https://github.com/programminghistorian/ph-submissions/issues?q=is:issue+is:open+label:French failed (status code 429) _site/posts/welcome-ph-fr.html,283,External link https://github.com/programminghistorian/jekyll/issues/850 failed (status code 429) +_site/posts/welcome-ph-fr.html,283,External link https://github.com/programminghistorian/ph-submissions/issues?q=is:issue+is:open+label:French failed (status code 404) +_site/posts/welcome-ph-fr.html,285,External link https://dsharp.library.cmu.edu/ failed with something very wrong. +_site/posts/welcome-ph-fr.html,289,External link https://archives.mundaneum.org/fr/versions-digitalisees/schema-de-paul-otlet-documentation-et-telecommunication failed with something very wrong. 
_site/posts/welcome-ph-fr.html,783,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/welcome-ph-fr.html,793,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2019-04-08-welcome-ph-fr.md failed (status code 429) _site/posts/welcome-to-ph2.html,820,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/welcome-to-ph2.html,830,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2012-06-27-welcome-to-ph2.md failed (status code 429) _site/posts/welcome-zoe-leblanc.html,785,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/posts/welcome-zoe-leblanc.html,795,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/_posts/2018-11-09-welcome-zoe-leblanc.md failed (status code 429) +_site/pt/apoiadores.html,282,External link https://www.sas.ac.uk/ failed (status code 403) _site/pt/apoiadores.html,294,External link https://www.tilburguniversity.edu/ failed (status code 403) +_site/pt/apoiadores.html,306,External link https://www.history.ac.uk/library-digital failed (status code 403) _site/pt/apoiadores.html,336,"External link https://www.sussex.ac.uk/collaborate/business/public-funds#:~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies failed: https://www.sussex.ac.uk/collaborate/business/public-funds exists, but the hash ':~:text=Impact%20accelerator%20funds,-From%20law%20to&text=The%20ESRC%20and%20AHRC%20Impact,businesses%20through%20to%20large%20companies' does not (status code 200)" -_site/pt/apoiadores.html,337,External link https://www.thebritishacademy.ac.uk/projects/writing-workshops-2018-digital-humanities/ failed (status code 403) _site/pt/apoiadores.html,437,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/apoiadores.html,447,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/apoiadores.md failed (status code 429) +_site/pt/contribua.html,292,External link https://www.worldcat.org/title/programming-historian-en-espanol/oclc/1061292935&referer=brief_results failed (status code 403) _site/pt/contribua.html,347,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/contribua.html,357,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/contribua.md failed (status code 429) _site/pt/directrizes-autor.html,267,"External link https://docs.google.com/spreadsheets/d/1vrvZTygZLfQRoQildD667Xcgzhf_reQC8Nq4OD-BRIA/edit#gid=0 failed: https://docs.google.com/spreadsheets/d/1vrvZTygZLfQRoQildD667Xcgzhf_reQC8Nq4OD-BRIA/edit exists, but the hash 'gid=0' does not (status code 200)" @@ -6012,33 +3962,33 @@ _site/pt/directrizes-tradutor.html,368,External link https://github.com/programm _site/pt/doacoes.html,265,External link https://www.patreon.com/join/theprogramminghistorian failed (status code 403) _site/pt/doacoes.html,322,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/doacoes.html,332,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/doacoes.md failed (status code 429) -_site/pt/equipe.html,310,External link http://twitter.com/maxcarlons failed (status code 400) 
-_site/pt/equipe.html,512,External link http://twitter.com/cosovschi failed (status code 400) +_site/pt/equipe.html,310,External link https://twitter.com/maxcarlons failed (status code 400) +_site/pt/equipe.html,512,External link https://twitter.com/cosovschi failed (status code 400) _site/pt/equipe.html,1272,External link https://www.nabeelsiddiqui.net/ failed with something very wrong. -_site/pt/equipe.html,1278,External link http://twitter.com/nabsiddiqui failed (status code 400) -_site/pt/equipe.html,1631,External link http://twitter.com/giulia_taurino failed (status code 400) -_site/pt/equipe.html,1810,External link http://twitter.com/alexwermercolan failed (status code 400) -_site/pt/equipe.html,2059,External link http://www.mariajoseafanador.com failed: Moved Permanently (status code 301) -_site/pt/equipe.html,2065,External link http://twitter.com/mariajoafana failed (status code 400) -_site/pt/equipe.html,2534,External link http://twitter.com/IsaGribomont failed (status code 400) -_site/pt/equipe.html,2745,External link http://twitter.com/espejolento failed (status code 400) -_site/pt/equipe.html,3036,External link http://twitter.com/jenniferisve failed (status code 400) -_site/pt/equipe.html,3361,External link http://twitter.com/enetreseles failed (status code 400) -_site/pt/equipe.html,3568,External link http://twitter.com/jgob failed (status code 400) -_site/pt/equipe.html,3863,External link http://twitter.com/rivaquiroga failed (status code 400) -_site/pt/equipe.html,4804,External link http://twitter.com/superHH failed (status code 400) -_site/pt/equipe.html,5190,External link http://twitter.com/emilienschultz failed (status code 400) -_site/pt/equipe.html,5317,External link http://twitter.com/davvalent failed (status code 400) -_site/pt/equipe.html,5842,External link http://twitter.com/danielalvesfcsh failed (status code 400) -_site/pt/equipe.html,6107,External link http://twitter.com/ericbrasiln failed (status code 400) -_site/pt/equipe.html,6543,External link http://twitter.com/jimmy_medeiros failed (status code 400) -_site/pt/equipe.html,7027,External link http://twitter.com/araceletorres failed (status code 400) -_site/pt/equipe.html,7286,External link http://twitter.com/j_w_baker failed (status code 400) -_site/pt/equipe.html,7727,External link http://twitter.com/Adam_Crymble failed (status code 400) -_site/pt/equipe.html,8258,External link http://twitter.com/jenniferisve failed (status code 400) -_site/pt/equipe.html,8589,External link http://twitter.com/rivaquiroga failed (status code 400) -_site/pt/equipe.html,8878,External link http://twitter.com/amsichani failed (status code 400) -_site/pt/equipe.html,9221,External link http://twitter.com/AnisaHawes failed (status code 400) +_site/pt/equipe.html,1278,External link https://twitter.com/nabsiddiqui failed (status code 400) +_site/pt/equipe.html,1631,External link https://twitter.com/giulia_taurino failed (status code 400) +_site/pt/equipe.html,1810,External link https://twitter.com/alexwermercolan failed (status code 400) +_site/pt/equipe.html,2059,External link https://www.mariajoseafanador.com failed with something very wrong. 
+_site/pt/equipe.html,2065,External link https://twitter.com/mariajoafana failed (status code 400) +_site/pt/equipe.html,2534,External link https://twitter.com/IsaGribomont failed (status code 400) +_site/pt/equipe.html,2745,External link https://twitter.com/espejolento failed (status code 400) +_site/pt/equipe.html,3036,External link https://twitter.com/jenniferisve failed (status code 400) +_site/pt/equipe.html,3361,External link https://twitter.com/enetreseles failed (status code 400) +_site/pt/equipe.html,3568,External link https://twitter.com/jgob failed (status code 400) +_site/pt/equipe.html,3863,External link https://twitter.com/rivaquiroga failed (status code 400) +_site/pt/equipe.html,4804,External link https://twitter.com/superHH failed (status code 400) +_site/pt/equipe.html,5190,External link https://twitter.com/emilienschultz failed (status code 400) +_site/pt/equipe.html,5317,External link https://twitter.com/davvalent failed (status code 400) +_site/pt/equipe.html,5842,External link https://twitter.com/danielalvesfcsh failed (status code 400) +_site/pt/equipe.html,6107,External link https://twitter.com/ericbrasiln failed (status code 400) +_site/pt/equipe.html,6543,External link https://twitter.com/jimmy_medeiros failed (status code 400) +_site/pt/equipe.html,7027,External link https://twitter.com/araceletorres failed (status code 400) +_site/pt/equipe.html,7286,External link https://twitter.com/j_w_baker failed (status code 400) +_site/pt/equipe.html,7727,External link https://twitter.com/Adam_Crymble failed (status code 400) +_site/pt/equipe.html,8258,External link https://twitter.com/jenniferisve failed (status code 400) +_site/pt/equipe.html,8589,External link https://twitter.com/rivaquiroga failed (status code 400) +_site/pt/equipe.html,8878,External link https://twitter.com/amsichani failed (status code 400) +_site/pt/equipe.html,9221,External link https://twitter.com/AnisaHawes failed (status code 400) _site/pt/equipe.html,10041,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/equipe.html,10051,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/equipe.md failed (status code 429) _site/pt/eventos.html,296,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) @@ -6049,25 +3999,33 @@ _site/pt/jisc-tna-parceria.html,336,External link https://github.com/programming _site/pt/jisc-tna-parceria.html,346,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/jisc-tna-parceria.md failed (status code 429) _site/pt/licoes-politica-remocao.html,357,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes-politica-remocao.html,367,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes-politica-remocao.md failed (status code 429) +_site/pt/licoes/HTML-lista-palavras-1.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/442 failed (status code 429) _site/pt/licoes/HTML-lista-palavras-1.html,1648,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/HTML-lista-palavras-1.html,1658,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/HTML-lista-palavras-1.md failed (status code 429) +_site/pt/licoes/HTML-lista-palavras-2.html,339,External link 
https://github.com/programminghistorian/ph-submissions/issues/443 failed (status code 429) _site/pt/licoes/HTML-lista-palavras-2.html,1752,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/HTML-lista-palavras-2.html,1762,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/HTML-lista-palavras-2.md failed (status code 429) +_site/pt/licoes/algoritmos-agrupamento-scikit-learn-python.html,353,External link https://github.com/programminghistorian/ph-submissions/issues/578 failed (status code 429) _site/pt/licoes/algoritmos-agrupamento-scikit-learn-python.html,2693,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/algoritmos-agrupamento-scikit-learn-python.html,2703,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/algoritmos-agrupamento-scikit-learn-python.md failed (status code 429) +_site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html,353,External link https://github.com/programminghistorian/ph-submissions/issues/422 failed (status code 429) +_site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html,703,External link https://factominer.free.fr/ failed: got a time out (response code 0) (status code 0) _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html,1709,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/analise-correspondencia-pesquisa-historica-R.html,1719,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/analise-correspondencia-pesquisa-historica-R.md failed (status code 429) +_site/pt/licoes/analise-sentimento-R-syuzhet.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/467 failed (status code 429) +_site/pt/licoes/analise-sentimento-R-syuzhet.html,572,External link https://myrabr.com/blog/analise-de-sentimento/ failed: got a time out (response code 0) (status code 0) _site/pt/licoes/analise-sentimento-R-syuzhet.html,1598,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/analise-sentimento-R-syuzhet.html,1608,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/analise-sentimento-R-syuzhet.md failed (status code 429) -_site/pt/licoes/analise-sentimento-exploracao-dados.html,659,External link https://github.com/cjhutto/vaderSentiment/blob/master/vaderSentiment/vaderSentiment.py failed (status code 429) +_site/pt/licoes/analise-sentimento-exploracao-dados.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/375 failed (status code 429) _site/pt/licoes/analise-sentimento-exploracao-dados.html,1478,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/analise-sentimento-exploracao-dados.html,1488,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/analise-sentimento-exploracao-dados.md failed (status code 429) +_site/pt/licoes/aplicacao-web-interativa-r-shiny-leaflet.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/513 failed (status code 429) _site/pt/licoes/aplicacao-web-interativa-r-shiny-leaflet.html,1680,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) 
_site/pt/licoes/aplicacao-web-interativa-r-shiny-leaflet.html,1690,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/aplicacao-web-interativa-r-shiny-leaflet.md failed (status code 429) -_site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,866,External link https://stackoverflow.com/questions/tagged/pandoc failed (status code 403) _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,866,"External link https://groups.google.com/forum/#!forum/pandoc-discuss failed: https://groups.google.com/forum/ exists, but the hash '!forum/pandoc-discuss' does not (status code 200)" +_site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,866,External link https://stackoverflow.com/questions/tagged/pandoc failed (status code 403) +_site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,868,External link https://www.draftin.com/ failed with something very wrong. _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,868,External link https://www.authorea.com/ failed (status code 403) -_site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,868,External link http://www.draftin.com/ failed: Service Unavailable (status code 503) _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,896,External link https://github.com/programminghistorian/jekyll/issues/46#issue-45559983 failed (status code 429) _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,1873,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.html,1883,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.md failed (status code 429) @@ -6075,7 +4033,6 @@ _site/pt/licoes/camadas-vetoriais-qgis.html,341,External link https://github.com _site/pt/licoes/camadas-vetoriais-qgis.html,2502,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/camadas-vetoriais-qgis.html,2512,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/camadas-vetoriais-qgis.md failed (status code 429) _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/440 failed (status code 429) -_site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html,558,External link https://www.worldcat.org/title/unix-and-linux/oclc/308171076&referer=brief_results failed (status code 403) _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html,1621,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/contagem-mineracao-dados-investigacao-unix.html,1631,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/contagem-mineracao-dados-investigacao-unix.md failed (status code 429) _site/pt/licoes/contar-frequencias-palavras-python.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/461 failed (status code 429) @@ -6091,13 +4048,15 @@ _site/pt/licoes/download-automatico-wget.html,337,External link https://github.c _site/pt/licoes/download-automatico-wget.html,1362,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) 
_site/pt/licoes/download-automatico-wget.html,1372,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/download-automatico-wget.md failed (status code 429) _site/pt/licoes/download-multiplos-registros-query-strings.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/465 failed (status code 429) -_site/pt/licoes/download-multiplos-registros-query-strings.html,1294,External link http://stackoverflow.com/questions/273192/python-best-way-to-create-directory-if-it-doesnt-exist-for-file-write failed (status code 403) +_site/pt/licoes/download-multiplos-registros-query-strings.html,1294,External link https://stackoverflow.com/questions/273192/python-best-way-to-create-directory-if-it-doesnt-exist-for-file-write failed (status code 403) _site/pt/licoes/download-multiplos-registros-query-strings.html,1832,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/download-multiplos-registros-query-strings.html,1842,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/download-multiplos-registros-query-strings.md failed (status code 429) _site/pt/licoes/download-paginas-web-python.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/360 failed (status code 429) _site/pt/licoes/download-paginas-web-python.html,1670,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/download-paginas-web-python.html,1680,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/download-paginas-web-python.md failed (status code 429) _site/pt/licoes/explorar-analisar-dados-rede-python.html,343,External link https://github.com/programminghistorian/ph-submissions/issues/446 failed (status code 429) +_site/pt/licoes/explorar-analisar-dados-rede-python.html,621,External link https://www.sixdegreesoffrancisbacon.com failed with something very wrong. +_site/pt/licoes/explorar-analisar-dados-rede-python.html,897,External link https://sixdegreesoffrancisbacon.com/ failed with something very wrong. _site/pt/licoes/explorar-analisar-dados-rede-python.html,3009,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/explorar-analisar-dados-rede-python.html,3019,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/explorar-analisar-dados-rede-python.md failed (status code 429) _site/pt/licoes/extrair-paginas-ilustradas-com-python.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/447 failed (status code 429) @@ -6111,14 +4070,11 @@ _site/pt/licoes/geocodificando-qgis.html,337,External link https://github.com/pr _site/pt/licoes/geocodificando-qgis.html,1547,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/geocodificando-qgis.html,1557,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/geocodificando-qgis.md failed (status code 429) _site/pt/licoes/georreferenciamento-qgis.html,341,External link https://github.com/programminghistorian/ph-submissions/issues/434 failed (status code 429) -_site/pt/licoes/georreferenciamento-qgis.html,612,External link http://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP failed with something very wrong. 
+_site/pt/licoes/georreferenciamento-qgis.html,612,External link https://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP failed: Found (status code 302) _site/pt/licoes/georreferenciamento-qgis.html,725,External link https://islandimagined.ca/islandora/object/imagined:208687 failed (status code 403) _site/pt/licoes/georreferenciamento-qgis.html,2485,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/georreferenciamento-qgis.html,2495,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/georreferenciamento-qgis.md failed (status code 429) _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/577 failed (status code 429) -_site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html,625,External link https://www.canva.com/ failed (status code 403) -_site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html,987,External link https://www.canva.com/ failed (status code 403) -_site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html,1121,External link https://www.canva.com/ failed (status code 403) _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html,1448,External link https://doi.org/10.1080/00031305.2017.1399928 failed (status code 403) _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html,2022,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.html,2032,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/git-ferramenta-metodologica-projetos-historia-1.md failed (status code 429) @@ -6141,21 +4097,23 @@ _site/pt/licoes/introducao-ao-markdown.html,337,External link https://github.com _site/pt/licoes/introducao-ao-markdown.html,1412,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/introducao-ao-markdown.html,1422,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/introducao-ao-markdown.md failed (status code 429) _site/pt/licoes/introducao-codificacao-textos-tei-1.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/470 failed (status code 429) -_site/pt/licoes/introducao-codificacao-textos-tei-1.html,667,External link https://github.com/rogalmic/vscode-xml-complete failed (status code 429) _site/pt/licoes/introducao-codificacao-textos-tei-1.html,1577,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/introducao-codificacao-textos-tei-1.html,1587,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/introducao-codificacao-textos-tei-1.md failed (status code 429) _site/pt/licoes/introducao-dados-abertos-conectados.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/428 failed (status code 429) +_site/pt/licoes/introducao-dados-abertos-conectados.html,575,External link https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ failed with something very wrong. 
_site/pt/licoes/introducao-dados-abertos-conectados.html,717,"External link https://gtr.ukri.org/projects?ref=AH/N003446/1#/tabOverview failed: https://gtr.ukri.org/projects?ref=AH/N003446/1 exists, but the hash '/tabOverview' does not (status code 200)" -_site/pt/licoes/introducao-dados-abertos-conectados.html,750,External link http://semanticweb.org/wiki/Main_Page.html failed: got a time out (response code 0) (status code 0) +_site/pt/licoes/introducao-dados-abertos-conectados.html,750,External link https://semanticweb.org/wiki/Main_Page.html failed: got a time out (response code 0) (status code 0) _site/pt/licoes/introducao-dados-abertos-conectados.html,905,"External link https://pt.wikipedia.org/wiki/%C3%81rvore_(estrutura_de_dados)#Terminologia failed: https://pt.wikipedia.org/wiki/%C3%81rvore_(estrutura_de_dados) exists, but the hash 'Terminologia' does not (status code 200)" -_site/pt/licoes/introducao-dados-abertos-conectados.html,1081,External link http://linkeddata.org/guides-and-tutorials failed: Internal Server Error (status code 500) +_site/pt/licoes/introducao-dados-abertos-conectados.html,905,External link https://www.history.ac.uk/research/digital-history failed (status code 403) +_site/pt/licoes/introducao-dados-abertos-conectados.html,1081,External link https://linkeddata.org/guides-and-tutorials failed with something very wrong. +_site/pt/licoes/introducao-dados-abertos-conectados.html,1083,External link https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/ failed with something very wrong. _site/pt/licoes/introducao-dados-abertos-conectados.html,1089,"External link https://gtr.ukri.org/projects?ref=AH/N003446/1#/tabOverview failed: https://gtr.ukri.org/projects?ref=AH/N003446/1 exists, but the hash '/tabOverview' does not (status code 200)" _site/pt/licoes/introducao-dados-abertos-conectados.html,1626,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/introducao-dados-abertos-conectados.html,1636,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/introducao-dados-abertos-conectados.md failed (status code 429) _site/pt/licoes/introducao-estilometria-python.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/445 failed (status code 429) _site/pt/licoes/introducao-estilometria-python.html,1437,External link https://doi.org/10.1002/asi.21001 failed (status code 403) -_site/pt/licoes/introducao-estilometria-python.html,1440,External link https://doi.org/10.1093/llc/fqx017 failed (status code 403) _site/pt/licoes/introducao-estilometria-python.html,1440,External link https://doi.org/10.1093/llc/fqv023 failed (status code 403) +_site/pt/licoes/introducao-estilometria-python.html,1440,External link https://doi.org/10.1093/llc/fqx017 failed (status code 403) _site/pt/licoes/introducao-estilometria-python.html,1479,External link https://doi.org/10.1093/llc/fqi067 failed (status code 403) _site/pt/licoes/introducao-estilometria-python.html,1488,External link https://doi.org/10.1002/asi.22954 failed (status code 403) _site/pt/licoes/introducao-estilometria-python.html,1500,External link https://doi.org/10.1002/asi.v60:1 failed (status code 403) @@ -6186,6 +4144,7 @@ _site/pt/licoes/limpar-dados-openrefine.html,601,External link https://powerhous _site/pt/licoes/limpar-dados-openrefine.html,2123,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) 
_site/pt/licoes/limpar-dados-openrefine.html,2133,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/limpar-dados-openrefine.md failed (status code 429) _site/pt/licoes/manipulacao-transformacao-dados-r.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/397 failed (status code 429) +_site/pt/licoes/manipulacao-transformacao-dados-r.html,1018,External link https://www.ggplot2.org failed with something very wrong. _site/pt/licoes/manipulacao-transformacao-dados-r.html,1565,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/manipulacao-transformacao-dados-r.html,1575,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/manipulacao-transformacao-dados-r.md failed (status code 429) _site/pt/licoes/manipular-strings-python.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/403 failed (status code 429) @@ -6207,12 +4166,10 @@ _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html,337,External link h _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html,618,External link https://twitter.com/Girlinthe/status/387166944094199809 failed (status code 400) _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html,1504,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/preservar-os-seus-dados-de-investigacao.html,1514,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/preservar-os-seus-dados-de-investigacao.md failed (status code 429) -_site/pt/licoes/processamento-basico-texto-r.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/381 failed (status code 429) _site/pt/licoes/processamento-basico-texto-r.html,562,"External link https://www.rstudio.com/products/rstudio/#Desktop failed: https://www.rstudio.com/products/rstudio/ exists, but the hash 'Desktop' does not (status code 200)" _site/pt/licoes/processamento-basico-texto-r.html,2390,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/processamento-basico-texto-r.html,2400,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/processamento-basico-texto-r.md failed (status code 429) _site/pt/licoes/qgis-camadas.html,341,External link https://github.com/programminghistorian/ph-submissions/issues/566 failed (status code 429) -_site/pt/licoes/qgis-camadas.html,607,External link http://www.gov.pe.ca/gis/download.php3?name=coastline&file_format=SHP failed with something very wrong. 
_site/pt/licoes/qgis-camadas.html,2719,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/qgis-camadas.html,2729,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/qgis-camadas.md failed (status code 429) _site/pt/licoes/reutilizacao-codigo-modularidade-python.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/406 failed (status code 429) @@ -6223,26 +4180,23 @@ _site/pt/licoes/saida-dados-ficheiro-html-python.html,339,External link https:// _site/pt/licoes/saida-dados-ficheiro-html-python.html,1701,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/saida-dados-ficheiro-html-python.html,1711,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/saida-dados-ficheiro-html-python.md failed (status code 429) _site/pt/licoes/som-dados-sonificacao-historiadores.html,337,External link https://github.com/programminghistorian/ph-submissions/issues/429 failed (status code 429) -_site/pt/licoes/som-dados-sonificacao-historiadores.html,564,External link http://blog.taracopplestone.co.uk/making-things-photobashing-as-archaeological-remediation/ failed with something very wrong. -_site/pt/licoes/som-dados-sonificacao-historiadores.html,591,External link http://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. -_site/pt/licoes/som-dados-sonificacao-historiadores.html,606,External link http://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. -_site/pt/licoes/som-dados-sonificacao-historiadores.html,672,External link http://www.lynda.com/GarageBand-tutorials/Importing-audio-tracks/156620/164050-4.html failed (status code 404) -_site/pt/licoes/som-dados-sonificacao-historiadores.html,882,External link http://www.electronics.dit.ie/staff/tscarff/Music_technology/midi/midi_note_numbers_for_octaves.html failed with something very wrong. -_site/pt/licoes/som-dados-sonificacao-historiadores.html,1045,External link http://puffin.creighton.edu/jesuit/relations/ failed with something very wrong. -_site/pt/licoes/som-dados-sonificacao-historiadores.html,1132,External link http://www.jstor.org/stable/734136 failed (status code 403) -_site/pt/licoes/som-dados-sonificacao-historiadores.html,1134,External link http://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. -_site/pt/licoes/som-dados-sonificacao-historiadores.html,1136,External link http://motherboard.vice.com/read/the-strange-acoustic-phenomenon-behind-these-wacked-out-versions-of-pop-songs failed with something very wrong. +_site/pt/licoes/som-dados-sonificacao-historiadores.html,564,External link https://blog.taracopplestone.co.uk/making-things-photobashing-as-archaeological-remediation/ failed with something very wrong. +_site/pt/licoes/som-dados-sonificacao-historiadores.html,591,External link https://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. +_site/pt/licoes/som-dados-sonificacao-historiadores.html,606,External link https://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. 
+_site/pt/licoes/som-dados-sonificacao-historiadores.html,672,External link https://www.lynda.com/GarageBand-tutorials/Importing-audio-tracks/156620/164050-4.html failed (status code 404) +_site/pt/licoes/som-dados-sonificacao-historiadores.html,882,External link https://www.electronics.dit.ie/staff/tscarff/Music_technology/midi/midi_note_numbers_for_octaves.html failed with something very wrong. +_site/pt/licoes/som-dados-sonificacao-historiadores.html,1045,External link https://puffin.creighton.edu/jesuit/relations/ failed with something very wrong. +_site/pt/licoes/som-dados-sonificacao-historiadores.html,1109,External link https://www.lilypond.org/ failed with something very wrong. +_site/pt/licoes/som-dados-sonificacao-historiadores.html,1132,External link https://www.jstor.org/stable/734136 failed (status code 403) +_site/pt/licoes/som-dados-sonificacao-historiadores.html,1134,External link https://www.icad.org/Proceedings/2008/Hermann2008.pdf failed with something very wrong. +_site/pt/licoes/som-dados-sonificacao-historiadores.html,1136,External link https://motherboard.vice.com/read/the-strange-acoustic-phenomenon-behind-these-wacked-out-versions-of-pop-songs failed with something very wrong. _site/pt/licoes/som-dados-sonificacao-historiadores.html,1675,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/som-dados-sonificacao-historiadores.html,1685,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/som-dados-sonificacao-historiadores.md failed (status code 429) _site/pt/licoes/sumarizacao-narrativas-web-python.html,335,External link https://github.com/programminghistorian/ph-submissions/issues/420 failed (status code 429) _site/pt/licoes/sumarizacao-narrativas-web-python.html,523,External link https://dl.acm.org/doi/10.1145/1145581.1145623 failed (status code 403) -_site/pt/licoes/sumarizacao-narrativas-web-python.html,543,External link https://github.com/arquivo/pwa-technologies/wiki/Arquivo.pt-API failed (status code 429) -_site/pt/licoes/sumarizacao-narrativas-web-python.html,544,External link https://github.com/LIAAD/TemporalSummarizationFramework failed (status code 429) _site/pt/licoes/sumarizacao-narrativas-web-python.html,549,External link https://www.arquivo.pt failed with something very wrong. -_site/pt/licoes/sumarizacao-narrativas-web-python.html,555,External link https://arquivo.pt/api failed (status code 429) -_site/pt/licoes/sumarizacao-narrativas-web-python.html,559,External link https://github.com/arquivo/ failed (status code 429) _site/pt/licoes/sumarizacao-narrativas-web-python.html,570,External link https://www.arquivo.pt failed with something very wrong. -_site/pt/licoes/sumarizacao-narrativas-web-python.html,597,External link https://github.com/arquivo/pwa-technologies/wiki/Arquivo.pt-API failed (status code 429) +_site/pt/licoes/sumarizacao-narrativas-web-python.html,822,External link https://yake.inesctec.pt failed with something very wrong. 
_site/pt/licoes/sumarizacao-narrativas-web-python.html,1987,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/sumarizacao-narrativas-web-python.html,1997,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/sumarizacao-narrativas-web-python.md failed (status code 429) _site/pt/licoes/trabalhando-ficheiros-texto-python.html,339,External link https://github.com/programminghistorian/ph-submissions/issues/317 failed (status code 429) @@ -6258,10 +4212,8 @@ _site/pt/licoes/visualizacao-animacao-tabelas-historicas-R.html,1514,External li _site/pt/licoes/visualizacao-basica-dados-tabulares-r.html,333,External link https://github.com/programminghistorian/ph-submissions/issues/624 failed (status code 429) _site/pt/licoes/visualizacao-basica-dados-tabulares-r.html,1562,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/licoes/visualizacao-basica-dados-tabulares-r.html,1572,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/licoes/visualizacao-basica-dados-tabulares-r.md failed (status code 429) -_site/pt/pesquisa.html,263,External link http://jah.oxfordjournals.org/content/103/1/299.2.full failed (status code 403) -_site/pt/pesquisa.html,264,External link http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/ failed: got a time out (response code 301) (status code 301) -_site/pt/pesquisa.html,279,External link https://novaresearch.unl.pt/files/32228034/Ensinar_Humanidades_Digitais.pdf failed (status code 403) -_site/pt/pesquisa.html,282,External link http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) +_site/pt/pesquisa.html,263,External link https://jah.oxfordjournals.org/content/103/1/299.2.full failed (status code 403) +_site/pt/pesquisa.html,282,External link https://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179 failed (status code 403) _site/pt/pesquisa.html,293,"External link https://zenodo.org/record/3813763#.XvoVqShKhPY failed: https://zenodo.org/record/3813763 exists, but the hash '.XvoVqShKhPY' does not (status code 200)" _site/pt/pesquisa.html,325,External link https://www.history.ac.uk/our-century/centenary-events/training-teacher-giving-your-first-digital-history-workshop failed (status code 403) _site/pt/pesquisa.html,358,External link https://www.caurj.gov.br/seminario-solare-reune-desenvolvedores-internacionais-de-software-livre-para-arquitetura-e-urbanismo/ failed (status code 403) @@ -6270,23 +4222,25 @@ _site/pt/pesquisa.html,371,External link https://dcdcconference.com/ failed with _site/pt/pesquisa.html,382,External link https://openpublishingfest.org/calendar.html#event-69/ failed: got a time out (response code 0) (status code 0) _site/pt/pesquisa.html,386,External link https://2021.dhbenelux.org/schedule/ failed with something very wrong. _site/pt/pesquisa.html,388,"External link https://msuglobaldh.org/abstracts/#programming-historian failed: https://msuglobaldh.org/abstracts/ exists, but the hash 'programming-historian' does not (status code 200)" +_site/pt/pesquisa.html,390,External link https://ixa2.si.ehu.eus/intele/?q=webinars failed with something very wrong. 
_site/pt/pesquisa.html,477,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/pesquisa.html,487,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/pesquisa.md failed (status code 429) _site/pt/politica-de-privacidade.html,347,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/politica-de-privacidade.html,357,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/politica-de-privacidade.md failed (status code 429) -_site/pt/ppi.html,271,External link https://www.oecd.org/en/topics/sub-issues/oda-eligibility-and-conditions/dac-list-of-oda-recipients.html failed (status code 403) _site/pt/ppi.html,420,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/ppi.html,430,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/ppi.md failed (status code 429) _site/pt/reportar-um-erro.html,256,External link https://github.com/orgs/programminghistorian/projects/6 failed (status code 404) _site/pt/reportar-um-erro.html,333,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/reportar-um-erro.html,343,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/reportar-um-erro.md failed (status code 429) +_site/pt/sobre.html,266,External link https://dhawards.org/dhawards2022/results/ failed with something very wrong. +_site/pt/sobre.html,266,External link https://dhawards.org/dhawards2016/results/ failed with something very wrong. _site/pt/sobre.html,324,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/sobre.html,334,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/sobre.md failed (status code 429) _site/pt/vagas.html,292,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/pt/vagas.html,302,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/pt/vagas.md failed (status code 429) -_site/translation-concordance.html,3569,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) -_site/translation-concordance.html,3579,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/translation-concordance.md failed (status code 429) -_site/troubleshooting.html,392,External link http://www.diveintopython.net failed (status code 403) +_site/translation-concordance.html,5641,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) +_site/translation-concordance.html,5651,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/translation-concordance.md failed (status code 429) +_site/troubleshooting.html,392,External link https://www.diveintopython.net failed (status code 403) _site/troubleshooting.html,452,External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) _site/troubleshooting.html,462,External link https://github.com/programminghistorian/jekyll/commits/gh-pages/troubleshooting.md failed (status code 429) _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,20,"internally linking to css/screen.css, which does not exist" @@ -6298,22 +4252,22 @@ 
_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,63,"internally l _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,64,"internally linking to static/History.jsp, which does not exist" _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,66,"internally linking to static/Project.jsp, which does not exist" _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,68,"internally linking to static/Contact.jsp, which does not exist" -_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280087, which does not exist" -_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to static/Verdicts.jsp#guilty, which does not exist" -_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280089, which does not exist" _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to static/Punishment.jsp#death, which does not exist" +_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280090, which does not exist" +_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280090, which does not exist" +_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280089, which does not exist" _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280089, which does not exist" -_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to browse.jsp?div=t17800628-32, which does not exist" _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280088, which does not exist" -_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to browse.jsp?div=t17800628-34, which does not exist" _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280088, which does not exist" +_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280087, which does not exist" +_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280087, which does not exist" +_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to static/Verdicts.jsp#guilty, which does not exist" _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280084, which does not exist" _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280084, which does not exist" -_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280087, which does not exist" +_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to browse.jsp?div=t17800628-34, which does not exist" _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to static/Crimes.jsp#breakingpeace, which does not exist" -_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280090, which does not exist" +_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to 
browse.jsp?div=t17800628-32, which does not exist" _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to static/Crimes.jsp#riot, which does not exist" -_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280090, which does not exist" _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,83,"internally linking to browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes, which does not exist" _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,97,"internally linking to forms/formMain.jsp, which does not exist" _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,100,"internally linking to browse.jsp?dir=sessionsPapers, which does not exist" @@ -6323,6 +4277,7 @@ _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,193,"internally _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,196,"internally linking to static/Site-map.jsp, which does not exist" _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,197,"internally linking to static/Legal-info.jsp, which does not exist" _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,20,"internally linking to css/screen.css, which does not exist" +_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,21,"internally linking to a.css, which does not exist" _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,25,"internally linking to css/print.css, which does not exist" _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,61,"internally linking to index.jsp, which does not exist" _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,62,"internally linking to forms/formMain.jsp, which does not exist" @@ -6330,22 +4285,22 @@ _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,63,"inte _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,64,"internally linking to static/History.jsp, which does not exist" _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,66,"internally linking to static/Project.jsp, which does not exist" _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,68,"internally linking to static/Contact.jsp, which does not exist" +_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280090, which does not exist" _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to browse.jsp?div=t17800628-34, which does not exist" -_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to browse.jsp?div=t17800628-32, which does not exist" -_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280088, which does not exist" -_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280088, which does not exist" -_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to static/Punishment.jsp#death, which does not exist" -_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to static/Verdicts.jsp#guilty, which does not exist" -_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280089, which does not 
exist" -_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280089, which does not exist" _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to static/Crimes.jsp#riot, which does not exist" +_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to static/Punishment.jsp#death, which does not exist" _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to static/Crimes.jsp#breakingpeace, which does not exist" -_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280090, which does not exist" -_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280090, which does not exist" +_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to browse.jsp?div=t17800628-32, which does not exist" +_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to static/Verdicts.jsp#guilty, which does not exist" _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280084, which does not exist" _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280084, which does not exist" _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280087, which does not exist" _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280087, which does not exist" +_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280088, which does not exist" +_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280088, which does not exist" +_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280089, which does not exist" +_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280089, which does not exist" +_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,81,"internally linking to images.jsp?doc=178006280090, which does not exist" _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,83,"internally linking to browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes, which does not exist" _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,97,"internally linking to forms/formMain.jsp, which does not exist" _site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,100,"internally linking to browse.jsp?dir=sessionsPapers, which does not exist" @@ -6448,3 +4403,4 @@ _site/pt/licoes/som-dados-sonificacao-historiadores.html,593,"internally linking _site/pt/licoes/transcricao-automatica-grafias-nao-latinas.html,708,"internally linking to #definicao-de-necessidades; the file exists, but the hash 'definicao-de-necessidades' does not" _site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html,26,internal script reference a.js does not exist _site/assets/interactive-text-games-using-twine/First Day in the Office.html,14,script is empty and has no src 
attribute
+_site/assets/normaliser-donnees-textuelles-python/obo-t17800628-33.html,26,internal script reference a.js does not exist
diff --git a/pt/apoiadores.md b/pt/apoiadores.md
index 71e9492d54..da2ac6684e 100644
--- a/pt/apoiadores.md
+++ b/pt/apoiadores.md
@@ -31,7 +31,7 @@ Contribuintes para o nosso [Programa de Parceria Institucional](/pt/ppi):
 - [Cambridge Digital Humanities](https://www.cdh.cam.ac.uk/), Reino Unido
 - [Georg-August-Universität Göttingen](https://www.uni-goettingen.de/), Alemanha
 - [MIT Libraries](https://libraries.mit.edu/), Estados Unidos
-- [Center for Digital Research in the Humanities, University of Nebraska-Lincoln](http://cdrh.unl.edu/), Estados Unidos
+- [Center for Digital Research in the Humanities, University of Nebraska-Lincoln](https://cdrh.unl.edu/), Estados Unidos
 - [The National Archives](https://www.nationalarchives.gov.uk/), Reino Unido
 - [College of the Liberal Arts, Penn State University](https://la.psu.edu/), Estados Unidos
 - [Purdue University](https://www.purdue.edu/), Estados Unidos
diff --git a/pt/contribua.md b/pt/contribua.md
index f6ef9ce59a..37ea190ce8 100755
--- a/pt/contribua.md
+++ b/pt/contribua.md
@@ -48,9 +48,9 @@ Estamos especialmente gratos por dicas sobre lições que apresentam links quebr
 Este projeto é a nossa tentativa de demonstrar que a publicação acadêmica pode e deve ser de acesso aberto. Por favor, ajude-nos a divulgar essa mensagem e a proporcionar o maior acesso possível a este recurso, solicitando ao bibliotecário que inclua o projeto no catálogo da sua biblioteca.
 
-O _Programming Historian_ está listado no WorldCat (em [português](https://search.worldcat.org/title/1332987197), [inglês](http://www.worldcat.org/title/programming-historian/oclc/951537099), [espanhol](https://www.worldcat.org/title/programming-historian-en-espanol/oclc/1061292935&referer=brief_results), e [francês](https://uva.worldcat.org/title/programming-historian-en-franais/oclc/1104391842)).
+O _Programming Historian_ está listado no WorldCat (em [português](https://search.worldcat.org/title/1332987197), [inglês](https://www.worldcat.org/title/programming-historian/oclc/951537099), [espanhol](https://www.worldcat.org/title/programming-historian-en-espanol/oclc/1061292935&referer=brief_results), e [francês](https://uva.worldcat.org/title/programming-historian-en-franais/oclc/1104391842)).
 
-Os nossos agradecimentos à [Biblioteca da Universidade de Purdue](http://purdue-primo-prod.hosted.exlibrisgroup.com/primo_library/libweb/action/dlDisplay.do?vid=PURDUE&search_scope=everything&docId=PURDUE_ALMA51671812890001081&fn=permalink), à Amanda Visconti e à Universidade da Virgínia.
+Os nossos agradecimentos à [Biblioteca da Universidade de Purdue](https://purdue-primo-prod.hosted.exlibrisgroup.com/primo_library/libweb/action/dlDisplay.do?vid=PURDUE&search_scope=everything&docId=PURDUE_ALMA51671812890001081&fn=permalink), à Amanda Visconti e à Universidade da Virgínia.
 
 A versão em Inglês do projeto está indexada no [Directory of Open Access Journals](https://doaj.org/toc/2397-2068).
diff --git a/pt/directrizes-editor.md b/pt/directrizes-editor.md
index d866c9672f..50aa33d577 100755
--- a/pt/directrizes-editor.md
+++ b/pt/directrizes-editor.md
@@ -293,7 +293,7 @@ Aqui estão alguns locais para procurar imagens para a lição:
   - [British Library](https://www.flickr.com/photos/britishlibrary)
   - [Internet Archive Book Images](https://archive.org/details/bookimages)
   - [Virtual Manuscript Library of Switzerland](https://www.flickr.com/photos/e-codices)
-  - [Library of Congress Maps](http://www.loc.gov/maps/collections)
+  - [Library of Congress Maps](https://www.loc.gov/maps/collections)
 
 É preciso verificar se a imagem corresponde ao estilo das anteriores (deve ser uma imagem de livro, não uma fotografia), ter pelo menos 200 pixels em ambas as dimensões e não ter restrições de direitos de autor. A imagem não pode ser ofensiva e deve seguir o nosso [compromisso com a diversidade (em inglês)](/posts/PH-commitment-to-diversity). Convém encontrar algo que não perpetue estereótipos ou tenha uma mensagem subliminar de machismo ou superioridade branca.
diff --git a/pt/directrizes-tradutor.md b/pt/directrizes-tradutor.md
index 933e88830f..a4d41ee844 100644
--- a/pt/directrizes-tradutor.md
+++ b/pt/directrizes-tradutor.md
@@ -32,7 +32,7 @@ Todas as nossas lições também devem ser escritas em Markdown e seguir as noss
 ## Submeter uma lição traduzida
 
 Depois do ficheiro de tradução ter as especificações acima mencionadas, estará pronto a ser enviado para revisão por pares.
-Temos uma página do [_Programming Historian em português_ no GitHub](https://github.com/programminghistorian), onde mantemos dois repositórios (um repositório é um local para armazenar ficheiros e pastas relacionados, ou seja, um tipo de pasta). Um deles, chamado [jekyll](https://github.com/programminghistorian/jekyll), hospeda o código da versão online do site disponível em http://programminghistorian.org. O outro repositório é chamado [ph-submissions](https://github.com/programminghistorian/ph-submissions).
+Temos uma página do [_Programming Historian em português_ no GitHub](https://github.com/programminghistorian), onde mantemos dois repositórios (um repositório é um local para armazenar ficheiros e pastas relacionados, ou seja, um tipo de pasta). Um deles, chamado [jekyll](https://github.com/programminghistorian/jekyll), hospeda o código da versão online do site disponível em https://programminghistorian.org. O outro repositório é chamado [ph-submissions](https://github.com/programminghistorian/ph-submissions).
 
 A melhor maneira para enviar uma tradução é adicioná-la diretamente ao repositório [ph-submissions](https://github.com/programminghistorian/ph-submissions). Graças aos recursos do GitHub, pode fazer isso usando ações de arrastar e soltar, com as quais provavelmente já está familiarizado. Para os novos tradutores, estas são as etapas:
diff --git a/pt/doacoes.md b/pt/doacoes.md
index c02fae4f11..6316d0e38f 100644
--- a/pt/doacoes.md
+++ b/pt/doacoes.md
@@ -19,7 +19,7 @@ O seu suporte contribui diretamente para manter a infraestrutura de divulgação
-
+
diff --git a/pt/licoes/HTML-lista-palavras-1.md b/pt/licoes/HTML-lista-palavras-1.md
index 107882f503..cec26178fd 100644
--- a/pt/licoes/HTML-lista-palavras-1.md
+++ b/pt/licoes/HTML-lista-palavras-1.md
@@ -1,152 +1,152 @@
----
-title: De HTML para Lista de Palavras (parte 1)
-layout: lesson
-collection: lessons
-slug: HTML-lista-palavras-1
-date: 2012-07-17
-translation_date: 2022-10-27
-authors:
-- William J. Turkel
-- Adam Crymble
-reviewers:
-- Jim Clifford
-- Frederik Elwert
-editors:
-- Miriam Posner
-translator:
-- Felipe Lamarca
-translation-editor:
-- Jimmy Medeiros
-translation-reviewer:
-- Daniel Bonatto Seco
-- Diana Rebelo Rodriguez
-difficulty: 2
-review-ticket: https://github.com/programminghistorian/ph-submissions/issues/442
-next: HTML-lista-palavras-2
-series_total: 2 lessons
-sequence: 1
-activity: transforming
-topics: [python]
-abstract: "Nesta lição de duas partes, aprofundaremos o que aprendeu sobre o Download de Páginas Web com Python, aprendendo como remover a marcação HTML de uma página web da transcrição do julgamento criminal de Benjamin Bowsey em 1780. Faremos isso usando uma variedade de operadores de string, métodos de string e habilidades de leitura atenta. Introduziremos looping e branching de modo que os programas possam repetir tarefas e testar certas condições, tornando possível a separação do conteúdo das tags HTML. Finalmente, faremos a conversão do conteúdo de uma string longa para uma lista de palavras, que podem ser ordenadas, indexadas e contabilizadas posteriormente."
-original: from-html-to-list-of-words-1
-avatar_alt: Uma girafa a ser imitada por um humano
-doi: 10.46430/phpt0027
----
-
-{% include toc.html %}
-
-<div class="alert alert-warning">
-O site do Old Bailey Online foi recentemente atualizado. Infelizmente, devido às diversas mudanças, muitos (se não todos) os elementos do site de exemplo usado nesta lição não funcionarão conforme descrito. No entanto, as metodologias ensinadas por esta lição permanecem relevantes e podem ser adaptadas pelos leitores para um site de exemplo diferente. Estamos trabalhando na adaptação da lição para o novo site do Old Bailey Online, mas ainda não temos cronograma preciso de quando a lição será atualizada. [Abril de 2024]
-</div>
-
-## Objetivos da lição
-
-Nesta lição de duas partes, aprofundaremos o que aprendeu sobre o [Download de Páginas Web com Python](/pt/licoes/download-paginas-web-python), aprendendo como remover a *marcação HTML* de uma página web da [transcrição do julgamento criminal de Benjamin Bowsey em 1780](https://perma.cc/8LM6-W39K). Faremos isso usando uma variedade de *operadores de string*, *métodos de string* e habilidades de leitura atenta. Introduziremos *looping* e *branching* de modo que os programas possam repetir tarefas e testar certas condições, tornando possível a separação do conteúdo das tags HTML. Finalmente, faremos a conversão do conteúdo de uma string longa para uma *lista de palavras*, que podem ser ordenadas, indexadas e contabilizadas posteriormente.
-
-## O Desafio
-
-Para ter uma ideia mais clara da tarefa que temos pela frente, abra o ficheiro *obo-t17800628-33.html* que criou em [Download de Páginas Web com Python](/pt/licoes/download-paginas-web-python) (ou faça o [download e guarde a transcrição do julgamento](/assets/from-html-to-list-of-words-1/obo-t17800628-33.html) caso ainda não tenha uma cópia) e depois verifique o código-fonte do HTML clicando em *Ferramentas -> Ferramentas do Navegador -> Fonte da página* (para usuários do navegador Firefox). À medida que for olhando o código-fonte, notará que há tags HTML misturadas com texto. Caso não tenha experiência com HTML, recomendamos que faça o tutorial do W3 Schools [HTML](http://www.w3schools.com/html/) para se familiarizar com a marcação HTML. Se o seu trabalho frequentemente requer que remova a marcação HTML, certamente será útil entendê-la ao visualizá-la.
-
-## Ficheiros Necessários para esta Lição
-
-- *[obo-t17800628-33.html](/assets/from-html-to-list-of-words-1/obo-t17800628-33.html)*
-
-## Idealizando um Algoritmo
-
-Uma vez que o objetivo é nos livrarmos do HTML, o primeiro passo é criar um algoritmo que retorna apenas o texto (removendo as tags HTML) do artigo. Um algoritmo é um procedimento suficientemente detalhado a ponto de poder ser implementado em um computador. Facilita escrever o seu algoritmo no português direto; é uma ótima maneira de delinear exatamente o que deseja fazer antes de mergulhar no código. Para construir esse algoritmo, utilizaremos as nossas habilidades de leitura atenta para descobrir um modo de capturar apenas o conteúdo textual da biografia.
-
-Ao verificar o código-fonte do *obo-t17800628-33.html*, notará que a transcrição real não começa imediatamente. Na verdade, há um número de tags HTML e algumas informações de citação. Nesse caso, o conteúdo não começa antes da linha 81!
-
-``` xml
-<p>324. BENJAMIN BOWSEY (a blackmoor ) was indicted for that he together with five hundred other persons and more, did, unlawfully, riotously, and tumultuously assemble on the 6th of June
-```
-
-Estamos interessados apenas na transcrição em si e não nos metadados extras contidos nas tags. No entanto, irá notar que o final dos metadados corresponde ao início da transcrição. Isso torna a localização dos metadados uma marcação potencialmente útil para isolar o texto transcrito.
-
-À primeira vista, percebemos que a transcrição do julgamento em si começa com uma tag HTML: `<p>`, que significa 'parágrafo'. Essa é coincidentemente a primeira tag de parágrafo no documento. Podemos usar isso para encontrar o ponto de partida do nosso texto transcrito. Temos sorte nesse caso porque essa tag é uma maneira confiável de encontrar o início do texto transcrito no julgamento (caso deseje, dê uma olhada em alguns outros julgamentos para verificar).
-
-O texto do julgamento termina na linha 82 com outra tag HTML: `<br/>`, que significa uma quebra de linha. Essa é a última quebra de linha no documento. Essas duas tags (tag de primeiro parágrafo e última quebra de linha), portanto, nos oferecem uma forma de isolar o texto desejado. Sites bem formatados quase sempre terão uma forma única de sinalizar o fim de um conteúdo. Você frequentemente só precisa verificar de forma atenta.
-
-A próxima tarefa é remover toda a marcação HTML que permanece mesclada ao conteúdo. Como sabe que tags HTML são sempre encontradas em pares correspondentes de parênteses angulares, é provavelmente uma aposta segura o fato de que, se remover tudo o que estiver entre parênteses angulares, todo o HTML será removido e restará somente a transcrição. Note que estamos assumindo que a transcrição não possuirá os símbolos matemáticos de "menor que" ou "maior que". Se Bowsey fosse um matemático, essa suposição não seria tão segura.
-
-A seguir, descreve-se o algoritmo em palavras.
-
-Para isolar o conteúdo:
-
-- Fazer o download do texto transcrito
-- Buscar no HTML e guardar a localização da primeira tag `<p>`
-- Buscar no HTML e guardar a localização da última tag `<br/>`
-- Armazenar tudo que vier após a tag `<p>` e antes da tag `<br/>` numa string: *pageContents*
-
-Neste ponto, temos o texto da transcrição do julgamento, além da marcação HTML. Em seguida:
-
-- Verificar cada caractere na string *pageContents*, um por um
-- Se o caractere for um colchete angular esquerdo (\<), estamos dentro de uma tag e deve-se ignorar os caracteres subsequentes
-- Se o caractere for um colchete angular direito (\>), estamos deixando a tag; deve-se ignorar este caractere, mas verificar cada um dos caracteres subsequentes
-- Se não estivermos dentro de uma tag, adiciona-se cada caractere a uma nova variável: *text*
-
-Finalmente:
-
-- Separar a string de texto em uma lista de palavras individuais, que podem ser manipuladas posteriormente.
-
-## Isolar o Conteúdo Desejado
-
-Os próximos passos utilizam os comandos de Python introduzidos na lição [Manipular strings com Python](/pt/licoes/manipular-strings-python) para implementar a primeira metade do algoritmo: remover todo o conteúdo antes da tag `<p>` e depois da tag `<br/>`. Para recapitular, o algoritmo era o seguinte:
-
-- Fazer o download do texto transcrito
-- Buscar no HTML e guardar a localização da primeira tag `<p>`
-- Buscar no HTML e guardar a localização da última tag `<br/>`
-- Armazenar tudo que vier após a tag `<p>` e antes da tag `<br/>` numa string: *pageContents*
-
-Para fazer isso, você utilizará o método de string 'find', o método .rfind() (que encontra a última correspondência de algo) e criará uma nova substring contendo apenas o conteúdo desejado entre essas posições de índice.
-
-Enquanto trabalha, desenvolverá ficheiros separados para armazenar o seu código. Um deles será chamado `obo.py` (para "Old Bailey Online"). Esse ficheiro conterá todo o código que deseja reutilizar; em outras palavras, `obo.py` é um módulo. Discutimos a ideia de módulo em [Reutilização de código e modularidade em Python](/pt/licoes/reutilizacao-codigo-modularidade-python), quando salvamos nossas funções em `cumprimento.py`.
-
-Crie um novo ficheiro chamado `obo.py` e armazene-o no seu diretório *programming-historian*. Utilizaremos esse ficheiro para manter cópias das funções necessárias para processar o The Old Bailey Online. Digite ou copie o código a seguir no seu ficheiro:
-
-``` python
-# obo.py
-
-def stripTags(pageContents):
-    pageContents = str(pageContents)
-    startLoc = pageContents.find("<p>")
-    endLoc = pageContents.rfind("<br/>")
-
-    pageContents = pageContents[startLoc:endLoc]
-    return pageContents
-```
-
-Crie um segundo ficheiro, `trial-content.py`, e salve o programa mostrado abaixo:
-
-
-``` python
-# trial-content.py
-
-import urllib.request, urllib.error, urllib.parse, obo
-
-url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33'
-
-response = urllib.request.urlopen(url)
-HTML = response.read().decode('UTF-8')
-
-print((obo.stripTags(HTML)))
-```
-
-Quando executar o `trial-content.py`, ele acessará a página web da transcrição do julgamento de Bowsey e depois verificará o módulo `obo.py` para buscar a função *stripTags*. Ele utilizará essa função para extrair tudo após a primeira tag `<p>` e antes da última tag `<br/>`. Com alguma sorte, esse deve ser o conteúdo textual da transcrição de Bowsey, além de alguma marcação HTML. Não se preocupe se a sua tela de Saída de Comando terminar em uma linha preta grossa. A tela de saída do Komodo Edit possui um número máximo de caracteres para exibição, após o qual os caracteres começarão a literalmente escrever uns sobre os outros na tela, dando a aparência de uma linha preta. Não se preocupe: o texto está lá, ainda que não consiga vê-lo; pode cortá-lo e colá-lo em um ficheiro de texto para verificar.
-
-Vamos reservar um momento para ter certeza de que entendemos como `trial-contents.py` é capaz de usar as funções armazenadas em `obo.py`. A função *stripTags* que salvamos em `obo.py` requer um argumento. Em outras palavras, para que seja executada apropriadamente ela precisa que uma informação seja oferecida. Lembre-se do exemplo do cão treinado na lição anterior. Para latir, o cachorro precisa de duas coisas: ar e uma guloseima deliciosa. A função *stripTags* em `obo.py` precisa de uma coisa: a string chamada *pageContents*. Mas você perceberá que, quando chamamos *stripTags* no programa final (`trial-contents.py`), não há menção ao "*pageContents*". Em vez disso, a função recebe HTML como um argumento. Isso pode ser confuso para muitas pessoas quando começam a programar. Uma vez que uma função foi declarada, não precisamos usar o mesmo nome de variável quando chamamos a função. Desde que forneçamos o mesmo tipo de argumento, tudo deve funcionar bem, independente de como o chamarmos. Nesse caso, queríamos que *pageContents* usasse o conteúdo da nossa variável HTML. Você poderia ter passado qualquer string, inclusive uma que você insira diretamente entre aspas. Tente executar novamente `trial-content.py`, alterando o argumento de *stripTags* para "Eu gosto muito de cachorros" e veja o que acontece. Note que, dependendo de como defina a sua função (e o que ela faz), o seu argumento pode precisar ser algo que não seja uma string: um número inteiro (*integer*), por exemplo.
-
-Leituras sugeridas
------------------
-
-- Lutz, *Learning Python*
-  - Ch. 7: Strings
-  - Ch. 8: Lists and Dictionaries
-  - Ch. 10: Introducing Python Statements
-  - Ch. 15: Function Basics
-
-## Sincronização de Código
-
-Para acompanhar lições futuras, é importante ter os ficheiros e programas corretos no seu diretório “programming-historian”. No final de cada lição, é possível fazer o download do ficheiro zip “programming-historian” para ter a certeza de que o ficheiro correto está a ser utilizado. Observe que removemos os ficheiros desnecessários das lições anteriores. Seu diretório pode conter mais ficheiros e não há problema!
-
-- programming-historian-2 ([zip](/assets/python-lessons2.zip))
+---
+title: De HTML para Lista de Palavras (parte 1)
+layout: lesson
+collection: lessons
+slug: HTML-lista-palavras-1
+date: 2012-07-17
+translation_date: 2022-10-27
+authors:
+- William J. Turkel
+- Adam Crymble
+reviewers:
+- Jim Clifford
+- Frederik Elwert
+editors:
+- Miriam Posner
+translator:
+- Felipe Lamarca
+translation-editor:
+- Jimmy Medeiros
+translation-reviewer:
+- Daniel Bonatto Seco
+- Diana Rebelo Rodriguez
+difficulty: 2
+review-ticket: https://github.com/programminghistorian/ph-submissions/issues/442
+next: HTML-lista-palavras-2
+series_total: 2 lessons
+sequence: 1
+activity: transforming
+topics: [python]
+abstract: "Nesta lição de duas partes, aprofundaremos o que aprendeu sobre o Download de Páginas Web com Python, aprendendo como remover a marcação HTML de uma página web da transcrição do julgamento criminal de Benjamin Bowsey em 1780. Faremos isso usando uma variedade de operadores de string, métodos de string e habilidades de leitura atenta. Introduziremos looping e branching de modo que os programas possam repetir tarefas e testar certas condições, tornando possível a separação do conteúdo das tags HTML. Finalmente, faremos a conversão do conteúdo de uma string longa para uma lista de palavras, que podem ser ordenadas, indexadas e contabilizadas posteriormente."
+original: from-html-to-list-of-words-1
+avatar_alt: Uma girafa a ser imitada por um humano
+doi: 10.46430/phpt0027
+---
+
+{% include toc.html %}
+
+<div class="alert alert-warning">
+O site do Old Bailey Online foi recentemente atualizado. Infelizmente, devido às diversas mudanças, muitos (se não todos) os elementos do site de exemplo usado nesta lição não funcionarão conforme descrito. No entanto, as metodologias ensinadas por esta lição permanecem relevantes e podem ser adaptadas pelos leitores para um site de exemplo diferente. Estamos trabalhando na adaptação da lição para o novo site do Old Bailey Online, mas ainda não temos cronograma preciso de quando a lição será atualizada. [Abril de 2024]
+</div>
+
+## Objetivos da lição
+
+Nesta lição de duas partes, aprofundaremos o que aprendeu sobre o [Download de Páginas Web com Python](/pt/licoes/download-paginas-web-python), aprendendo como remover a *marcação HTML* de uma página web da [transcrição do julgamento criminal de Benjamin Bowsey em 1780](https://perma.cc/8LM6-W39K). Faremos isso usando uma variedade de *operadores de string*, *métodos de string* e habilidades de leitura atenta. Introduziremos *looping* e *branching* de modo que os programas possam repetir tarefas e testar certas condições, tornando possível a separação do conteúdo das tags HTML. Finalmente, faremos a conversão do conteúdo de uma string longa para uma *lista de palavras*, que podem ser ordenadas, indexadas e contabilizadas posteriormente.
+
+## O Desafio
+
+Para ter uma ideia mais clara da tarefa que temos pela frente, abra o ficheiro *obo-t17800628-33.html* que criou em [Download de Páginas Web com Python](/pt/licoes/download-paginas-web-python) (ou faça o [download e guarde a transcrição do julgamento](/assets/from-html-to-list-of-words-1/obo-t17800628-33.html) caso ainda não tenha uma cópia) e depois verifique o código-fonte do HTML clicando em *Ferramentas -> Ferramentas do Navegador -> Fonte da página* (para usuários do navegador Firefox). À medida que for olhando o código-fonte, notará que há tags HTML misturadas com texto. Caso não tenha experiência com HTML, recomendamos que faça o tutorial do W3 Schools [HTML](https://www.w3schools.com/html/) para se familiarizar com a marcação HTML. Se o seu trabalho frequentemente requer que remova a marcação HTML, certamente será útil entendê-la ao visualizá-la.
+
+## Ficheiros Necessários para esta Lição
+
+- *[obo-t17800628-33.html](/assets/from-html-to-list-of-words-1/obo-t17800628-33.html)*
+
+## Idealizando um Algoritmo
+
+Uma vez que o objetivo é nos livrarmos do HTML, o primeiro passo é criar um algoritmo que retorna apenas o texto (removendo as tags HTML) do artigo. Um algoritmo é um procedimento suficientemente detalhado a ponto de poder ser implementado em um computador. Facilita escrever o seu algoritmo no português direto; é uma ótima maneira de delinear exatamente o que deseja fazer antes de mergulhar no código. Para construir esse algoritmo, utilizaremos as nossas habilidades de leitura atenta para descobrir um modo de capturar apenas o conteúdo textual da biografia.
+
+Ao verificar o código-fonte do *obo-t17800628-33.html*, notará que a transcrição real não começa imediatamente. Na verdade, há um número de tags HTML e algumas informações de citação. Nesse caso, o conteúdo não começa antes da linha 81!
+
+``` xml
+<p>324. BENJAMIN BOWSEY (a blackmoor ) was indicted for that he together with five hundred other persons and more, did, unlawfully, riotously, and tumultuously assemble on the 6th of June
+```
+
+Estamos interessados apenas na transcrição em si e não nos metadados extras contidos nas tags. No entanto, irá notar que o final dos metadados corresponde ao início da transcrição. Isso torna a localização dos metadados uma marcação potencialmente útil para isolar o texto transcrito.
+
+À primeira vista, percebemos que a transcrição do julgamento em si começa com uma tag HTML: `<p>`, que significa 'parágrafo'. Essa é coincidentemente a primeira tag de parágrafo no documento. Podemos usar isso para encontrar o ponto de partida do nosso texto transcrito. Temos sorte nesse caso porque essa tag é uma maneira confiável de encontrar o início do texto transcrito no julgamento (caso deseje, dê uma olhada em alguns outros julgamentos para verificar).
+
+O texto do julgamento termina na linha 82 com outra tag HTML: `<br/>`, que significa uma quebra de linha. Essa é a última quebra de linha no documento. Essas duas tags (tag de primeiro parágrafo e última quebra de linha), portanto, nos oferecem uma forma de isolar o texto desejado. Sites bem formatados quase sempre terão uma forma única de sinalizar o fim de um conteúdo. Você frequentemente só precisa verificar de forma atenta.
+
+A próxima tarefa é remover toda a marcação HTML que permanece mesclada ao conteúdo. Como sabe que tags HTML são sempre encontradas em pares correspondentes de parênteses angulares, é provavelmente uma aposta segura o fato de que, se remover tudo o que estiver entre parênteses angulares, todo o HTML será removido e restará somente a transcrição. Note que estamos assumindo que a transcrição não possuirá os símbolos matemáticos de "menor que" ou "maior que". Se Bowsey fosse um matemático, essa suposição não seria tão segura.
+
+A seguir, descreve-se o algoritmo em palavras.
+
+Para isolar o conteúdo:
+
+- Fazer o download do texto transcrito
+- Buscar no HTML e guardar a localização da primeira tag `<p>`
+- Buscar no HTML e guardar a localização da última tag `<br/>`
+- Armazenar tudo que vier após a tag `<p>` e antes da tag `<br/>` numa string: *pageContents*
+
+Neste ponto, temos o texto da transcrição do julgamento, além da marcação HTML. Em seguida:
+
+- Verificar cada caractere na string *pageContents*, um por um
+- Se o caractere for um colchete angular esquerdo (\<), estamos dentro de uma tag e deve-se ignorar os caracteres subsequentes
+- Se o caractere for um colchete angular direito (\>), estamos deixando a tag; deve-se ignorar este caractere, mas verificar cada um dos caracteres subsequentes
+- Se não estivermos dentro de uma tag, adiciona-se cada caractere a uma nova variável: *text*
+
+Finalmente:
+
+- Separar a string de texto em uma lista de palavras individuais, que podem ser manipuladas posteriormente (veja o esboço logo abaixo).
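Apenas para visualizar o algoritmo acima reunido num único bloco, segue um esboço mínimo em Python. É uma ilustração, não parte do código da lição nem deste patch: o nome `removerTags` e as variáveis `inside` e `text` são hipotéticos, e a lição constrói a sua própria versão desta lógica passo a passo.

``` python
# Esboço ilustrativo: percorre a string caractere a caractere e
# guarda apenas o que está fora das tags HTML.
def removerTags(pageContents):
    inside = False  # estamos dentro de uma tag?
    text = ''       # caracteres acumulados fora das tags

    for char in pageContents:
        if char == '<':        # entrámos numa tag: passar a ignorar
            inside = True
        elif char == '>':      # saímos da tag: ignorar o próprio '>'
            inside = False
        elif not inside:       # fora de qualquer tag: guardar
            text += char

    # Finalmente, separar a string numa lista de palavras
    return text.split()
```

Por exemplo, `removerTags("<p>Hello <b>world</b></p>")` devolveria `['Hello', 'world']`.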

+
+## Isolar o Conteúdo Desejado
+
+Os próximos passos utilizam os comandos de Python introduzidos na lição [Manipular strings com Python](/pt/licoes/manipular-strings-python) para implementar a primeira metade do algoritmo: remover todo o conteúdo antes da tag `<p>` e depois da tag `<br/>`. Para recapitular, o algoritmo era o seguinte:
+
+- Fazer o download do texto transcrito
+- Buscar no HTML e guardar a localização da primeira tag `<p>`
+- Buscar no HTML e guardar a localização da última tag `<br/>`
+- Armazenar tudo que vier após a tag `<p>` e antes da tag `<br/>` numa string: *pageContents*
+
+Para fazer isso, você utilizará o método de string 'find', o método .rfind() (que encontra a última correspondência de algo) e criará uma nova substring contendo apenas o conteúdo desejado entre essas posições de índice.
+
+Enquanto trabalha, desenvolverá ficheiros separados para armazenar o seu código. Um deles será chamado `obo.py` (para "Old Bailey Online"). Esse ficheiro conterá todo o código que deseja reutilizar; em outras palavras, `obo.py` é um módulo. Discutimos a ideia de módulo em [Reutilização de código e modularidade em Python](/pt/licoes/reutilizacao-codigo-modularidade-python), quando salvamos nossas funções em `cumprimento.py`.
+
+Crie um novo ficheiro chamado `obo.py` e armazene-o no seu diretório *programming-historian*. Utilizaremos esse ficheiro para manter cópias das funções necessárias para processar o The Old Bailey Online. Digite ou copie o código a seguir no seu ficheiro:
+
+``` python
+# obo.py
+
+def stripTags(pageContents):
+    pageContents = str(pageContents)
+    startLoc = pageContents.find("<p>")
+    endLoc = pageContents.rfind("<br/>")
+
+    pageContents = pageContents[startLoc:endLoc]
+    return pageContents
+```
    ") + + pageContents = pageContents[startLoc:endLoc] + return pageContents +``` + +Crie um segundo ficheiro, `trial-content.py`, e salve o programa mostrado abaixo: + + +``` python +# trial-content.py + +import urllib.request, urllib.error, urllib.parse, obo + +url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33' + +response = urllib.request.urlopen(url) +HTML = response.read().decode('UTF-8') + +print((obo.stripTags(HTML))) +``` + +Quando executar o `trial-content.py`, ele acessará a página web da transcrição do julgamento de Bowsey e depois verificará o módulo `obo.py` para buscar a função *stripTags*. Ele utilizará essa função para extrair tudo após a primeira tag `

    ` e antes da última tag `
    `. Com alguma sorte, esse deve ser o conteúdo textual da transcrição de Bowsey, além de alguma marcação HTML. Não se preocupe se a sua tela de Saída de Comando terminar em uma linha preta grossa. A tela de saída do Komodo Edit possui um número máximo de caracteres para exibição, após o qual os caracteres começarão a literalmente escrever uns sobre os outros na tela, dando a aparência de uma linha preta. Não se preocupe: o texto está lá, ainda que não consiga vê-lo; pode cortá-lo e colá-lo em um ficheiro de texto para verificar. + +Vamos reservar um momento para ter certeza de que entendemos como `trial-contents.py` é capaz de usar as funções armazenadas em `obo.py`. A função *stripTags* que salvamos em `obo.py` requer um argumento. Em outras palavras, para que seja executada apropriadamente ela precisa que uma informação seja oferecida. Lembre-se do exemplo do cão treinado na lição anterior. Para latir, o cachorro precisa de duas coisas: ar e uma guloseima deliciosa. A função *stripTags* em `obo.py` precisa de uma coisa: a string chamada *pageContents*. Mas você perceberá que, quando chamamos *stripTags* no programa final (`trial-contents.py`), não há menção ao "*pageContents*". Em vez disso, a função recebe HTML como um argumento. Isso pode ser confuso para muitas pessoas quando começam a programar. Uma vez que uma função foi declarada, não precisamos usar o mesmo nome de variável quando chamamos a função. Desde que forneçamos o mesmo tipo de argumento, tudo deve funcionar bem, independente de como o chamarmos. Nesse caso, queríamos que *pageContents* usasse o conteúdo da nossa variável HTML. Você poderia ter passado qualquer string, inclusive uma que você insira diretamente entre aspas. Tente executar novamente `trial-content.py`, alterando o argumento de *stripTags* para "Eu gosto muito de cachorros" e veja o que acontece. Note que, dependendo de como defina a sua função (e o que ela faz), o seu argumento pode precisar ser algo que não seja uma string: um número inteiro (*integer*), por exemplo. + +Leituras sugeridas +----------------- + +- Lutz, *Learning Python* + - Ch. 7: Strings + - Ch. 8: Lists and Dictionaries + - Ch. 10: Introducing Python Statements + - Ch. 15: Function Basics + +## Sincronização de Código + +Para acompanhar lições futuras, é importante ter os ficheiros e programas corretos no seu diretório “programming-historian”. No final de cada lição, é possível fazer o download do ficheiro zip “programming-historian” para ter a certeza de que o ficheiro correto está a ser utilizado. Observe que removemos os ficheiros desnecessários das lições anteriores. Seu diretório pode conter mais ficheiros e não há problema! 
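Um pequeno teste do experimento sugerido acima, assumindo que o `obo.py` desta lição está no mesmo diretório:

``` python
import obo

# O nome do parâmetro na definição (pageContents) não precisa
# coincidir com o nome da variável usada na chamada:
minha_string = "Eu gosto muito de cachorros"
print(obo.stripTags(minha_string))
# Como a string não contém '<p>' nem '<br/>', find e rfind devolvem -1
# e a fatia pageContents[-1:-1] é vazia: imprime-se uma linha em branco.
```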
+
+Leituras sugeridas
+-----------------
+
+- Lutz, *Learning Python*
+  - Ch. 7: Strings
+  - Ch. 8: Lists and Dictionaries
+  - Ch. 10: Introducing Python Statements
+  - Ch. 15: Function Basics
+
+## Sincronização de Código
+
+Para acompanhar lições futuras, é importante ter os ficheiros e programas corretos no seu diretório “programming-historian”. No final de cada lição, é possível fazer o download do ficheiro zip “programming-historian” para ter a certeza de que o ficheiro correto está a ser utilizado. Observe que removemos os ficheiros desnecessários das lições anteriores. Seu diretório pode conter mais ficheiros e não há problema!
+
+- programming-historian-2 ([zip](/assets/python-lessons2.zip))
diff --git a/pt/licoes/analise-correspondencia-pesquisa-historica-R.md b/pt/licoes/analise-correspondencia-pesquisa-historica-R.md
index 44c8b6450f..db81e9ee37 100644
--- a/pt/licoes/analise-correspondencia-pesquisa-historica-R.md
+++ b/pt/licoes/analise-correspondencia-pesquisa-historica-R.md
@@ -1,474 +1,474 @@
----
-title: "Análise de Correspondência para Pesquisa Histórica com R"
-slug: analise-correspondencia-pesquisa-historica-R
-original: correspondence-analysis-in-R
-layout: lesson
-collection: lessons
-date: 2017-09-13
-translation_date: 2023-05-23
-authors:
-- Ryan Deschamps
-reviewers:
-- Sandra van Ginhoven
-- Taylor Arnold
-editors:
-- Matthew Lincoln
-translator:
-- Diana Rodriguez
-translation-editor:
-- Jimmy Medeiros
-translation-reviewer:
-- Yuri Pires
-- André Salvo
-review-ticket: https://github.com/programminghistorian/ph-submissions/issues/422
-difficulty: 3
-activity: analyzing
-topics: [data-manipulation, network-analysis, r, data-visualization]
-abstract: Esta lição explica como realizar e interpretar uma análise de correspondência com R, que pode ser usada para identificar relacionamentos dentro de dados categóricos.
-avatar_alt: Diagrama de um cubo com arestas legendadas
-mathjax: true
-doi: 10.46430/phpt0042
----
-
-{% include toc.html %}
-
-A análise de correspondência (*correspondence analysis* ou CA) produz um gráfico bidimensional ou tridimensional baseado nas relações entre duas ou mais categorias de dados. Essas categorias poderiam ser "membros e clubes", "palavras e livros" ou "países e acordos comerciais". Por exemplo, um membro do clube pode ser equivalente a outro membro com base nos clubes compartilhados aos quais ele pertence. Os membros que frequentam os mesmos clubes provavelmente têm mais em comum do que aqueles que frequentam clubes diferentes. Da mesma forma, os clubes que compartilham membros provavelmente terão mais em comum do que aqueles que compartilham membros diferentes.[^1]
-
-Discernir essas correspondências significativas pode ser muito difícil de fazer quando há muitos elementos em cada uma de suas categorias (por exemplo, se tivermos centenas de membros espalhados por dezenas de clubes.) A CA mede as correspondências mais fortes em um *dataset* e as projeta em um espaço multidimensional, possibilitando sua visualização e interpretação. Normalmente, as duas principais dimensões são mostradas de uma só vez, embora seja possível mostrar três dimensões em um display 3D.
-
-Uma vez que a CA visualiza as relações entre elementos de seus dados como distâncias em um gráfico, muitas vezes é possível descobrir padrões amplos com base em que elementos de uma categoria aparecem próximos a elementos da outra. Assim, a CA pode ser um bom primeiro passo para filtrar os principais padrões de um grande *dataset*. É uma ferramenta particularmente poderosa para entender informações históricas dentro de coleções digitais.
-
-Depois de ler este tutorial, deve ser possível:
-
-* Saber o que é a CA e para que é usada.
-* Saber como executar a CA usando o pacote FactoMineR do R.
-* Descrever com exatidão os resultados de uma CA.
-
-## Pré-requisitos
-
-Este tutorial é para historiadores e pesquisadores com habilidades intermédias em programação. Pressupõe que já se tem um conhecimento básico de R e alguns conhecimentos básicos de estatística.
-
-O tutorial [Noções básicas de R com dados tabulares](/pt/licoes/nocoes-basicas-R-dados-tabulares) tem informações sobre como organizar e configurar o R e o tutorial [Processamento Básico de Texto em R](/pt/licoes/processamento-basico-texto-r) também pode ser útil como treinamento.
-
-Como a CA é uma espécie de *social network analysis* (análise de redes sociais), pode ser interessante olhar a lição [From Hermeneutics to Data to Networks: Data Extraction and Network Visualization of Historical Sources](/en/lessons/creating-network-diagrams-from-historical-sources) (em inglês), que também tem algumas informações úteis sobre a estruturação de dados para análise de redes.
-
-## O que é a Análise de Correspondência?
-
-A análise de correspondência (CA), também chamada "escala multidimensional" ou "análise bivariada de rede", permite observar a inter-relação de dois grupos em um gráfico de dispersão com dois eixos (*two-way graph plot*). Por exemplo, foi utilizada pelo sociólogo francês Pierre Bourdieu para mostrar como categorias sociais como a ocupação influenciam a opinião política.[^2] É especialmente poderosa como ferramenta para encontrar padrões em grandes *datasets*.
-
-A CA funciona com qualquer tipo de dados categóricos (*datasets* que foram agrupados em categorias). Vamos começar com um exemplo simples. Se quisesse entender o papel dos acordos internacionais de livre comércio na interconexão das nações do G8, seria possível criar uma tabela para os países e as relações de livre comércio que eles mantinham em um determinado momento.
-
-Uma pequena seleção de acordos comerciais (em azul) incluindo o Espaço Económico Europeu (*European Economic Area* ou EEA), o Acordo Comercial Canadá-UE (*Canada-EU Trade Agreement* ou CETA), o Acordo de Livre Comércio Norte-Americano (*North American Free Trade Agreement* ou NAFTA), a Parceria Trans-Pacífico (*Trans Pacific Partnership* ou TPP) e a Associação das Nações do Sudeste Asiático (*Association of Southeast Asian Nations* ou ASEAN) corresponde aos países do G8. Os países (de cor vermelha) agrupam-se geograficamente, com países do Pacífico à direita, países europeus à esquerda e países da América do Norte ao centro. O Canadá e os Estados Unidos, como previsto, estão juntos. Alemanha, Itália, França e Reino Unido pertencem todos aos mesmos dois acordos (CETA e EEA), portanto todos caem exatamente no mesmo ponto.
-
-{% include figure.html filename="tr-pt-analise-correspondenciaR-1.png" alt="Imagem representando um gráfico de correspondência sobre acordos comerciais" caption="Figura 1. Análise de correspondência de países selecionados do G8 e seus acordos comerciais" %}
-
-Por outro lado, enquanto a Rússia e os Estados Unidos estão um pouco próximos no eixo horizontal, estão em polos opostos no eixo vertical. A Rússia só compartilha um acordo de comércio com um outro país (Japão) e os Estados Unidos com dois (Japão e Canadá). Em um gráfico de CA, unidades com poucas correlações ficarão nos arredores, enquanto aquelas unidades com maior quantidade de correlações ficarão mais próximo do centro do gráfico. A conexão relativa ou falta de conexão de um *datapoint* é quantificada como *inertia* (inércia) na CA. A falta relativa de conexão produz uma inércia maior.
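A título de referência, a 'inércia' mencionada acima tem uma definição padrão na literatura de CA (a notação abaixo é a convencional, não necessariamente a usada no apêndice da lição). Para uma tabela de contingência com proporções $p_{ij} = n_{ij}/n$, massas de linha $p_{i\cdot}$ e massas de coluna $p_{\cdot j}$, a inércia total é

$$ \text{inércia total} = \sum_{i,j} \frac{\left(p_{ij} - p_{i\cdot}\, p_{\cdot j}\right)^2}{p_{i\cdot}\, p_{\cdot j}} = \frac{\chi^2}{n}, $$

isto é, a estatística qui-quadrado da tabela dividida pelo total de observações $n$; pontos cujo perfil se afasta muito do perfil médio contribuem mais para essa soma.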
-
-Um ponto mais substancial sobre a Rússia e os Estados Unidos é que a Rússia é um país do Pacífico que não pertence à TPP. Observando esta relação, um historiador poder-se-ia perguntar se isto ocorre por causa de uma relação comercial tensa entre a Rússia e os Estados Unidos em comparação com outros países do G8, ou por atitudes gerais em relação a acordos comerciais para estes países.[^3]
-
-Com mais dados, a CA pode descobrir distinções mais subtis entre grupos dentro de uma categoria particular. Neste tutorial, analisaremos a vida política canadense - especificamente, como representantes políticos são organizados em comités durante um ou outro governo. Semelhante aos acordos comerciais, esperaríamos que os comités que têm membros semelhantes estivessem mais próximos uns dos outros. Além disso, os comités que têm poucos representantes em comum se encontrarão nos cantos do gráfico.
-
-## Comités Parlamentares Canadenses (CPCs)
-
-No sistema parlamentar canadense, os cidadãos elegem representantes chamados membros do Parlamento, ou deputados, para a Câmara dos Comuns. Os parlamentares são responsáveis por votar e propor alterações à legislação no Canadá. Os [Comités Parlamentares (CPCs)](https://perma.cc/3PT6-77DB) (em inglês) consistem de parlamentares que informam à Câmara sobre detalhes importantes da política em uma área temática. Exemplos de tais comités incluem os CPCs sobre Finanças, Justiça e Saúde.
-
-Usaremos abreviações para os comités parlamentares, porque os nomes podem ficar longos, tornando-os difíceis de ler em um gráfico. É possível usar esta tabela como um guia de referência para as abreviações e seus respectivos nomes de comités:
-
-| Abbreviation (Abreviação) | Committee Name (Tradução do Nome do Comité) |
-| :----------- | :----------------------------------------------------------------------------------------: |
-| INAN | Indigenous and Northern Affairs (Assuntos Indígenas e do Norte) |
-| HUMA | Human Resources, Skills and Social Development and the Status of Persons with Disabilities (Recursos Humanos, Habilidades e Desenvolvimento Social e o Status das Pessoas com Deficiência) |
-| FINA | Finance (Finanças) |
-| FAAE | Foreign Affairs and International Development (Relações Exteriores e Desenvolvimento Internacional) |
-| ETHI | Access to Information, Privacy and Ethics (Acesso à Informação, Privacidade e Ética) |
-| ENVI | Environment and Sustainable Development (Meio Ambiente e Desenvolvimento Sustentável) |
-| CHPC | Canadian Heritage (Herança Canadense) |
-| CIMM | Citizenship and Immigration (Cidadania e Imigração) |
-| ACVA | Veterans Affairs (Assuntos de Veteranos) |
-| HESA | Health (Saúde) |
-| TRAN | Transport, Infrastructure and Communities (Transporte, Infraestrutura e Comunidades) |
-| FOPO | Fisheries and Oceans (Pesca e Oceanos) |
-| RNNR | Natural Resources (Recursos Naturais) |
-| FEWO | Status of Women (Status das Mulheres) |
-| ESPE | Pay Equity (Igualdade de Remuneração) |
-| IWFA | Violence against Indigenous Women (Violência Contra as Mulheres Indígenas) |
-| BILI | Library of Parliament (Biblioteca do Parlamento) |
-| AGRI | Agriculture and Agri-food (Agricultura e Agroalimentação) |
-| JUST | Justice and Human Rights (Justiça e Direitos Humanos) |
-
Primeiro, os CPCs são formados por lideranças partidárias e as decisões dos comités precisam de coordenação entre os membros da Câmara. Em outras palavras, os partidos políticos usarão os CPCs como ferramentas para marcar pontos políticos, e os governos devem garantir que as pessoas certas sejam membros dos comités certos para proteger suas agendas políticas. Em segundo lugar, os dois governos têm um enfoque político diferente. O governo conservador de Harper se concentrou mais em questões de desenvolvimento económico, enquanto os Liberais de Trudeau enfatizaram, em primeiro lugar a igualdade social. Em resumo, pode haver algumas decisões calculadas sobre quem entra em que comité, fornecendo evidências sobre as atitudes do governo em relação ou contra certos tópicos. - -## Preparando o R para a CA - -Para fazer uma CA, precisaremos de um pacote de álgebra linear. Para os mais inclinados à matemática, há um apêndice com alguns detalhes sobre como isto é feito. Em R, há várias opções para CA, mas usaremos o [pacote FactoMineR](http://factominer.free.fr/) (em inglês), focado na "análise de dados exploratórios multivariados".[^4] A FactoMineR pode ser usada para conduzir todos os tipos de análises multivariadas diferentes, incluindo *clusters* hierárquicos, análise fatorial e assim por diante. - -Mas, primeiro, aqui está como instalar e puxar os pacotes, depois colocá-los em um objeto R para que possam ser discutidos. - -```R - -## Estes comandos só precisam ser feitos na primeira vez que se realiza uma análise. -## FactoMineR é um pacote bastante grande, portanto pode levar algum tempo para ser carregado. - -install.packages("FactoMineR") # Inclui um módulo para a condução de CA. -install.packages("factoextra") # Pacote para embelezar os nossos gráficos de CA. - -# Importar os pacotes: -library(FactoMineR) -library(factoextra) - -# set.seed(189981) # Opcional para reprodução. - -# Leia os ficheiros csv: - -harper_df <- read.csv("http://programminghistorian.org/assets/correspondence-analysis-in-R/HarperCPC.csv", stringsAsFactors = FALSE) -``` - - -## Os dados - -Se quiser ver os dados brutos, os dados para este tutorial podem ser encontrados no [Zenodo](https://doi.org/10.5281/zenodo.889846) (em inglês). Foram convenientemente incluídos também no formato tabular (nota: não é necessário baixar estes ficheiros manualmente. Usaremos o R para baixá-los diretamente): - -1) [CPCs do Harper](/assets/correspondence-analysis-in-R/HarperCPC.csv) -2) [CPCs do Trudeau's](/assets/correspondence-analysis-in-R/TrudeauCPC.csv) - -Uma amostra dos dados para a primeira sessão do governo de Stephen Harper. As filas representam comités e as colunas são membros específicos. Se um membro pertence a um comité, a célula terá um 1; se não, terá um 0. - -``` -harper_df - C Bennett D Wilks DV Kesteren G Rickford J Crowder K Block K Seeback -FAAE 0 0 1 0 0 0 0 -FEWO 0 0 0 0 0 0 0 -FINA 0 0 1 0 0 0 0 -HESA 0 1 0 0 0 1 0 -INAN 1 0 0 1 1 0 1 -IWFA 1 0 0 1 1 1 0 -JUST 0 1 0 0 0 0 1 - - L Davies N Ashton R Goguen R Saganash S Ambler S Truppe -FAAE 0 0 0 1 0 0 -FEWO 0 1 0 0 1 1 -FINA 0 0 0 0 0 0 -HESA 1 0 0 0 0 0 -INAN 0 0 0 0 1 0 -IWFA 1 1 1 1 1 1 -JUST 0 0 1 0 0 0 -``` - -Estruturado de outra forma (através de uma tabela R) podemos mostrar que os comités têm muitos deputados e alguns deputados são membros de vários comités. 
Por exemplo, a deputada liberal Carolyn Bennett era membro do "INAN" (Assuntos Indígenas e do Norte) e do "IWFA" (Violência contra Mulheres Indígenas) e o "HESA" (Comité Parlamentar de Saúde) incluía tanto o D Wilks como o K Block. Em geral, os comités têm entre nove e doze membros. Alguns parlamentares são membros de apenas um comité, enquanto outros podem pertencer a vários comités. - - -## Análise de Correspondência dos Comités Parlamentares Canadenses 2006 e 2016 - -O nosso *data frame* `harper_df` consiste em nomes completos de comités e nomes de deputados, mas alguns dos nomes dos comités (por exemplo, "Recursos Humanos, Habilidades e Desenvolvimento Social" e o "Status das Pessoas com Deficiência") são muito longos para serem bem mostrados em um gráfico: vamos usar as abreviações. - -```R -harper_table <- table(harper_df$abbr, harper_df$membership) -``` - -O comando `table` (tabela) faz um *dataset* de dados cruzados de duas categorias no *data frame*. As colunas são MPs individuais e as linhas são comités. Cada célula contém um 0 ou um 1 baseado na existência ou não de uma conexão. Se olhássemos a presença real em cada reunião, poderíamos também incluir valores ponderados (por exemplo, 5 para um membro do parlamento que participa de uma reunião de comité 5 vezes). Como regra geral, usar valores ponderados quando as quantidades importam (quando as pessoas investem dinheiro, por exemplo), e usar 0s e 1s quando não importam. - -Infelizmente, temos mais um problema. Muitos deputados são membros de apenas 1 comité. Isso fará com que esses deputados se sobreponham quando criarmos o gráfico, tornando-o menos legível. Vamos exigir que os parlamentares pertençam a pelo menos 2 comités antes de executarmos o comando CA da FactoMineR. - -```R -harper_table <- harper_table[,colSums(harper_table) > 1] -CA_harper <- CA(harper_table) -plot(CA_harper) -``` - -O comando `colSums` soma os valores para cada coluna da tabela. `rowSums` poderia ser usado para somar as linhas se isso fosse necessário (não é para nós, porque todos os comités têm mais de um deputado). - -O comando `CA` traça os resultados para as duas dimensões superiores e armazena o resumo dos dados em uma variável chamada `CA_harper`. Na maioria das vezes, `CA` faz a maior parte do trabalho. Como discutido, mais detalhes sobre a matemática por trás da CA são fornecidos no [apêndice](#Apêndice:AMatemáticaportrásdaAnálisedeCorrespondência). - -Deve-se obter um gráfico que se parece com isto: - -{% include figure.html filename="tr-pt-analise-correspondenciaR-2.png" alt="Imagem representando um gráfico de correspondências sobre comités parlamentares" caption="Figura 2. Análise de correspondência dos Comités Parlamentares para a 1ª Sessão do Governo Harper" %} - -Vamos tratar os dados do governo Trudeau exatamente da mesma maneira. - -```R -trudeau_df <- read.csv("http://programminghistorian.org/assets/correspondence-analysis-in-R/TrudeauCPC.csv", stringsAsFactors = FALSE) -trudeau_table <- table(trudeau_df$abbr, trudeau_df$membership) -trudeau_table <- trudeau_table[,colSums(trudeau_table) > 1] -CA_trudeau <- CA(trudeau_table) -plot(CA_trudeau) -``` -{% include figure.html filename="tr-pt-analise-correspondenciaR-3.png" alt="Imagem representando um gráfico de correspondências sobre comités parlamentares" caption="Figura 3. Análise de correspondência dos Comités Parlamentares para a 1ª Sessão do Governo de Justin Trudeau" %} - -As nossas etiquetas de dados não são muito legíveis no momento. 
Mesmo com a mudança para abreviações, as etiquetas estão sobrepostas. O pacote [factoextra](https://cran.r-project.org/web/packages/factoextra/index.html) (em inglês) tem uma característica de repelir que ajuda a mostrar as coisas mais claramente.[^5] - -``` -fviz_ca_biplot(CA_harper, repel = TRUE) -``` - -{% include figure.html filename="tr-pt-analise-correspondenciaR-4.png" alt="Imagem representando um gráfico de correspondências sobre comités parlamentares" caption="Figura 4. Análise de correspondência dos Comités Parlamentares para a 1ª Sessão do Governo Harper" %} - -``` -fviz_ca_biplot(CA_trudeau, repel = TRUE) -``` - -{% include figure.html filename="tr-pt-analise-correspondenciaR-5.png" alt="Imagem representando um gráfico de correspondências sobre comités parlamentares" caption="Figura 5. Análise de correspondência dos Comités Parlamentares para a 1ª Sessão do Governo de Justin Trudeau" %} - -Em vez de se sobrepor, as etiquetas agora usam setas para mostrar sua localização onde for apropriado. - -## Interpretando a Análise de Correspondência (CA) - -Os gráficos de dados parecem mais bonitos, mas quão bem podemos confiar na validade desses dados? A nossa primeira dica é olhar para as dimensões. Nos dados Harper, apenas onze e dez por cento de valor explicativo aparecem no eixo horizontal e vertical respectivamente para um total de 21%![^6] Isso não soa promissor para a nossa análise. Lembrando que o número total de dimensões é igual ao número de filas ou colunas (o que for menor), isto pode ser preocupante. Quando tais valores baixos ocorrem, geralmente significa que os pontos de dados são distribuídos de forma bastante uniforme, e que os MPs são distribuídos de forma uniforme nos CPCs é uma convenção bastante bem estabelecida do parlamento. - -Outra maneira de olhar para os dados é através de valores de inércia.[^7] Mais detalhes sobre inércia podem ser encontrados no [apêndice](#Apêndice:AMatemáticaportrásdaAnálisedeCorrespondência) mas, no gráfico, os pontos de dados distantes da origem têm maior inércia. Pontos de inércia elevados sugerem *outliers* (valores atípicos) - atores ou eventos que têm menos conexões do que aqueles próximos ao centro. Os baixos valores de inércia sugerem pontos de dados que têm mais em comum com o grupo como um todo. Como uma ferramenta de análise, pode ser útil para encontrar atores ou subgrupos renegados no *dataset*. Se todos os pontos tiverem alta inércia, pode ser um indicador de alta diversidade ou fragmentação para as redes. A baixa inércia geral pode ser um indicador de maior coesão ou convergência geral. O que isso significa dependerá do *dataset*. Para os nossos gráficos, nenhum projeto de *datapoint* vai muito além de 2 passos da média. Mais uma vez, este é um indicador de que as relações estão relativamente distribuídas de maneira uniforme. - -Vamos analisar os dados mais de perto: - -```R -summary(CA_harper) -``` - -Isto nos retorna - -``` -HARPER - -O qui-quadrado da independência entre as duas variáveis é igual a 655.6636 -(p-value = 0.7420958 ). - -Eigenvalues - Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 -Variance 0.831 0.779 0.748 0.711 0.666 0.622 -% of var. 11.024 10.342 9.922 9.440 8.839 8.252 -Cumulative % of var. 11.024 21.366 31.288 40.729 49.568 57.820 - - Dim.7 Dim.8 Dim.9 Dim.10 Dim.11 Dim.12 -Variance 0.541 0.498 0.463 0.346 0.305 0.263 -% of var. 7.174 6.604 6.138 4.591 4.041 3.488 -Cumulative % of var. 
64.995 71.599 77.736 82.328 86.368 89.856 - - Dim.13 Dim.14 Dim.15 Dim.16 Dim.17 -Variance 0.240 0.195 0.136 0.105 0.088 -% of var. 3.180 2.591 1.807 1.396 1.170 -Cumulative % of var. 93.036 95.627 97.434 98.830 100.000 -``` - -O cabeçalho `Eigenvalues` do resumo apresenta métricas sobre as dimensões recém computadas, listando a percentagem de variância contida em cada uma delas. Infelizmente, a percentagem de variância encontrada nas duas dimensões superiores é muito baixa. Mesmo se conseguíssemos visualizar 7 ou 8 dimensões dos dados, capturaríamos apenas uma percentagem acumulada de cerca de 70%. O teste de independência do [qui-quadrado](https://perma.cc/8B82-YAX6) nos diz que não podemos rejeitar a hipótese de que nossas duas categorias (CPCs e MPs) são independentes. O valor p (ou *p-value*) é 0,74, bem acima do 0,05 comumente usado como um recorte para rejeitar uma hipótese nula.[^8] Um valor p menor ocorreria, por exemplo, se todos ou a maioria dos deputados fossem membros de um ou dois comités. A propósito, o valor de p quadrado de chi da amostra de Trudeau é menor em 0,54, mas ainda não o suficiente para rejeitar a hipótese de categorias mutuamente independentes. - -Como discutido, este resultado não é muito surpreendente. Esperamos que os deputados sejam distribuídos de forma relativamente uniforme entre os comités. Se optarmos por ponderar as nossas medidas com base na participação dos parlamentares em cada reunião de comité ou em seu desejo de 1-100 de ser membro de cada comité, poderemos ver resultados diferentes (por exemplo, pode ser mais comum que os parlamentares participem regularmente nas reuniões financeiras em comparação com outras reuniões). - -A CA falhou conosco? Bem, na verdade não. Isto significa apenas que não podemos simplesmente lançar dados em um algoritmo e esperar responder a perguntas reais de história. Mas nós não somos apenas programadores, mas historiadores de programação. Vamos colocar nossos bonés da história e ver se podemos refinar as nossas pesquisas! - -## Trudeau ampliou a Agenda para a Igualdade das Mulheres no Parlamento? - -Uma das primeiras medidas políticas que Justin Trudeau tomou foi garantir que o Canadá tinha um gabinete com 50% de mulheres. É discutível que o objetivo deste anúncio era professar uma agenda de igualdade de género. Na sua primeira sessão, o governo de Trudeau também criou um novo Comité Parlamentar sobre igualdade de remuneração para as mulheres. Além disso, o governo de Trudeau apresentou uma moção para que houvesse um inquérito sobre Mulheres Indígenas Desaparecidas e Assassinadas, substituindo o mandato do comité parlamentar de Harper para a Violência Contra as Mulheres Indígenas. - -Se Trudeau tivesse a intenção de levar a igualdade das mulheres a sério, poderíamos esperar que mais membros do comité do Status da Mulher estivessem ligados a pastas maiores, como Justiça, Finanças, Saúde e Relações Exteriores, em comparação com o governo de Harper. Como o regime de Harper não tinha um CPC de salário igual, incluiremos o CPC para "Violência contra Mulheres Indígenas". - -```R -# Inclua apenas os comités desejados: -# HESA: Health, JUST: Justice, FEWO: Status of Women -# INAN: Indigenous and Northern Affairs, FINA: Finance -# FAAE: Foreign Affairs and International Trade -# IWFA: Violence against Indigenous Women - -harper_df2 <- harper_df[which(harper_df$abbr %in% - c("HESA", "JUST", "FEWO", "INAN", "FINA", "FAAE", "IWFA")),] -harper_table2 <- table(harper_df2$abbr, harper_df2$membership) - -# Remova os singles de novo. 
-harper_table2 <- harper_table2[, colSums(harper_table2) > 1] -CA_Harper2 <- CA(harper_table2) -plot(CA_Harper2) -``` - -Isto produz o seguinte gráfico: - -{% include figure.html filename="tr-pt-analise-correspondenciaR-6.png" alt="Imagem representando um gráfico de correspondências sobre comités parlamentares" caption="Figura 6. Análise de correspondência de Comités Parlamentares selecionados para a 1ª Sessão do Governo de Stephen Harper" %} - -O valor p do qui-quadrado para este resultado se move apenas ligeiramente em direção a zero, para 0,71. Ainda não podemos tirar nenhuma conclusão quantitativa sobre uma relação clara entre CPCs e MPs. Para os nossos dados, este não é um resultado muito importante. Se pesquisássemos os CPCs sobre qual CPC era o mais produtivo ou importante, talvez encontrássemos valores p mais baixos. A inércia no eixo horizontal praticamente dobrou, sugerindo que o FINA (Finance) é um valor mais baixo no gráfico em comparação com os outros portfólios. - -O significado de um CA depende de uma interpretação qualitativa da trama. Por exemplo, observando os elementos do gráfico Harper podemos dizer que as preocupações económicas caem para a direita do eixo y e as preocupações sociais caem para a esquerda. Portanto, uma das "razões" para escolher os parlamentares para participar de comités no governo Harper parece ser a distinção entre preocupações sociais e económicas. - -Entretanto, quando fazemos a mesma análise com o governo de Trudeau... - -```R -trudeau_df2 <- trudeau_df[which(trudeau_df$abbr %in% - c("HESA", "JUST", "FEWO", "INAN", "FINA", "FAAE", "ESPE")),] -trudeau_table2 <- table(trudeau_df2$abbr, trudeau_df2$membership) -trudeau_table2 <- trudeau_table2[, colSums(trudeau_table2) > 1] # remova os singles de novo -CA_trudeau2 <- CA(trudeau_table2) -plot(CA_trudeau2) -``` - -Produzimos um gráfico incompleto e esta mensagem aparece: - -``` -Warning message: -In CA(trudeau_table2) : -The rows FAAE, INAN, JUST sum at 0. They were suppressed from the analysis. -``` - -Isto significa que o gráfico produzido não nos mostra as colunas FAEE, INAN e JUST. Como o valor de cada uma delas é 0, elas foram suprimidas da análise. Olhando para a tabela `trudeau_table2`, vemos que: - -``` - A Vandenbeld D Albas M Gladu R Harder S Sidhu -ESPE 1 1 1 0 1 -FAAE 0 0 0 0 0 -FEWO 1 0 1 1 0 -FINA 0 1 0 0 0 -HESA 0 0 0 1 1 -INAN 0 0 0 0 0 -JUST 0 0 0 0 0 -``` - -Não há nenhuma associação cruzada para FAEE, INAN ou JUST! Bem, isso é um resultado em si mesmo. Podemos concluir, em geral, que as agendas dos dois governos são bastante diferentes, e que houve uma abordagem diferente utilizada para organizar os parlamentares em comités. - -Para um historiador canadense, o resultado faz algum sentido, dado que a Violência contra as Mulheres Indígenas (IWFA) tem muito mais probabilidade de estar ligada aos Assuntos Indígenas e do Norte (INAN), e à Justiça e Direitos Humanos (JUST), do que à Igualdade de Remuneração (ESPE). Afinal, a história da Violência contra as Mulheres Indígenas está ligada a uma série de casos criminais de alto nível no Canadá. Como discutido anteriormente, a análise de CA requer uma quantidade de interpretação para se tornar significativa. - -Talvez possamos observar alguns comités diferentes em seu lugar. Ao retirar “JUST”, “INAN” e “FAAE” (Relações Exteriores) e substituí-los por “CIMM” (Imigração), “ETHI” (Ética e Acesso à Informação) e “HUMA” (Recursos Humanos), podemos obter uma imagem melhor da estrutura dos comités parlamentares neste contexto. 
- -```R -trudeau_df3 <- trudeau_df[which(trudeau_df$abbr %in% - c("HESA", "CIMM", "FEWO", "ETHI", "FINA", "HUMA", "ESPE")),] -trudeau_table3 <- table(trudeau_df3$abbr, trudeau_df3$membership) -trudeau_table3 <- trudeau_table3[, colSums(trudeau_table3) > 1] # remova os singles de novo -CA_trudeau3 <- CA(trudeau_table3) -plot(CA_trudeau3) -``` - -{% include figure.html filename="tr-pt-analise-correspondenciaR-7.png" alt="Imagem representando um gráfico de correspondências sobre comités parlamentares" caption="Figura 7. Análise de correspondência de Comités Parlamentares selecionados para a 1ª Sessão do Governo de Justin Trudeau" %} - -Em geral, a inércia no eixo horizontal é menor que a do governo de Harper, mas a separação tem "HUMA" (Recursos Humanos) e "ETHI" (Ética) contra os outros portfólios à direita. A delimitação entre questões sociais e económicas não é tão evidente como para Harper, sugerindo uma filosofia diferente para a seleção. Dito isto, também há menos deputados compartilhando as posições. Isto pode ser outro mistério para uma maior exploração. No entanto, o processo CA nos fornece uma visão sólida das relações que ocorrem dentro dos comités com um olhar rápido e com muito poucos comandos. - -## Análise - -Como na maioria das pesquisas interpretativas, não obtemos uma resposta direta à nossa pergunta sobre o poder para as mulheres nos governos parlamentares. No caso Harper, vemos uma divisão no eixo horizontal entre questões sociais como Saúde e Justiça e questões económicas como Finanças e Relações Exteriores, respondendo por 35% da variação. Pela visualização, podemos adivinhar que Finanças (FINA) e Relações Exteriores (FAAE) têm um membro comum e que Relações Exteriores (FAAE) tem um membro comum com Violência contra Mulheres Indígenas (IWFA). Este resultado é, possivelmente, uma preocupação, pois as agendas mais divulgadas de Stephen Harper tendiam a se concentrar em preocupações económicas como o comércio e a contenção fiscal. A separação dos comités implica que a filosofia de governança de Harper separava as preocupações económicas das sociais e que os direitos das mulheres eram principalmente uma preocupação social. A própria pasta Status da Mulher (FEWO) é separada do resto das pastas, encontrando-se ligada às outras pastas somente através de parlamentares comuns com os comités Violência contra Mulheres Indígenas (IWFA) e Assuntos Indígenas e do Norte (INAN). - -O gráfico do governo de Trudeau não mostra conexões cruzadas do Status da Mulher com a Justiça, Relações Exteriores e Povos Indígenas, mas conexões mais fortes com Finanças, Cidadania, Recursos Humanos e Ética. Os Direitos da Mulher estão ligados às Finanças e à Imigração através da carteira de Igualdade de Remuneração. - -É discutível que o regime do governo Harper alinhou os Direitos das Mulheres às pastas sociais como Justiça e Saúde, enquanto Trudeau elevou o perfil do Status da Mulher até certo ponto ao incluir o comité de Igualdade de Remuneração. A conexão entre os comités focados nos Direitos da Mulher e fortes carteiras como Saúde, Finanças e Cidadania e Imigração no governo Trudeau é digna de uma análise mais detalhada. Neste contexto, o Status da Mulher parece ter uma posição mais central (mais próxima da origem) do que o comité Status da Mulher no governo de Harper. Dito isto, o número de pontos de dados neste caso ainda é bastante pequeno para se chegar a uma conclusão definitiva. Talvez outras fontes de evidência possam ser visualizadas de maneira semelhante para confirmar ou negar este ponto. 
- -A agenda anteriormente mantida entre as mulheres e os povos indígenas foi deslocada no caso Trudeau. Como discutido anteriormente, o [National Inquiry into Missing and Murdered Indigenous Women and Girls](https://perma.cc/U38Y-4CY9) (Inquérito Nacional sobre Mulheres Indígenas Desaparecidas e Assassinadas) (em inglês) deslocou o mandato para o comité Violência contra as Mulheres Indígenas que existia durante o mandato de Harper. A história desta transição é complexa, mas a pressão política foi aplicada ao governo Harper para criar o Inquérito Nacional sobre Mulheres Indígenas Desaparecidas e Assassinadas após o julgamento de Robert Pickton e relatos de investigações policiais insuficientes para mulheres indígenas desaparecidas. Harper recusou-se a conduzir um inquérito citando que o CPC era a melhor abordagem.[^9] Trudeau fez uma promessa eleitoral de incluir o inquérito, deslocando assim o comité. Até certo ponto, Harper parece ter dado à violência contra as mulheres indígenas um papel bastante central no planejamento do Comité Parlamentar. Esta evidência é um contraponto às críticas de que Harper não levou a sério a questão das Mulheres Indígenas Desaparecidas e Assassinadas. - -As diferenças entre as duas relações levantam questões importantes sobre o papel do Status da Mulher no discurso político e suas interconexões entre identidade racial, finanças públicas, saúde e justiça social, a serem exploradas talvez em um trabalho qualitativo mais detalhado. Também levanta questões importantes sobre o foco no género em geral (de acordo com a carteira do Status da Mulher) ou mais especificamente, uma vez que se aplica a um grupo marginalizado (Mulheres Indígenas Desaparecidas e Assassinadas). Um documento de política relacionado aos benefícios de um Inquérito versus discussão do Comité Parlamentar parece razoável após examinar esta evidência. Talvez haja um argumento de que a troca do "IWFA" por "ESPE" é uma espécie de teto de vidro, colocando artificialmente uma cota em questões de mulheres enquanto as carteiras estabelecidas permanecem intocadas. Como uma ferramenta exploratória, a CA nos ajuda a identificar tais temas a partir da observação empírica, em vez de confiar na teoria ou em preconceitos pessoais. - -## Conclusão - -Agora que este tutorial está completo, é possível ter alguma noção do que é a CA e como pode ser usada para responder perguntas exploratórias sobre dados. Usamos o comando `CA` do FactoMineR para criar a análise e traçar os resultados em duas dimensões. Quando as etiquetas se cruzaram, aplicamos o comando `viz_ca_biplot` do pacote factoextra para exibir os dados em um formato mais legível. - -Também aprendemos como interpretar uma CA e como detectar potenciais armadilhas analíticas, incluindo casos em que as relações entre categorias são distribuídas de forma muito uniforme e têm baixo valor explicativo. Neste caso, refinamos a nossa pergunta e os dados de pesquisa para fornecer uma imagem mais significativa do que aconteceu. - -Em geral, o benefício desta análise é fornecer uma rápida visão geral do *dataset* de duas categorias, como um guia para questões históricas mais substantivas. O uso de membros e reuniões ou eventos em todas as áreas da vida (negócios, sem fins lucrativos, reuniões municipais, *hashtags* de twitter, etc.) é uma abordagem comum para tal análise. Os grupos sociais e as suas preferências são outro uso comum para a CA. Em cada caso, a visualização oferece um mapa com o qual se pode observar um retrato da vida social, cultural e política. 
- -Os próximos passos podem incluir a adição de outras dimensões categóricas à nossa análise, como a incorporação do partido político, idade ou sexo. Quando se faz CA com mais de duas categorias, é chamada de [Análise de Correspondência Múltipla ou MCA](https://www.youtube.com/watch?v=RDexHE5Iqrg) (em inglês). Enquanto a matemática para a MCA é mais complicada, os resultados finais são bastante semelhantes aos da CA. - -Esperamos que, agora, estes métodos sejam aplicados aos seus próprios dados, ajudando a descobrir perguntas e hipóteses que enriquecem a sua pesquisa histórica. Boa sorte! - -## Apêndice: A Matemática por trás da Análise de Correspondência - -Como a matemática da CA será interessante para alguns e não para outros, optamos por discuti-la neste Apêndice. A secção também contém um pouco mais de detalhes sobre outros tópicos, tais como inércia (*inertia*), dimensões (*dimensions*) e decomposição de valores singulares (*singular value decomposition* ou SVD). - -A fim de facilitar a compreensão, começaremos com apenas alguns comités. "FEWO" (Status das Mulheres ou *Status of Women*), "HESA" (Saúde ou *Health*), "INAN" (Assuntos Indígenas e do Norte ou *Indigenous and Northern Affairs*), "IWFA" (Violência contra as Mulheres Indígenas ou *Violence Against Indigenous Women*) e "JUST" (Justiça ou *Justice*). - -``` - C Bennett D Wilks G Rickford J Crowder K Block K Seeback L Davies N Ashton -FEWO 0 0 0 0 0 0 0 1 -HESA 0 1 0 0 1 0 1 0 -INAN 1 0 1 1 0 1 0 0 -IWFA 1 0 1 1 1 0 1 1 -JUST 0 1 0 0 0 1 0 0 - - R Goguen S Ambler S Truppe -FEWO 0 1 1 -HESA 0 0 0 -INAN 0 1 0 -IWFA 1 1 1 -JUST 1 0 0 -``` - -A CA é feita em um *dataset* “normalizado” que é criado pela divisão do valor de cada célula pela raiz quadrada do produto da coluna e totais de linhas, ou célula \\(\frac{1}{\sqrt{column total \times row total}}\\). Por exemplo, a célula de "FEWO" e S Ambler é \\(\frac{1}{\sqrt{3 \times 3}}\\) ou 0.333.[^10] - -A tabela “normalizada” se parece com isto: - -``` - C Bennett D Wilks G Rickford J Crowder K Block K Seeback L Davies N Ashton -FEWO 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.408 -HESA 0.000 0.408 0.000 0.000 0.408 0.000 0.408 0.000 -INAN 0.316 0.000 0.316 0.316 0.000 0.316 0.000 0.000 -IWFA 0.235 0.000 0.235 0.235 0.235 0.000 0.235 0.235 -JUST 0.000 0.408 0.000 0.000 0.000 0.408 0.000 0.000 - - R Goguen S Ambler S Truppe -FEWO 0.000 0.333 0.408 -HESA 0.000 0.000 0.000 -INAN 0.000 0.258 0.000 -IWFA 0.235 0.192 0.235 -JUST 0.408 0.000 0.000 -``` - -O processo de normalização faz algo interessante. Aqueles que são membros de múltiplos comités e/ou que pertencem a comités com muitos membros tendem a ter notas de normalização mais baixas, sugerindo que são mais centrais para a rede. Estes membros serão colocados mais próximos do centro da matriz. Por exemplo, a célula pertencente a S Ambler e "IWFA" tem a pontuação mais baixa de 0,192 porque S Ambler é membro de três comités e o comité "IWFA" tem nove membros no gráfico representado. - -A próxima etapa é encontrar a decomposição de valor singular destes dados normalizados. Isto envolve álgebra linear bastante complexa que não será abordada aqui, mas pode-se aprender mais com este tutorial de *[Single Value Decomposition](https://perma.cc/CD5F-AL7W)* (Decomposição de Valores Singulares) (em inglês) ou com mais detalhes [neste pdf sobre SVD](https://perma.cc/F7MJ-EGET) (em inglês). Vou tentar resumir o que acontece em termos leigos. 
- -* Duas novas matrizes são criadas que mostram pontuações de “dimensão” para as linhas (comités) e as colunas (MPs) baseadas em vetores próprios. -* O número de dimensões é igual ao tamanho das colunas ou filas menos 1, que é sempre menor. Neste caso, há cinco comités em comparação com as MPs onze, portanto o número de dimensões é 4. -* Uma outra matriz mostra os valores singulares (valores próprios ou *eigenvalues*), que podem ser usados para mostrar a influência de cada dimensão na análise. -* Um dos vários “tratamentos” é aplicado aos dados para facilitar a plotagem. O mais comum é a abordagem de “coordenadas padrão”, que compara cada pontuação normalizada de forma positiva ou negativa com a pontuação média. - -Ao usar coordenadas padrão, a nossa tabela de dados mostra o seguinte: -``` -Columns (MPs): - -Dim 1 Dim 2 Dim 3 Dim 4 -C Bennett -0.4061946 -0.495800254 0.6100171 0.07717508 -D Wilks 1.5874119 0.147804035 -0.4190637 -0.34058221 -G Rickford -0.4061946 -0.495800254 0.6100171 0.07717508 -J Crowder -0.4061946 -0.495800254 0.6100171 0.07717508 -K Block 0.6536800 0.897240970 0.5665289 0.04755678 -K Seeback 0.5275373 -1.245237189 -0.3755754 -0.31096392 -L Davies 0.6536800 0.897240970 0.5665289 0.04755678 -N Ashton -0.8554566 0.631040866 -0.6518568 0.02489229 -R Goguen 0.6039463 -0.464503802 -0.6602408 0.73424971 -S Ambler -0.7311723 -0.004817303 -0.1363437 -0.30608465 -S Truppe -0.8554566 0.631040866 -0.6518568 0.02489229 - -$inertia -[1] 0.06859903 0.24637681 0.06859903 0.06859903 0.13526570 0.17971014 0.13526570 -[8] 0.13526570 0.13526570 0.08438003 0.13526570 - -Rows (Committees): - -Dim 1 Dim 2 Dim 3 Dim 4 -FEWO -1.0603194 0.6399308 -0.8842978 -0.30271466 -HESA 1.2568696 0.9885976 0.4384432 -0.28992174 -INAN -0.3705046 -0.8359969 0.4856563 -0.27320374 -IWFA -0.2531830 0.1866016 0.1766091 0.31676507 -JUST 1.1805065 -0.7950050 -0.8933999 0.09768076 - -$inertia -[1] 0.31400966 0.36956522 0.24927536 0.09017713 0.36956522 -``` - -Cada pontuação para uma “dimensão” pode ser usada como uma coordenada nesse gráfico. Como não podemos visualizar em quatro dimensões, as saídas CA normalmente se concentram nas primeiras duas ou três dimensões para produzir um gráfico (por exemplo, "HESA" será plotado em `[1.245, 0.989]` ou `[1.245, 0.989, 0.438]` em um gráfico 3D). - -{% include figure.html filename="tr-pt-analise-correspondenciaR-8.png" alt="Imagem representando um gráfico de correspondências sobre comités parlamentares" caption="Figura 8. Análise de correspondência de Comités Parlamentares selecionados para a 1ª Sessão do Governo Stephen Harper, 2006" %} - -As pontuações de inércia são uma forma de mostrar a variação nos dados. Saúde e Justiça possuem a menor quantidade de membros com uma alta pontuação de inércia, enquanto o comité mais popular - "IWFA" - tem uma pequena inércia. Assim, a inércia é uma forma de quantificar a distância dos pontos em relação ao centro do gráfico. - -Outra pontuação importante é visível no gráfico de CA - a percentagem do valor explicativo para cada dimensão. Isto significa que o eixo horizontal explica 42,32% da variação no gráfico, enquanto o eixo vertical explica quase 31%. O que estes eixos significam deve ser interpretado com base no gráfico. Por exemplo, podemos dizer que o lado esquerdo representa questões relativas à identidade social e os do lado direito são mais reguladores. Uma análise histórica mais aprofundada das atas destes comités poderia, por sua vez, oferecer uma maior compreensão sobre o significado da participação destes membros na época. 
- -## Notas -[^1]: A CA tem uma história ramificada de várias disciplinas e, assim, a terminologia pode ser confusa. Para simplificar, as categorias se referem aos tipos de dados que estão sendo comparados (por exemplo, membros e clubes) enquanto cada item dentro dessas categorias (por exemplo, “The Tennis Club” ou “John McEnroe”) será um elemento dentro dessa categoria. A localização quantitativa dos elementos (coordenadas x e y) são *datapoints*. - -[^2]: Brigitte Le Roux and Henry Rouanet, *Multiple Correspondence Analysis* (Los Angeles: SAGE Publications, 2010): 3. - -[^3]: Não pretendemos sugerir que esta análise seja de forma alguma conclusiva sobre os laços comerciais entre os EUA e a Rússia. A questão é que, como a Rússia não faz parte da TPP neste acordo, ela se separa dos EUA. Por outro lado, se a adesão à TPP pudesse ser comprovada como representando laços tensos entre os EUA e a Rússia, apareceria no gráfico de CA. - -[^4]: Sebastien Le, Julie Josse, Francois Husson (2008). FactoMineR: An R Package for Multivariate Analysis. Journal of Statistical Software, 25(1), 1-18. [10.18637/jss.v025.i01](https://doi.org/10.18637/jss.v025.i01). - -[^5]: Alboukadel Kassambara and Fabian Mundt (2017). factoextra: Extract and Visualize the Results of Multivariate Data Analyses. R package version 1.0.4. [https://CRAN.R-project.org/package=factoextra](https://perma.cc/Z2RC-F4J7). - -[^6]: O valor explicativo é a distância dos *datapoints* afastados do centro do gráfico. Cada dimensão é responsável por parte da distância que os *datapoints* divergem do centro. - -[^7]: Em geral, a inércia nas estatísticas refere-se à variação ou “disseminação” de um *dataset*. Esta é análoga ao desvio padrão nos dados de distribuição. - -[^8]: Ver Laura Kane (3 de abril de 2017), "Missing and murdered women's inquiry not reaching out to families, say advocates." *CBC News Indigenous*. [http://www.cbc.ca/news/indigenous/mmiw-inquiry-not-reaching-out-to-families-says-advocates-1.4053694](https://perma.cc/UQ6J-8QVZ). - -[^9]: Em estatística, um valor p (*p-value*), abreviação de valor de probabilidade, é um indicador de quão provável um resultado teria ocorrido em circunstâncias aleatórias. Um baixo valor de p sugere uma probabilidade baixa de que o resultado teria ocorrido ao acaso e, portanto, fornece algumas evidências de que uma hipótese nula (neste caso, que os MPs e CPCs são categorias independentes) é improvável. - -[^10]: Katherine Faust (2005) "Using Correspondence Analysis for Joint Displays of Affiliation Network" in *Models and Methods in Social Network Analysis* eds. Peter J. Carrington, John Scott and Stanley Wasserman. +--- +title: "Análise de Correspondência para Pesquisa Histórica com R" +slug: analise-correspondencia-pesquisa-historica-R +original: correspondence-analysis-in-R +layout: lesson +collection: lessons +date: 2017-09-13 +translation_date: 2023-05-23 +authors: +- Ryan Deschamps +reviewers: +- Sandra van Ginhoven +- Taylor Arnold +editors: +- Matthew Lincoln +translator: +- Diana Rodriguez +translation-editor: +- Jimmy Medeiros +translation-reviewer: +- Yuri Pires +- André Salvo +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/422 +difficulty: 3 +activity: analyzing +topics: [data-manipulation, network-analysis, r, data-visualization] +abstract: Esta lição explica como realizar e interpretar uma análise de correspondência com R, que pode ser usada para identificar relacionamentos dentro de dados categóricos. 
+avatar_alt: Diagrama de um cubo com arestas legendadas
+mathjax: true
+doi: 10.46430/phpt0042
+---
+
+{% include toc.html %}
+
+A análise de correspondência (*correspondence analysis* ou CA) produz um gráfico bidimensional ou tridimensional baseado nas relações entre duas ou mais categorias de dados. Essas categorias poderiam ser "membros e clubes", "palavras e livros" ou "países e acordos comerciais". Por exemplo, um membro de um clube pode ser equiparado a outro membro com base nos clubes compartilhados aos quais pertence. Os membros que frequentam os mesmos clubes provavelmente têm mais em comum do que aqueles que frequentam clubes diferentes. Da mesma forma, os clubes que compartilham membros provavelmente terão mais em comum do que aqueles que compartilham membros diferentes.[^1]
+
+Discernir essas correspondências significativas pode ser muito difícil quando há muitos elementos em cada uma das categorias (por exemplo, se tivermos centenas de membros espalhados por dezenas de clubes). A CA mede as correspondências mais fortes em um *dataset* e as projeta em um espaço multidimensional, possibilitando sua visualização e interpretação. Normalmente, as duas principais dimensões são mostradas de uma só vez, embora seja possível mostrar três dimensões em uma visualização 3D.
+
+Uma vez que a CA visualiza as relações entre elementos de seus dados como distâncias em um gráfico, muitas vezes é possível descobrir padrões amplos com base em quais elementos de uma categoria aparecem próximos a elementos da outra. Assim, a CA pode ser um bom primeiro passo para filtrar os principais padrões de um grande *dataset*. É uma ferramenta particularmente poderosa para entender informações históricas dentro de coleções digitais.
+
+Depois de ler este tutorial, deve ser possível:
+
+* Saber o que é a CA e para que é usada.
+* Saber como executar a CA usando o pacote FactoMineR do R.
+* Descrever com exatidão os resultados de uma CA.
+
+## Pré-requisitos
+
+Este tutorial é para historiadores e pesquisadores com habilidades intermédias em programação. Pressupõe que já se tem um conhecimento básico de R e alguns conhecimentos básicos de estatística.
+
+O tutorial [Noções básicas de R com dados tabulares](/pt/licoes/nocoes-basicas-R-dados-tabulares) tem informações sobre como organizar e configurar o R, e o tutorial [Processamento Básico de Texto em R](/pt/licoes/processamento-basico-texto-r) também pode ser útil como treinamento.
+
+Como a CA é uma espécie de *social network analysis* (análise de redes sociais), pode ser interessante olhar a lição [From Hermeneutics to Data to Networks: Data Extraction and Network Visualization of Historical Sources](/en/lessons/creating-network-diagrams-from-historical-sources) (em inglês), que também tem algumas informações úteis sobre a estruturação de dados para análise de redes.
+
+## O que é a Análise de Correspondência?
+
+A análise de correspondência (CA), também chamada "escala multidimensional" ou "análise bivariada de rede", permite observar a inter-relação de dois grupos em um gráfico de dispersão com dois eixos (*two-way graph plot*). Por exemplo, foi utilizada pelo sociólogo francês Pierre Bourdieu para mostrar como categorias sociais como a ocupação influenciam a opinião política.[^2] É especialmente poderosa como ferramenta para encontrar padrões em grandes *datasets*.
+
+A CA funciona com qualquer tipo de dados categóricos (*datasets* que foram agrupados em categorias). Vamos começar com um exemplo simples.
+Se quisesse entender o papel dos acordos internacionais de livre comércio na interconexão das nações do G8, seria possível criar uma tabela para os países e as relações de livre comércio que eles mantinham em um determinado momento.
+
+Uma pequena seleção de acordos comerciais (em azul), incluindo o Espaço Económico Europeu (*European Economic Area* ou EEA), o Acordo Comercial Canadá-UE (*Canada-EU Trade Agreement* ou CETA), o Acordo de Livre Comércio Norte-Americano (*North American Free Trade Agreement* ou NAFTA), a Parceria Trans-Pacífico (*Trans Pacific Partnership* ou TPP) e a Associação das Nações do Sudeste Asiático (*Association of Southeast Asian Nations* ou ASEAN), corresponde aos países do G8. Os países (de cor vermelha) agrupam-se geograficamente, com países do Pacífico à direita, países europeus à esquerda e países da América do Norte ao centro. O Canadá e os Estados Unidos, como previsto, estão juntos. Alemanha, Itália, França e Reino Unido pertencem todos aos mesmos dois acordos (CETA e EEA), portanto todos caem exatamente no mesmo ponto.
+
+{% include figure.html filename="tr-pt-analise-correspondenciaR-1.png" alt="Imagem representando um gráfico de correspondência sobre acordos comerciais" caption="Figura 1. Análise de correspondência de países selecionados do G8 e seus acordos comerciais" %}
+
+Por outro lado, enquanto a Rússia e os Estados Unidos estão um pouco próximos no eixo horizontal, estão em polos opostos no eixo vertical. A Rússia só compartilha um acordo de comércio com um outro país (Japão) e os Estados Unidos com dois (Japão e Canadá). Em um gráfico de CA, unidades com poucas correlações ficarão nos arredores, enquanto aquelas unidades com maior quantidade de correlações ficarão mais próximas do centro do gráfico. A conexão relativa ou falta de conexão de um *datapoint* é quantificada como *inertia* (inércia) na CA. A falta relativa de conexão produz uma inércia maior.
+
+Um ponto mais substancial sobre a Rússia e os Estados Unidos é que a Rússia é um país do Pacífico que não pertence à TPP. Observando esta relação, um historiador poder-se-ia perguntar se isto ocorre por causa de uma relação comercial tensa entre a Rússia e os Estados Unidos em comparação com outros países do G8, ou por atitudes gerais destes países em relação a acordos comerciais.[^3]
+
+Com mais dados, a CA pode descobrir distinções mais subtis entre grupos dentro de uma categoria particular. Neste tutorial, analisaremos a vida política canadense - especificamente, como representantes políticos são organizados em comités durante um ou outro governo. Semelhante aos acordos comerciais, esperaríamos que os comités que têm membros semelhantes estivessem mais próximos uns dos outros. Além disso, os comités que têm poucos representantes em comum se encontrarão nos cantos do gráfico.
+
+## Comités Parlamentares Canadenses (CPCs)
+
+No sistema parlamentar canadense, os cidadãos elegem representantes chamados membros do Parlamento, ou deputados, para a Câmara dos Comuns. Os parlamentares são responsáveis por votar e propor alterações à legislação no Canadá. Os [Comités Parlamentares (CPCs)](https://perma.cc/3PT6-77DB) (em inglês) consistem em parlamentares que informam à Câmara sobre detalhes importantes da política em uma área temática. Exemplos de tais comités incluem os CPCs sobre Finanças, Justiça e Saúde.
+
+Usaremos abreviações para os comités parlamentares, porque os nomes podem ficar longos, tornando-os difíceis de ler em um gráfico.
+É possível usar esta tabela como um guia de referência para as abreviações e seus respectivos nomes de comités:
+
+| Abbreviation (Abreviação) | Committee Name (Tradução do Nome do Comité) |
+| :----------- | :----------------------------------------------------------------------------------------: |
+| INAN | Indigenous and Northern Affairs (Assuntos Indígenas e do Norte) |
+| HUMA | Human Resources, Skills and Social Development and the Status of Persons with Disabilities (Recursos Humanos, Habilidades e Desenvolvimento Social e o Status das Pessoas com Deficiência) |
+| FINA | Finance (Finanças) |
+| FAAE | Foreign Affairs and International Development (Relações Exteriores e Desenvolvimento Internacional) |
+| ETHI | Access to Information, Privacy and Ethics (Acesso à Informação, Privacidade e Ética) |
+| ENVI | Environment and Sustainable Development (Meio Ambiente e Desenvolvimento Sustentável) |
+| CHPC | Canadian Heritage (Herança Canadense) |
+| CIMM | Citizenship and Immigration (Cidadania e Imigração) |
+| ACVA | Veterans Affairs (Assuntos de Veteranos) |
+| HESA | Health (Saúde) |
+| TRAN | Transport, Infrastructure and Communities (Transporte, Infraestrutura e Comunidades) |
+| FOPO | Fisheries and Oceans (Pesca e Oceanos) |
+| RNNR | Natural Resources (Recursos Naturais) |
+| FEWO | Status of Women (Status das Mulheres) |
+| ESPE | Pay Equity (Igualdade de Remuneração) |
+| IWFA | Violence against Indigenous Women (Violência Contra as Mulheres Indígenas) |
+| BILI | Library of Parliament (Biblioteca do Parlamento) |
+| AGRI | Agriculture and Agri-food (Agricultura e Agroalimentação) |
+| JUST | Justice and Human Rights (Justiça e Direitos Humanos) |
+
+O autor da lição, o historiador Ryan Deschamps, suspeitava que os deputados estariam organizados de acordo com os tópicos do comité de forma diferente de governo para governo. Por exemplo, os comités formados durante o primeiro gabinete do governo conservador de Stephen Harper podem ter sido organizados de forma diferente do gabinete inicial do Liberal Justin Trudeau. Há uma série de razões para esta suspeita. Primeiro, os CPCs são formados por lideranças partidárias e as decisões dos comités precisam de coordenação entre os membros da Câmara. Em outras palavras, os partidos políticos usarão os CPCs como ferramentas para marcar pontos políticos, e os governos devem garantir que as pessoas certas sejam membros dos comités certos para proteger suas agendas políticas. Em segundo lugar, os dois governos têm um enfoque político diferente. O governo conservador de Harper se concentrou mais em questões de desenvolvimento económico, enquanto os Liberais de Trudeau enfatizaram, em primeiro lugar, a igualdade social. Em resumo, pode ter havido decisões calculadas sobre quem entra em que comité, fornecendo evidências sobre as atitudes do governo a favor ou contra certos tópicos.
+
+## Preparando o R para a CA
+
+Para fazer uma CA, precisaremos de um pacote de álgebra linear. Para os mais inclinados à matemática, há um apêndice com alguns detalhes sobre como isto é feito. Em R, há várias opções para CA, mas usaremos o [pacote FactoMineR](https://factominer.free.fr/) (em inglês), focado na "análise de dados exploratórios multivariados".[^4] O FactoMineR pode ser usado para conduzir todos os tipos de análises multivariadas, incluindo *clusters* hierárquicos, análise fatorial e assim por diante.
+
+Mas, primeiro, eis como instalar e carregar os pacotes e, em seguida, ler os dados em um objeto R para que possam ser discutidos.
+
+```R
+
+## Estes comandos só precisam ser executados na primeira vez que se realiza uma análise.
+## FactoMineR é um pacote bastante grande, portanto pode levar algum tempo para ser instalado.
+
+install.packages("FactoMineR") # Inclui um módulo para a condução de CA.
+install.packages("factoextra") # Pacote para embelezar os nossos gráficos de CA.
+
+# Importar os pacotes:
+library(FactoMineR)
+library(factoextra)
+
+# set.seed(189981) # Opcional, para reprodutibilidade.
+
+# Leia os ficheiros csv:
+
+harper_df <- read.csv("http://programminghistorian.org/assets/correspondence-analysis-in-R/HarperCPC.csv", stringsAsFactors = FALSE)
+```
+
+
+## Os dados
+
+Se quiser ver os dados brutos, os dados para este tutorial podem ser encontrados no [Zenodo](https://doi.org/10.5281/zenodo.889846) (em inglês). Foram convenientemente incluídos também no formato tabular (nota: não é necessário baixar estes ficheiros manualmente; usaremos o R para baixá-los diretamente):
+
+1) [CPCs do Harper](/assets/correspondence-analysis-in-R/HarperCPC.csv)
+2) [CPCs do Trudeau](/assets/correspondence-analysis-in-R/TrudeauCPC.csv)
+
+Abaixo está uma amostra dos dados para a primeira sessão do governo de Stephen Harper. As linhas representam comités e as colunas são membros específicos. Se um membro pertence a um comité, a célula terá um 1; se não, terá um 0.
+
+```
+harper_df
+     C Bennett D Wilks DV Kesteren G Rickford J Crowder K Block K Seeback
+FAAE         0       0           1          0         0       0         0
+FEWO         0       0           0          0         0       0         0
+FINA         0       0           1          0         0       0         0
+HESA         0       1           0          0         0       1         0
+INAN         1       0           0          1         1       0         1
+IWFA         1       0           0          1         1       1         0
+JUST         0       1           0          0         0       0         1
+
+     L Davies N Ashton R Goguen R Saganash S Ambler S Truppe
+FAAE        0        0        0          1        0        0
+FEWO        0        1        0          0        1        1
+FINA        0        0        0          0        0        0
+HESA        1        0        0          0        0        0
+INAN        0        0        0          0        1        0
+IWFA        1        1        1          1        1        1
+JUST        0        0        1          0        0        0
+```
+
+Estruturado de outra forma (através de uma tabela R), podemos mostrar que os comités têm muitos deputados e que alguns deputados são membros de vários comités. Por exemplo, a deputada liberal Carolyn Bennett era membro do "INAN" (Assuntos Indígenas e do Norte) e do "IWFA" (Violência contra Mulheres Indígenas), e o "HESA" (Comité Parlamentar de Saúde) incluía tanto D Wilks quanto K Block. Em geral, os comités têm entre nove e doze membros. Alguns parlamentares são membros de apenas um comité, enquanto outros podem pertencer a vários.
+
+
+## Análise de Correspondência dos Comités Parlamentares Canadenses 2006 e 2016
+
+O nosso *data frame* `harper_df` consiste em nomes completos de comités e nomes de deputados, mas alguns dos nomes dos comités (por exemplo, "Recursos Humanos, Habilidades e Desenvolvimento Social e o Status das Pessoas com Deficiência") são muito longos para serem bem mostrados em um gráfico: vamos usar as abreviações.
+
+```R
+harper_table <- table(harper_df$abbr, harper_df$membership)
+```
+
+O comando `table` (tabela) cria uma tabulação cruzada das duas categorias do *data frame*. As colunas são MPs individuais e as linhas são comités. Cada célula contém um 0 ou um 1, baseado na existência ou não de uma conexão. Se olhássemos a presença real em cada reunião, poderíamos também incluir valores ponderados (por exemplo, 5 para um membro do parlamento que participa de uma reunião de comité 5 vezes). Como regra geral, use valores ponderados quando as quantidades importam (quando as pessoas investem dinheiro, por exemplo) e use 0s e 1s quando não importam.
+
+Infelizmente, temos mais um problema. Muitos deputados são membros de apenas 1 comité.
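+
+Podemos confirmar essa afirmação nos próprios dados. O esboço abaixo é apenas uma verificação opcional, assumindo que a tabela `harper_table` já foi criada com o comando `table` acima:
+
+```R
+# Soma cada coluna para contar a quantos comités cada deputado pertence
+# e depois tabula a distribuição dessas contagens.
+table(colSums(harper_table))
+```
+
+A primeira linha do resultado mostra os tamanhos de associação possíveis e a segunda, quantos deputados têm cada tamanho; um número grande sob o valor `1` confirma que muitos deputados pertencem a um único comité.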
+
+Isso fará com que esses deputados se sobreponham quando criarmos o gráfico, tornando-o menos legível. Vamos exigir que os parlamentares pertençam a pelo menos 2 comités antes de executarmos o comando CA do FactoMineR.
+
+```R
+harper_table <- harper_table[,colSums(harper_table) > 1]
+CA_harper <- CA(harper_table)
+plot(CA_harper)
+```
+
+O comando `colSums` soma os valores de cada coluna da tabela. `rowSums` poderia ser usado para somar as linhas, se isso fosse necessário (não é para nós, porque todos os comités têm mais de um deputado).
+
+O comando `CA` traça os resultados para as duas dimensões superiores e armazena o resumo dos dados em uma variável chamada `CA_harper`. Na maioria das vezes, `CA` faz a maior parte do trabalho. Como discutido, mais detalhes sobre a matemática por trás da CA são fornecidos no [apêndice](#Apêndice:AMatemáticaportrásdaAnálisedeCorrespondência).
+
+Deve-se obter um gráfico que se pareça com isto:
+
+{% include figure.html filename="tr-pt-analise-correspondenciaR-2.png" alt="Imagem representando um gráfico de correspondências sobre comités parlamentares" caption="Figura 2. Análise de correspondência dos Comités Parlamentares para a 1ª Sessão do Governo Harper" %}
+
+Vamos tratar os dados do governo Trudeau exatamente da mesma maneira.
+
+```R
+trudeau_df <- read.csv("http://programminghistorian.org/assets/correspondence-analysis-in-R/TrudeauCPC.csv", stringsAsFactors = FALSE)
+trudeau_table <- table(trudeau_df$abbr, trudeau_df$membership)
+trudeau_table <- trudeau_table[,colSums(trudeau_table) > 1]
+CA_trudeau <- CA(trudeau_table)
+plot(CA_trudeau)
+```
+
+{% include figure.html filename="tr-pt-analise-correspondenciaR-3.png" alt="Imagem representando um gráfico de correspondências sobre comités parlamentares" caption="Figura 3. Análise de correspondência dos Comités Parlamentares para a 1ª Sessão do Governo de Justin Trudeau" %}
+
+As nossas etiquetas de dados não são muito legíveis no momento. Mesmo com a mudança para abreviações, as etiquetas estão sobrepostas. O pacote [factoextra](https://cran.r-project.org/web/packages/factoextra/index.html) (em inglês) tem um recurso de repulsão de etiquetas (*repel*) que ajuda a mostrar as coisas mais claramente.[^5]
+
+```R
+fviz_ca_biplot(CA_harper, repel = TRUE)
+```
+
+{% include figure.html filename="tr-pt-analise-correspondenciaR-4.png" alt="Imagem representando um gráfico de correspondências sobre comités parlamentares" caption="Figura 4. Análise de correspondência dos Comités Parlamentares para a 1ª Sessão do Governo Harper" %}
+
+```R
+fviz_ca_biplot(CA_trudeau, repel = TRUE)
+```
+
+{% include figure.html filename="tr-pt-analise-correspondenciaR-5.png" alt="Imagem representando um gráfico de correspondências sobre comités parlamentares" caption="Figura 5. Análise de correspondência dos Comités Parlamentares para a 1ª Sessão do Governo de Justin Trudeau" %}
+
+Em vez de se sobrepor, as etiquetas agora usam setas para indicar sua localização onde for apropriado.
+
+## Interpretando a Análise de Correspondência (CA)
+
+Os gráficos de dados parecem mais bonitos, mas até que ponto podemos confiar na validade desses dados? A nossa primeira dica é olhar para as dimensões. Nos dados de Harper, apenas onze e dez por cento de valor explicativo aparecem nos eixos horizontal e vertical, respectivamente, para um total de 21%![^6] Isso não soa promissor para a nossa análise. Lembrando que o número total de dimensões é igual ao número de linhas ou colunas (o que for menor) menos um, isto pode ser preocupante.
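+
+Não é preciso estimar esses valores a olho no gráfico: eles podem ser extraídos do próprio objeto de resultado. Um esboço mínimo, assumindo o objeto `CA_harper` criado acima (o FactoMineR guarda os eigenvalues no componente `$eig`):
+
+```R
+# Percentagem de variância explicada pelas primeiras dimensões
+head(CA_harper$eig)
+
+# Ou, com o factoextra, um gráfico de barras das dimensões:
+fviz_screeplot(CA_harper)
+```
+
+As duas primeiras linhas de `$eig` devem mostrar os cerca de 11% e 10% mencionados acima.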
+
+Quando tais valores baixos ocorrem, geralmente significa que os pontos de dados estão distribuídos de forma bastante uniforme, e a distribuição uniforme dos deputados pelos CPCs é uma convenção bastante bem estabelecida do parlamento.
+
+Outra maneira de olhar para os dados é através dos valores de inércia.[^7] Mais detalhes sobre inércia podem ser encontrados no [apêndice](#Apêndice:AMatemáticaportrásdaAnálisedeCorrespondência) mas, no gráfico, os pontos de dados distantes da origem têm maior inércia. Pontos de inércia elevados sugerem *outliers* (valores atípicos) - atores ou eventos que têm menos conexões do que aqueles próximos ao centro. Os baixos valores de inércia sugerem pontos de dados que têm mais em comum com o grupo como um todo. Como ferramenta de análise, a inércia pode ser útil para encontrar atores ou subgrupos renegados no *dataset*. Se todos os pontos tiverem alta inércia, pode ser um indicador de alta diversidade ou fragmentação das redes. A baixa inércia geral pode ser um indicador de maior coesão ou convergência geral. O que isso significa dependerá do *dataset*. Para os nossos gráficos, nenhum *datapoint* se projeta muito além de 2 passos da média. Mais uma vez, este é um indicador de que as relações estão distribuídas de maneira relativamente uniforme.
+
+Vamos analisar os dados mais de perto:
+
+```R
+summary(CA_harper)
+```
+
+Isto nos retorna:
+
+```
+HARPER
+
+O qui-quadrado da independência entre as duas variáveis é igual a 655.6636
+(p-value = 0.7420958).
+
+Eigenvalues
+                     Dim.1  Dim.2  Dim.3  Dim.4  Dim.5  Dim.6
+Variance             0.831  0.779  0.748  0.711  0.666  0.622
+% of var.           11.024 10.342  9.922  9.440  8.839  8.252
+Cumulative % of var. 11.024 21.366 31.288 40.729 49.568 57.820
+
+                     Dim.7  Dim.8  Dim.9 Dim.10 Dim.11 Dim.12
+Variance             0.541  0.498  0.463  0.346  0.305  0.263
+% of var.            7.174  6.604  6.138  4.591  4.041  3.488
+Cumulative % of var. 64.995 71.599 77.736 82.328 86.368 89.856
+
+                     Dim.13 Dim.14 Dim.15 Dim.16 Dim.17
+Variance              0.240  0.195  0.136  0.105  0.088
+% of var.             3.180  2.591  1.807  1.396  1.170
+Cumulative % of var.  93.036 95.627 97.434 98.830 100.000
+```
+
+O cabeçalho `Eigenvalues` do resumo apresenta métricas sobre as dimensões recém-computadas, listando a percentagem de variância contida em cada uma delas. Infelizmente, a percentagem de variância encontrada nas duas dimensões superiores é muito baixa. Mesmo se conseguíssemos visualizar 7 ou 8 dimensões dos dados, capturaríamos apenas uma percentagem acumulada de cerca de 70%. O teste de independência do [qui-quadrado](https://perma.cc/8B82-YAX6) nos diz que não podemos rejeitar a hipótese de que nossas duas categorias (CPCs e MPs) são independentes. O valor p (ou *p-value*) é 0,74, bem acima do 0,05 comumente usado como corte para rejeitar uma hipótese nula.[^9] Um valor p menor ocorreria, por exemplo, se todos ou a maioria dos deputados fossem membros de um ou dois comités. A propósito, o valor p do qui-quadrado da amostra de Trudeau é menor, 0,54, mas ainda não é baixo o suficiente para rejeitar a hipótese de categorias mutuamente independentes.
+
+Como discutido, este resultado não é muito surpreendente. Esperamos que os deputados sejam distribuídos de forma relativamente uniforme entre os comités.
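+
+Para conferir o valor relatado para Trudeau sem percorrer todo o resumo, um esboço rápido, assumindo a tabela `trudeau_table` e o objeto `CA_trudeau` criados acima:
+
+```R
+# O mesmo teste que aparece no topo de summary(CA_trudeau);
+# com tabelas esparsas como esta, o R pode avisar que a
+# aproximação do qui-quadrado é imprecisa.
+chisq.test(trudeau_table)
+
+# summary(CA_trudeau) # alternativa: o resumo completo da CA
+```
+
+O valor p resultante deve aproximar-se do 0,54 mencionado acima.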
+
+Se optarmos por ponderar as nossas medidas com base na participação dos parlamentares em cada reunião de comité, ou em uma escala de 1 a 100 do desejo de cada um de ser membro de cada comité, poderemos ver resultados diferentes (por exemplo, pode ser mais comum que os parlamentares participem regularmente das reuniões de finanças em comparação com outras reuniões).
+
+A CA falhou conosco? Bem, na verdade não. Isto significa apenas que não podemos simplesmente lançar dados em um algoritmo e esperar responder a perguntas reais de história. Mas não somos apenas programadores, somos historiadores de programação. Vamos colocar nossos bonés da história e ver se podemos refinar as nossas pesquisas!
+
+## Trudeau ampliou a Agenda para a Igualdade das Mulheres no Parlamento?
+
+Uma das primeiras medidas políticas que Justin Trudeau tomou foi garantir que o Canadá tivesse um gabinete com 50% de mulheres. É discutível que o objetivo deste anúncio era professar uma agenda de igualdade de género. Na sua primeira sessão, o governo de Trudeau também criou um novo Comité Parlamentar sobre igualdade de remuneração para as mulheres. Além disso, o governo de Trudeau apresentou uma moção para que houvesse um inquérito sobre Mulheres Indígenas Desaparecidas e Assassinadas, substituindo o mandato do comité parlamentar de Harper para a Violência Contra as Mulheres Indígenas.
+
+Se Trudeau tivesse a intenção de levar a igualdade das mulheres a sério, poderíamos esperar que mais membros do comité do Status da Mulher estivessem ligados a pastas maiores, como Justiça, Finanças, Saúde e Relações Exteriores, em comparação com o governo de Harper. Como o regime de Harper não tinha um CPC de igualdade salarial, incluiremos o CPC para "Violência contra Mulheres Indígenas".
+
+```R
+# Inclua apenas os comités desejados:
+# HESA: Health, JUST: Justice, FEWO: Status of Women
+# INAN: Indigenous and Northern Affairs, FINA: Finance
+# FAAE: Foreign Affairs and International Trade
+# IWFA: Violence against Indigenous Women
+
+harper_df2 <- harper_df[which(harper_df$abbr %in%
+    c("HESA", "JUST", "FEWO", "INAN", "FINA", "FAAE", "IWFA")),]
+harper_table2 <- table(harper_df2$abbr, harper_df2$membership)
+
+# Remova os singles de novo.
+harper_table2 <- harper_table2[, colSums(harper_table2) > 1]
+CA_Harper2 <- CA(harper_table2)
+plot(CA_Harper2)
+```
+
+Isto produz o seguinte gráfico:
+
+{% include figure.html filename="tr-pt-analise-correspondenciaR-6.png" alt="Imagem representando um gráfico de correspondências sobre comités parlamentares" caption="Figura 6. Análise de correspondência de Comités Parlamentares selecionados para a 1ª Sessão do Governo de Stephen Harper" %}
+
+O valor p do qui-quadrado para este resultado se move apenas ligeiramente em direção a zero, para 0,71. Ainda não podemos tirar nenhuma conclusão quantitativa sobre uma relação clara entre CPCs e MPs. Para os nossos dados, este não é um resultado muito importante. Se pesquisássemos os CPCs sobre qual CPC era o mais produtivo ou importante, talvez encontrássemos valores p mais baixos. A inércia no eixo horizontal praticamente dobrou, sugerindo que o FINA (Finanças) se encontra mais afastado dos outros portfólios no gráfico.
+
+O significado de uma CA depende de uma interpretação qualitativa do gráfico. Por exemplo, observando os elementos do gráfico de Harper, podemos dizer que as preocupações económicas caem à direita do eixo y e as preocupações sociais caem à esquerda.
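+
+Essa leitura pode ser verificada numericamente: as coordenadas dos comités na primeira dimensão dizem quem cai à esquerda e quem cai à direita. Um esboço, assumindo o objeto `CA_Harper2` criado acima (o FactoMineR guarda as coordenadas das linhas em `$row$coord`):
+
+```R
+# Coordenadas dos comités (linhas) na Dimensão 1, em ordem crescente;
+# valores negativos caem à esquerda do gráfico, positivos à direita.
+sort(CA_Harper2$row$coord[, 1])
+```
+
+Se a leitura estiver correta, os comités económicos devem aparecer de um lado da escala e os sociais do outro.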
Portanto, uma das "razões" para a escolha dos parlamentares que participam dos comités no governo Harper parece ser a distinção entre preocupações sociais e económicas.
+
+Entretanto, quando fazemos a mesma análise com o governo de Trudeau...
+
+```R
+trudeau_df2 <- trudeau_df[which(trudeau_df$abbr %in%
+    c("HESA", "JUST", "FEWO", "INAN", "FINA", "FAAE", "ESPE")),]
+trudeau_table2 <- table(trudeau_df2$abbr, trudeau_df2$membership)
+trudeau_table2 <- trudeau_table2[, colSums(trudeau_table2) > 1] # remova os singles de novo
+CA_trudeau2 <- CA(trudeau_table2)
+plot(CA_trudeau2)
+```
+
+Produzimos um gráfico incompleto e esta mensagem aparece:
+
+```
+Warning message:
+In CA(trudeau_table2) :
+The rows FAAE, INAN, JUST sum at 0. They were suppressed from the analysis.
+```
+
+Isto significa que o gráfico produzido não nos mostra as linhas FAAE, INAN e JUST. Como a soma de cada uma delas é 0, elas foram suprimidas da análise (ao final desta secção, um esboço mostra como detectar essas linhas vazias antes de executar a CA). Olhando para a tabela `trudeau_table2`, vemos que:
+
+```
+     A Vandenbeld D Albas M Gladu R Harder S Sidhu
+ESPE            1       1       1        0       1
+FAAE            0       0       0        0       0
+FEWO            1       0       1        1       0
+FINA            0       1       0        0       0
+HESA            0       0       0        1       1
+INAN            0       0       0        0       0
+JUST            0       0       0        0       0
+```
+
+Não há nenhuma associação cruzada para FAAE, INAN ou JUST! Bem, isso é um resultado em si mesmo. Podemos concluir, em geral, que as agendas dos dois governos são bastante diferentes, e que foi utilizada uma abordagem diferente para organizar os parlamentares em comités.
+
+Para um historiador canadense, o resultado faz algum sentido, dado que a Violência contra as Mulheres Indígenas (IWFA) tem muito mais probabilidade de estar ligada aos Assuntos Indígenas e do Norte (INAN) e à Justiça e Direitos Humanos (JUST) do que à Igualdade de Remuneração (ESPE). Afinal, a história da Violência contra as Mulheres Indígenas está ligada a uma série de casos criminais de grande repercussão no Canadá. Como discutido anteriormente, a CA requer uma certa dose de interpretação para se tornar significativa.
+
+Talvez possamos observar alguns comités diferentes em seu lugar. Ao retirar "JUST", "INAN" e "FAAE" (Relações Exteriores) e substituí-los por "CIMM" (Imigração), "ETHI" (Ética e Acesso à Informação) e "HUMA" (Recursos Humanos), podemos obter uma imagem melhor da estrutura dos comités parlamentares neste contexto.
+
+```R
+trudeau_df3 <- trudeau_df[which(trudeau_df$abbr %in%
+    c("HESA", "CIMM", "FEWO", "ETHI", "FINA", "HUMA", "ESPE")),]
+trudeau_table3 <- table(trudeau_df3$abbr, trudeau_df3$membership)
+trudeau_table3 <- trudeau_table3[, colSums(trudeau_table3) > 1] # remova os singles de novo
+CA_trudeau3 <- CA(trudeau_table3)
+plot(CA_trudeau3)
+```
+
+{% include figure.html filename="tr-pt-analise-correspondenciaR-7.png" alt="Imagem representando um gráfico de correspondências sobre comités parlamentares" caption="Figura 7. Análise de correspondência de Comités Parlamentares selecionados para a 1ª Sessão do Governo de Justin Trudeau" %}
+
+Em geral, a inércia no eixo horizontal é menor do que a do governo de Harper, mas a separação opõe "HUMA" (Recursos Humanos) e "ETHI" (Ética) aos outros portfólios, à direita. A delimitação entre questões sociais e económicas não é tão evidente como no caso de Harper, sugerindo uma filosofia de seleção diferente. Dito isto, também há menos deputados compartilhando posições. Este pode ser outro mistério a explorar. De qualquer modo, o processo de CA nos fornece, com uma olhada rápida e muito poucos comandos, uma visão sólida das relações que ocorrem dentro dos comités.
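+
+Antes de seguirmos adiante, vale registrar como detectar por conta própria linhas vazias como as que geraram o aviso acima. O esboço abaixo é apenas uma verificação mínima, assumindo que `trudeau_table2` foi construída como nos passos anteriores; o nome `trudeau_table2_limpa` é meramente ilustrativo.
+
+```R
+# Esboço: antes de chamar CA(), verifique se algum comité (linha)
+# soma zero e, portanto, ficaria fora da análise.
+linhas_vazias <- rownames(trudeau_table2)[rowSums(trudeau_table2) == 0]
+print(linhas_vazias) # neste caso, deve listar "FAAE", "INAN" e "JUST"
+
+# Removê-las explicitamente evita o aviso do FactoMineR:
+trudeau_table2_limpa <- trudeau_table2[rowSums(trudeau_table2) > 0, ]
+```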
+
+## Análise
+
+Como na maioria das pesquisas interpretativas, não obtemos uma resposta direta à nossa pergunta sobre o poder das mulheres nos governos parlamentares. No caso de Harper, vemos uma divisão no eixo horizontal entre questões sociais, como Saúde e Justiça, e questões económicas, como Finanças e Relações Exteriores, respondendo por 35% da variação. Pela visualização, podemos supor que Finanças (FINA) e Relações Exteriores (FAAE) têm um membro em comum e que Relações Exteriores (FAAE) tem um membro em comum com Violência contra Mulheres Indígenas (IWFA); mais adiante nesta secção, um esboço mostra como conferir essas leituras diretamente na tabela. Este resultado é possivelmente uma preocupação, pois as agendas mais divulgadas de Stephen Harper tendiam a se concentrar em questões económicas como o comércio e a contenção fiscal. A separação dos comités implica que a filosofia de governança de Harper separava as preocupações económicas das sociais e que os direitos das mulheres eram principalmente uma preocupação social. A própria pasta do Status da Mulher (FEWO) fica separada das demais, encontrando-se ligada às outras pastas somente através de parlamentares em comum com os comités Violência contra Mulheres Indígenas (IWFA) e Assuntos Indígenas e do Norte (INAN).
+
+O gráfico do governo de Trudeau não mostra conexões cruzadas do Status da Mulher com a Justiça, as Relações Exteriores e os Povos Indígenas, mas mostra conexões mais fortes com Finanças, Cidadania, Recursos Humanos e Ética. Os Direitos da Mulher estão ligados às Finanças e à Imigração através da pasta de Igualdade de Remuneração.
+
+Pode-se argumentar que o governo Harper alinhou os Direitos das Mulheres às pastas sociais, como Justiça e Saúde, enquanto Trudeau elevou até certo ponto o perfil do Status da Mulher ao incluir o comité de Igualdade de Remuneração. A conexão entre os comités focados nos Direitos da Mulher e pastas fortes como Saúde, Finanças e Cidadania e Imigração no governo Trudeau é digna de uma análise mais detalhada. Neste contexto, o Status da Mulher parece ocupar uma posição mais central (mais próxima da origem) do que o mesmo comité no governo de Harper. Dito isto, o número de pontos de dados neste caso ainda é pequeno demais para se chegar a uma conclusão definitiva. Talvez outras fontes de evidência possam ser visualizadas de maneira semelhante para confirmar ou refutar este ponto.
+
+A agenda que antes ligava as mulheres e os povos indígenas foi deslocada no caso de Trudeau. Como discutido anteriormente, o [National Inquiry into Missing and Murdered Indigenous Women and Girls](https://perma.cc/U38Y-4CY9) (Inquérito Nacional sobre Mulheres Indígenas Desaparecidas e Assassinadas) (em inglês) substituiu o mandato do comité Violência contra as Mulheres Indígenas que existia durante o governo de Harper. A história desta transição é complexa, mas houve pressão política sobre o governo Harper para criar o Inquérito Nacional sobre Mulheres Indígenas Desaparecidas e Assassinadas após o julgamento de Robert Pickton e relatos de investigações policiais insuficientes sobre mulheres indígenas desaparecidas. Harper recusou-se a conduzir um inquérito, alegando que o CPC era a melhor abordagem.[^8] Trudeau fez a promessa eleitoral de realizar o inquérito, deslocando assim o comité. Até certo ponto, Harper parece ter dado à violência contra as mulheres indígenas um papel bastante central no planejamento dos Comités Parlamentares. Esta evidência é um contraponto às críticas de que Harper não levou a sério a questão das Mulheres Indígenas Desaparecidas e Assassinadas.
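+
+Como prometido acima, segue um esboço mínimo para conferir essas leituras visuais ("têm um membro em comum") diretamente nos dados, assumindo que `harper_table2` foi construída como na secção anterior. Multiplicar a tabela pela sua transposta produz uma matriz de co-associação entre comités; trata-se apenas de uma verificação ilustrativa, não de uma parte do método de CA em si.
+
+```R
+# Esboço: matriz de co-associação entre comités.
+# A entrada [i, j] conta quantos MPs pertencem tanto ao comité i quanto ao comité j
+# (a diagonal traz o total de membros de cada comité).
+# Lembre-se de que harper_table2 foi filtrada (colSums > 1); as contagens
+# refletem apenas MPs que participam de dois ou mais dos comités selecionados.
+m_harper <- as.matrix(harper_table2)
+co_associacao <- m_harper %*% t(m_harper)
+co_associacao["FINA", "FAAE"] # MPs compartilhados entre Finanças e Relações Exteriores
+co_associacao["FAAE", "IWFA"] # MPs compartilhados entre Relações Exteriores e IWFA
+```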
+
+As diferenças entre os dois conjuntos de relações levantam questões importantes sobre o papel do Status da Mulher no discurso político e sobre as suas interconexões com identidade racial, finanças públicas, saúde e justiça social, a serem exploradas, talvez, em um trabalho qualitativo mais detalhado. Também se levantam questões importantes sobre tratar o género em geral (conforme a pasta do Status da Mulher) ou de forma mais específica, quando aplicado a um grupo marginalizado (Mulheres Indígenas Desaparecidas e Assassinadas). Um documento de política sobre os benefícios de um Inquérito em comparação com a discussão em Comité Parlamentar parece razoável após o exame desta evidência. Talvez haja um argumento de que a troca do "IWFA" pelo "ESPE" é uma espécie de teto de vidro, impondo artificialmente uma cota às questões das mulheres enquanto as pastas estabelecidas permanecem intocadas. Como ferramenta exploratória, a CA nos ajuda a identificar tais temas a partir da observação empírica, em vez de confiar na teoria ou em preconceitos pessoais.
+
+## Conclusão
+
+Concluído este tutorial, é possível ter alguma noção do que é a CA e de como ela pode ser usada para responder a perguntas exploratórias sobre dados. Usamos o comando `CA` do FactoMineR para criar a análise e traçar os resultados em duas dimensões. Quando as etiquetas se sobrepuseram, aplicamos o comando `fviz_ca_biplot` do pacote factoextra para exibir os dados em um formato mais legível.
+
+Também aprendemos como interpretar uma CA e como detectar potenciais armadilhas analíticas, incluindo casos em que as relações entre categorias são distribuídas de forma muito uniforme e têm baixo valor explicativo. Neste caso, refinamos a nossa pergunta e os nossos dados de pesquisa para fornecer uma imagem mais significativa do que aconteceu.
+
+Em geral, o benefício desta análise é fornecer uma rápida visão geral de um *dataset* de duas categorias, servindo de guia para questões históricas mais substantivas. O uso de membros e reuniões ou eventos em todas as áreas da vida (negócios, organizações sem fins lucrativos, reuniões municipais, *hashtags* do Twitter etc.) é uma abordagem comum para tal análise. Os grupos sociais e as suas preferências são outro uso comum da CA. Em cada caso, a visualização oferece um mapa com o qual se pode observar um retrato da vida social, cultural e política.
+
+Os próximos passos podem incluir a adição de outras dimensões categóricas à nossa análise, como partido político, idade ou sexo. Quando a CA é feita com mais de duas categorias, ela é chamada de [Análise de Correspondência Múltipla, ou MCA](https://www.youtube.com/watch?v=RDexHE5Iqrg) (em inglês). Embora a matemática da MCA seja mais complicada, os resultados finais são bastante semelhantes aos da CA.
+
+Esperamos que, agora, estes métodos possam ser aplicados aos seus próprios dados, ajudando a descobrir perguntas e hipóteses que enriqueçam a sua pesquisa histórica. Boa sorte!
+
+## Apêndice: A Matemática por trás da Análise de Correspondência
+
+Como a matemática da CA será interessante para alguns e não para outros, optamos por discuti-la neste Apêndice. A secção também contém um pouco mais de detalhes sobre outros tópicos, tais como inércia (*inertia*), dimensões (*dimensions*) e decomposição de valores singulares (*singular value decomposition*, ou SVD).
+
+A fim de facilitar a compreensão, começaremos com apenas alguns comités:
"FEWO" (Status das Mulheres ou *Status of Women*), "HESA" (Saúde ou *Health*), "INAN" (Assuntos Indígenas e do Norte ou *Indigenous and Northern Affairs*), "IWFA" (Violência contra as Mulheres Indígenas ou *Violence Against Indigenous Women*) e "JUST" (Justiça ou *Justice*). + +``` + C Bennett D Wilks G Rickford J Crowder K Block K Seeback L Davies N Ashton +FEWO 0 0 0 0 0 0 0 1 +HESA 0 1 0 0 1 0 1 0 +INAN 1 0 1 1 0 1 0 0 +IWFA 1 0 1 1 1 0 1 1 +JUST 0 1 0 0 0 1 0 0 + + R Goguen S Ambler S Truppe +FEWO 0 1 1 +HESA 0 0 0 +INAN 0 1 0 +IWFA 1 1 1 +JUST 1 0 0 +``` + +A CA é feita em um *dataset* “normalizado” que é criado pela divisão do valor de cada célula pela raiz quadrada do produto da coluna e totais de linhas, ou célula \\(\frac{1}{\sqrt{column total \times row total}}\\). Por exemplo, a célula de "FEWO" e S Ambler é \\(\frac{1}{\sqrt{3 \times 3}}\\) ou 0.333.[^10] + +A tabela “normalizada” se parece com isto: + +``` + C Bennett D Wilks G Rickford J Crowder K Block K Seeback L Davies N Ashton +FEWO 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.408 +HESA 0.000 0.408 0.000 0.000 0.408 0.000 0.408 0.000 +INAN 0.316 0.000 0.316 0.316 0.000 0.316 0.000 0.000 +IWFA 0.235 0.000 0.235 0.235 0.235 0.000 0.235 0.235 +JUST 0.000 0.408 0.000 0.000 0.000 0.408 0.000 0.000 + + R Goguen S Ambler S Truppe +FEWO 0.000 0.333 0.408 +HESA 0.000 0.000 0.000 +INAN 0.000 0.258 0.000 +IWFA 0.235 0.192 0.235 +JUST 0.408 0.000 0.000 +``` + +O processo de normalização faz algo interessante. Aqueles que são membros de múltiplos comités e/ou que pertencem a comités com muitos membros tendem a ter notas de normalização mais baixas, sugerindo que são mais centrais para a rede. Estes membros serão colocados mais próximos do centro da matriz. Por exemplo, a célula pertencente a S Ambler e "IWFA" tem a pontuação mais baixa de 0,192 porque S Ambler é membro de três comités e o comité "IWFA" tem nove membros no gráfico representado. + +A próxima etapa é encontrar a decomposição de valor singular destes dados normalizados. Isto envolve álgebra linear bastante complexa que não será abordada aqui, mas pode-se aprender mais com este tutorial de *[Single Value Decomposition](https://perma.cc/CD5F-AL7W)* (Decomposição de Valores Singulares) (em inglês) ou com mais detalhes [neste pdf sobre SVD](https://perma.cc/F7MJ-EGET) (em inglês). Vou tentar resumir o que acontece em termos leigos. + +* Duas novas matrizes são criadas que mostram pontuações de “dimensão” para as linhas (comités) e as colunas (MPs) baseadas em vetores próprios. +* O número de dimensões é igual ao tamanho das colunas ou filas menos 1, que é sempre menor. Neste caso, há cinco comités em comparação com as MPs onze, portanto o número de dimensões é 4. +* Uma outra matriz mostra os valores singulares (valores próprios ou *eigenvalues*), que podem ser usados para mostrar a influência de cada dimensão na análise. +* Um dos vários “tratamentos” é aplicado aos dados para facilitar a plotagem. O mais comum é a abordagem de “coordenadas padrão”, que compara cada pontuação normalizada de forma positiva ou negativa com a pontuação média. 
+
+Ao usar coordenadas padrão, a nossa tabela de dados mostra o seguinte:
+```
+Columns (MPs):
+
+                Dim 1        Dim 2      Dim 3       Dim 4
+C Bennett  -0.4061946 -0.495800254  0.6100171  0.07717508
+D Wilks     1.5874119  0.147804035 -0.4190637 -0.34058221
+G Rickford -0.4061946 -0.495800254  0.6100171  0.07717508
+J Crowder  -0.4061946 -0.495800254  0.6100171  0.07717508
+K Block     0.6536800  0.897240970  0.5665289  0.04755678
+K Seeback   0.5275373 -1.245237189 -0.3755754 -0.31096392
+L Davies    0.6536800  0.897240970  0.5665289  0.04755678
+N Ashton   -0.8554566  0.631040866 -0.6518568  0.02489229
+R Goguen    0.6039463 -0.464503802 -0.6602408  0.73424971
+S Ambler   -0.7311723 -0.004817303 -0.1363437 -0.30608465
+S Truppe   -0.8554566  0.631040866 -0.6518568  0.02489229
+
+$inertia
+[1] 0.06859903 0.24637681 0.06859903 0.06859903 0.13526570 0.17971014 0.13526570
+[8] 0.13526570 0.13526570 0.08438003 0.13526570
+
+Rows (Committees):
+
+          Dim 1      Dim 2      Dim 3       Dim 4
+FEWO -1.0603194  0.6399308 -0.8842978 -0.30271466
+HESA  1.2568696  0.9885976  0.4384432 -0.28992174
+INAN -0.3705046 -0.8359969  0.4856563 -0.27320374
+IWFA -0.2531830  0.1866016  0.1766091  0.31676507
+JUST  1.1805065 -0.7950050 -0.8933999  0.09768076
+
+$inertia
+[1] 0.31400966 0.36956522 0.24927536 0.09017713 0.36956522
+```
+
+Cada pontuação de "dimensão" pode ser usada como uma coordenada no gráfico. Como não podemos visualizar em quatro dimensões, as saídas de CA normalmente se concentram nas primeiras duas ou três dimensões para produzir um gráfico (por exemplo, "HESA" será plotado em `[1.257, 0.989]` ou, em um gráfico 3D, em `[1.257, 0.989, 0.438]`).
+
+{% include figure.html filename="tr-pt-analise-correspondenciaR-8.png" alt="Imagem representando um gráfico de correspondências sobre comités parlamentares" caption="Figura 8. Análise de correspondência de Comités Parlamentares selecionados para a 1ª Sessão do Governo Stephen Harper, 2006" %}
+
+As pontuações de inércia são uma forma de mostrar a variação nos dados. Saúde e Justiça possuem a menor quantidade de membros e uma alta pontuação de inércia, enquanto o comité mais popular - "IWFA" - tem uma inércia pequena. Assim, a inércia é uma forma de quantificar a distância dos pontos em relação ao centro do gráfico.
+
+Outra pontuação importante é visível no gráfico de CA: a percentagem do valor explicativo de cada dimensão. Neste caso, o eixo horizontal explica 42,32% da variação no gráfico, enquanto o eixo vertical explica quase 31%. O que estes eixos significam deve ser interpretado com base no gráfico. Por exemplo, podemos dizer que o lado esquerdo representa questões relativas à identidade social e que as do lado direito são mais regulatórias. Uma análise histórica mais aprofundada das atas destes comités poderia, por sua vez, oferecer uma maior compreensão do significado da participação destes membros na época.
+
+## Notas
+[^1]: A CA tem uma história que atravessa várias disciplinas e, por isso, a terminologia pode ser confusa. Para simplificar, as categorias referem-se aos tipos de dados que estão sendo comparados (por exemplo, membros e clubes), enquanto cada item dentro dessas categorias (por exemplo, "The Tennis Club" ou "John McEnroe") é um elemento dentro dessa categoria. As localizações quantitativas dos elementos (coordenadas x e y) são os *datapoints*.
+
+[^2]: Brigitte Le Roux and Henry Rouanet, *Multiple Correspondence Analysis* (Los Angeles: SAGE Publications, 2010): 3.
+
+[^3]: Não pretendemos sugerir que esta análise seja de forma alguma conclusiva sobre os laços comerciais entre os EUA e a Rússia.
A questão é que, como a Rússia não faz parte da TPP neste acordo, ela se separa dos EUA. Por outro lado, se a adesão à TPP pudesse ser comprovada como representando laços tensos entre os EUA e a Rússia, apareceria no gráfico de CA. + +[^4]: Sebastien Le, Julie Josse, Francois Husson (2008). FactoMineR: An R Package for Multivariate Analysis. Journal of Statistical Software, 25(1), 1-18. [10.18637/jss.v025.i01](https://doi.org/10.18637/jss.v025.i01). + +[^5]: Alboukadel Kassambara and Fabian Mundt (2017). factoextra: Extract and Visualize the Results of Multivariate Data Analyses. R package version 1.0.4. [https://CRAN.R-project.org/package=factoextra](https://perma.cc/Z2RC-F4J7). + +[^6]: O valor explicativo é a distância dos *datapoints* afastados do centro do gráfico. Cada dimensão é responsável por parte da distância que os *datapoints* divergem do centro. + +[^7]: Em geral, a inércia nas estatísticas refere-se à variação ou “disseminação” de um *dataset*. Esta é análoga ao desvio padrão nos dados de distribuição. + +[^8]: Ver Laura Kane (3 de abril de 2017), "Missing and murdered women's inquiry not reaching out to families, say advocates." *CBC News Indigenous*. [https://www.cbc.ca/news/indigenous/mmiw-inquiry-not-reaching-out-to-families-says-advocates-1.4053694](https://perma.cc/UQ6J-8QVZ). + +[^9]: Em estatística, um valor p (*p-value*), abreviação de valor de probabilidade, é um indicador de quão provável um resultado teria ocorrido em circunstâncias aleatórias. Um baixo valor de p sugere uma probabilidade baixa de que o resultado teria ocorrido ao acaso e, portanto, fornece algumas evidências de que uma hipótese nula (neste caso, que os MPs e CPCs são categorias independentes) é improvável. + +[^10]: Katherine Faust (2005) "Using Correspondence Analysis for Joint Displays of Affiliation Network" in *Models and Methods in Social Network Analysis* eds. Peter J. Carrington, John Scott and Stanley Wasserman. diff --git a/pt/licoes/analise-sentimento-R-syuzhet.md b/pt/licoes/analise-sentimento-R-syuzhet.md index a91094c0d0..4ff751b230 100644 --- a/pt/licoes/analise-sentimento-R-syuzhet.md +++ b/pt/licoes/analise-sentimento-R-syuzhet.md @@ -1,504 +1,504 @@ ---- -title: Análise de sentimentos em R com 'syuzhet' -layout: lesson -slug: analise-sentimento-R-syuzhet -date: 2021-03-23 -translation_date: 2022-03-02 -authors: -- Jennifer Isasi -editors: -- Maria José Afanador-Llach -reviewers: -- Riva Quiroga -translator: -- Diana Rebelo Rodriguez -translation-editor: -- Jimmy Medeiros -translation-reviewer: -- Ana Giulia Aldgeire -- Ian Araujo -original: analisis-de-sentimientos-r -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/467 -difficulty: 2 -activity: analyzing -topics: [distant-reading, r, data-visualization] -abstract: "Esta lição ensina uma maneira de obter e analisar dados sobre emoções e sentimentos em uma narrativa." -avatar_alt: "Gravura com três rostos que expressam emoções distintas" -doi: 10.46430/phpt0022 ---- - -{% include toc.html %} - -# Objetivos - -Esta lição usa a metodologia de análise de sentimentos e emoções através da linguagem de programação R para investigar documentos textuais de modo individual. Embora a lição não seja destinada a usuários avançados de R, é necessário que se tenha algum conhecimento dessa linguagem; assumimos que se tenha o R instalado e saiba como importar pacotes. Também recomendamos o download do RStudio. 
Se não estiver familiarizado com R, é melhor trabalhar primeiro através das lições [Processamento básico de texto em R](/pt/licoes/processamento-basico-texto-r), [Noções básicas de R com dados tabulares](/pt/licoes/nocoes-basicas-R-dados-tabulares) ou [Data Wrangling and Management in R](/en/lessons/data-wrangling-and-management-in-r) (em inglês). Ao final desta lição, o(a) pesquisador(a) será capaz de: - -- Colocar perguntas de pesquisa com base na análise quantitativa de sentimentos em textos de tipo ensaístico e/ou narrativo. -- Usar a linguagem de programação R, o ambiente RStudio e o pacote `syuzhet` com o dicionário NRC para gerar o indicador de sentimento de um texto em diferentes linguagens. -- Analisar criticamente os resultados do processamento de texto. -- Visualizar os dados gerais e sua evolução ao longo de um texto. - -Esta lição foi construída com a versão R 4.0.2, mas acreditamos que funcionará corretamente em versões futuras do programa. - -> O uso do R é geralmente o mesmo para Windows, Mac e Linux. Entretanto, como vamos trabalhar com textos em português, precisaremos escrever algum código extra para indicar o formato UTF-8 em máquinas Windows. Nesses casos, o código para o sistema operacional correspondente é exibido. - -# Antes de começar - -## Análise de sentimentos - -A [análise dos sentimentos ou a mineração de opinião](https://myrabr.com/blog/analise-de-sentimento/) é utilizada para extrair automaticamente informações sobre a conotação negativa ou positiva da linguagem de um documento. Embora seja uma tarefa que vem sendo utilizada há muito tempo no campo do marketing ou da política, em estudos literários ainda é uma abordagem recente e não há um método único. Além disso, há a possibilidade de extrair a polaridade dos sentimentos e também das emoções. - -É importante especificar o que estamos procurando com os termos “sentimento” e “emoções”, pois eles são frequentemente usados de forma intercambiável, de modo geral, mas são diferentes. Para Antonio R. Damasio, as emoções são reações corporais instigantes de nosso corpo, determinadas por estímulos ambientais e derivadas do desenvolvimento da regulamentação biológica (12). Elas podem ser divididas em primárias e secundárias. Embora não haja um acordo final sobre o número de emoções básicas, geralmente são seis: raiva, alegria, repugnância, medo, tristeza e surpresa, embora Damasio considere esta última como sendo secundária. Além disso, no caso do sistema automático que utilizaremos, as emoções secundárias de antecipação e confiança também aparecem. - -Por outro lado, podemos definir sentimento como a ação e o efeito de sentir uma emoção ou, em outras palavras, é o resultado do fato de que “quando um objeto, uma pessoa, uma situação ou um pensamento provoca em nós a emoção da alegria, começa um processo que pode concluir no sentimento de estar alegre ou feliz” (Pereira Zazo 32) porque é uma emoção positiva. Durante a lição faremos uma distinção entre os dois termos, pois usaremos o resultado do sentimento para ver a sua evolução ao longo do texto e as emoções para ver o uso das palavras em geral. - -## Dicionário de léxicos NRC - -O pacote `syuzhet` funciona com quatro dicionários de sentimentos: Bing, Afinn, Stanford e NRC. Nesta lição, trabalharemos com este último, pois é o único disponível em vários idiomas, incluindo o português. Este vocabulário com valores de sentimentos negativos ou positivos e oito emoções foi desenvolvido por Saif M. Mohammad, um cientista do Conselho Nacional de Pesquisa do Canadá (NRC). 
O conjunto de dados foi construído manualmente através de pesquisas usando a técnica Maximum Difference Scaling ou MaxDiff, que avalia a preferência por uma série de alternativas (Mohammad e Turney). Assim, o léxico tem 14.182 palavras com as categorias de sentimentos positivos e negativos e as emoções de raiva, antecipação, repugnância, medo, alegria, tristeza, surpresa e confiança. Além disso, está disponível em mais de 100 idiomas (através de tradução automática). - -Seus termos de uso estabelecem que o vocabulário pode ser usado gratuitamente para fins de pesquisa, portanto, todos os dados estão disponíveis para download. - -Se trabalhamos com o inglês, podemos interagir com as diferentes categorias no site do [NRC Word-Emotion Association Lexicon](http://saifmohammad.com/WebPages/NRC-Emotion-Lexicon.htm). Lá também podemos encontrar trabalhos publicados sobre a obtenção dos valores para o vocabulário, sua organização, extensão, etc. - -## Pacote `syuzhet` - -O [pacote `syuzhet`](https://cran.r-project.org/web/packages/syuzhet/vignettes/syuzhet-vignette.html) foi desenvolvido em 2015 por Matthew Jockers; que o mantém funcionando até hoje e continuamente apresenta novas versões (no momento da preparação desta lição, foi usada a versão de dezembro de 2017). Uma série de posts no blog acompanham o desenvolvimento do pacote, e estão disponíveis no blog do professor desde [5 de junho de 2014](http://www.matthewjockers.net/page/2/) (em inglês). - -Naturalmente, o pacote foi desenvolvido com testes em textos escritos ou traduzidos para o inglês e não sem debate sobre sua utilidade, para atribuir valores a textos literários que muitas vezes são, por natureza, bastante subjetivos. - -> Atenção: A lista de palavras do dicionário está sendo preparada em inglês como língua principal e os dados quantitativos atribuídos a cada palavra são o resultado da avaliação humana pelos participantes americanos. Portanto, vários fatores devem ser levados em consideração ao utilizar esta metodologia: -> -> - O léxico em português é uma tradução direta realizada por tradução automática (estes sistemas já são muito confiáveis entre o inglês e o português, mas não em outros idiomas que o NRC afirma ser capaz de analisar como, por exemplo, o basco). -> - A pontuação de cada palavra, ou seja, a valência sentimental e emocional, tem um viés cultural e temporal que deve ser levado em conta, e um termo que pareceu positivo para os participantes da pesquisa pode nos parecer negativo. -> - O uso desta metodologia não é recomendado para textos que são altamente metafóricos e simbólicos. -> - O método não vai captar a negação de um sentimento positivo como, por exemplo, a frase “Eu não estou feliz”. -> -> Seguindo o espírito de adaptabilidade das lições do *Programming Historian* a outras línguas, foi decidido usar `syuzhet` em sua forma original, mas ao final da lição indicamos algumas funções avançadas para usar seu próprio dicionário de sentimentos com o mesmo pacote. 
- -Como os resultados nos *dataframes* aparecerão em inglês, se achar necessário, dedique um momento para aprender esta tradução: - -| anger | anticipation | disgust | fear | joy | sadness | surprise | trust | negative | positive | -| ------ | ------------ | -------- | ----- | ------- | -------- | -------- | --------- | -------- | -------- | -| raiva | anticipação | desgosto | medo | alegria | tristeza | surpresa | confiança | negativo | positivo | - -## Um pequeno exemplo - -Antes de começar a realizar a análise de nossos textos, é útil saber de forma geral qual é o processo de análise realizado pela função de obter sentimentos de `syuzhet`, com o dicionário NRC e os resultados obtidos sobre os quais trabalharemos. - -O sistema irá processar nosso texto e transformá-lo em um vetor de caracteres (aqui palavras), para analisá-los individualmente (também é possível fazê-lo por sentenças). Sem entrar ainda no código para realizar a análise, dê uma olhada neste breve exemplo (nota de tradução: para a versão em português foi usado o texto _Dom Casmurro_ de Machado de Assis, mantendo o tipo de exercícios e o código da lição original): - -> “Contando aquela crise do meu amor adolescente, sinto uma coisa que não sei se explico bem, e é que as dores daquela quadra, a tal ponto se espiritualizaram com o tempo, que chegam a diluir-se no prazer. Não é claro isto, mas nem tudo é claro na vida ou nos livros. A verdade é que sinto um gosto particular em referir tal aborrecimento, quando é certo que ele me lembra outros que não quisera lembrar por nada.” -> -> *Dom Casmurro* de Machado de Assis. - -Este fragmento é transformado em um vetor de caracteres: - -```R -> print(exemplo_2) -[1] "contando" "aquela" "crise" "do" "meu" -[6] "amor" "adolescente" "sinto" "uma" "coisa" -[11] "que" "não" "sei" "se" "explico" ... -``` - -Com a função de obter sentimentos, obtém-se a valência positiva e negativa de cada palavra, assim como a valência das oito emoções classificadas pelo NRC. O resultado para este fragmento é o seguinte: - -```R -> print(sentimentos_exemplo_df, row.names = exemplo_2) - anger anticipation disgust fear joy sadness surprise trust negative positive -contando 0 0 0 0 0 0 0 0 0 0 -aquela 0 0 0 0 0 0 0 0 0 0 -crise 1 0 0 0 0 1 0 0 3 0 -do 0 0 0 0 0 0 0 0 0 0 -meu 0 0 0 0 0 0 0 0 0 0 -amor 0 1 0 0 1 1 0 1 0 1 -adolescente 0 0 0 0 0 0 0 0 0 0 -sinto 0 0 0 0 0 0 0 0 0 0 -uma 0 0 0 0 0 0 0 0 0 0 -coisa 0 0 0 0 0 0 0 0 0 0 -que 0 0 0 0 0 0 0 0 0 0 -não 0 0 0 0 0 0 0 0 0 0 -sei 0 0 0 0 0 0 0 0 0 0 -se 0 0 0 0 0 0 0 0 0 0 -explico 0 0 0 0 0 0 0 0 0 0 -bem 0 0 0 0 0 0 0 0 0 0 -... -``` - -> Nota de tradução: na lição original, os autores não explicaram o passo a passo para se obter esses resultados em um primeiro momento. Apesar de a lição explicar detalhadamente o processo, julguei ser interessante demonstrar aqui como obtive esses outputs: - -```R -exemplo <- "Contando aquela crise do meu amor adolescente, sinto uma coisa que não sei se explico bem, e é que as dores daquela quadra, a tal ponto se espiritualizaram com o tempo, que chegam a diluir-se no prazer. Não é claro isto, mas nem tudo é claro na vida ou nos livros. A verdade é que sinto um gosto particular em referir tal aborrecimento, quando é certo que ele me lembra outros que não quisera lembrar por nada." 
- -exemplo_2 <- get_tokens(exemplo) - -print(exemplo_2) - -sentimentos_exemplo_df <- get_nrc_sentiment(exemplo_2, lang="portuguese") - -print(sentimentos_exemplo_df, row.names = exemplo_2) -``` - -Como podemos ver nos resultados deste objeto tipo *data frame* ou tabela, cada palavra ou ficha tem um valor padrão de 0 nas dez colunas. Se houver um valor maior que 0 significa, em primeiro lugar, que este termo existe no dicionário NRC e, em segundo lugar, que tem um valor atribuído para alguma emoção e/ou sentimento. Neste exemplo, podemos ver que a palavra “amor” é entendida de forma positiva, ainda que represente tristeza (*sadness*). Por outro lado, a palavra “crise” possui uma conotação negativa muito forte, pois há menos margem para dúvidas. - -As possibilidades de explorar, analisar e visualizar estes resultados dependem, em grande parte, das suas habilidades de programação mas, acima de tudo, da sua questão de pesquisa. Para ajudar o pesquisador, nesta lição introdutória aprenderemos como analisar os dados utilizando várias formas de visualização. - -## Pergunta de pesquisa - -Para essa lição, vamos utilizar o romance Dom Casmurro, escrito por [Machado de Assis](https://pt.wikipedia.org/wiki/Machado_de_Assis), publicado em 1899, de caráter realista e ambientado no Rio de Janeiro na segunda metade do século XIX. O protagonista e narrador é Bento Santiago (também conhecido como Bentinho ou Dom Casmurro), que apresenta relatos desde a sua juventude até à sua vida adulta, quando escreve. Nesse intervalo de tempo passa por experiências como viver em um seminário e se preparar para ser Padre, mas também desistir dessa vida ao se apaixonar por Capitu. O enredo central da trama é o ciúme envolvido nessa relação. - -É possível observar a queda emocional desta trama ao se extrair automaticamente os valores de sentimento do romance? Ou, em outras palavras, nossa recepção dos ciúmes de Bentinho coincide com os resultados desse cálculo automático? Além disso, quais são as palavras mais usadas na descrição das emoções do texto? - - -# Obter valores de sentimentos e emoções - -## Instalar e carregar pacotes - -A primeira coisa que precisamos fazer para poder obter o sentimento de nosso texto, é instalar e carregar o pacote R correspondente, neste caso, o `syuzhet`. Além disso, para facilitar a visualização dos dados, vamos utilizar os pacotes `RColorBrewer`, `wordcloud`, `tm` e `NLP`. Para fazer isto, digite e execute os dois comandos seguintes em seu console; o primeiro para instalar o pacote e o segundo para carregá-lo (se já os tiver instalado, só precisa carregá-los). Note que a instalação destes pacotes pode levar alguns minutos. - -```R -# Instale os pacotes: -install.packages("syuzhet") -install.packages("RColorBrewer") -install.packages("wordcloud") -install.packages("tm") - -# Carregue os pacotes -library(syuzhet) -library(RColorBrewer) -library(wordcloud) -library(tm) -``` - -## Carregar e preparar o texto - -Faça o download do texto do romance [Dom Casmurro](/assets/analise-sentimento-R-syuzhet/domCasmurro.txt). Como podemos ver, o documento está em formato de texto simples, pois isto é essencial para realizar seu processamento e análise em R. - -Com o texto em mãos, a primeira coisa que vamos fazer é carregá-lo como um objeto de _string_. Certifique-se de mudar o caminho para o texto para corresponder ao seu computador. 
- -**Em Mac e Linux** - -Em sistemas Mac podemos usar a função `get_text_as_string` integrada no pacote `syuzhet`: - -```R -texto <- get_text_as_string("https://raw.githubusercontent.com/programminghistorian/jekyll/gh-pages/assets/analise-sentimento-R-syuzhet/domCasmurro.txt") -``` - -**Em Windows** - -Os sistemas Windows não lêem diretamente caracteres com acentos ou outras marcações típicas do espanhol, português ou francês, então temos que dizer ao sistema que o nosso texto está no formato UTF-8 usando a função `scan`. - -```R -texto <- scan(file = "https://raw.githubusercontent.com/programminghistorian/jekyll/gh-pages/assets/analise-sentimento-R-syuzhet/domCasmurro.txt", fileEncoding = "UTF-8", what = character(), sep = "\n", allowEscapes = T) -``` - -Como a análise que vamos realizar precisa de uma lista, seja de palavras ou de frases (aqui só prestaremos atenção a palavras individuais), precisamos de um passo intermediário entre o carregamento do texto e a extração dos valores de sentimento. Assim, vamos dividir o texto (*string*) em uma lista de palavras (*tokens*). Isto é muito comum na análise distante de textos. - -Para isso, usamos a função `get_tokens()` do pacote e geramos um novo objeto, neste caso um vetor de *tokens* (palavras). Conforme veremos, com esta função nos livramos da pontuação no texto e temos uma lista de palavras. - -```R -texto_palavras <- get_tokens(texto) -head(texto_palavras) -[1] "dom" "casmurro" "texto" "de" "referência" "obras" -``` -Agora podemos ver quantas palavras ou tokens estão neste texto com a função `length()`: -```R -length(texto_palavras) -[1] 66931 -``` - -Se quiser realizar a análise para orações, utilize a função `get_sentences()` e siga o mesmo processo, com exceção da criação da nuvem de palavras: - -```R -oracoes_vetor <- get_sentences(texto) -length(oracoes_vetor) -[1] 8637 -``` - - - -## Extração de dados com o NRC Sentiment Lexicon - -Agora podemos executar a função `get_nrc_sentiment` para obter os sentimentos no romance *Dom Casmurro*. Como a função executa por padrão o vocabulário inglês, nós a escrevemos com o argumento “lang” (de *language*, ou idioma) para usar o vocabulário português (“portuguese”). Por sua vez, criamos um novo objeto para armazenar os dados extraídos. Este será um objeto do tipo *data frame*. Esta função procura a presença das oito emoções e dos dois sentimentos para cada palavra em nosso vetor, e atribui um número maior que 0 se elas existirem. Dependendo do desempenho de seu computador e de acordo com as características de nosso texto, este processo pode levar de 15 a 30 minutos. - -```R -sentimentos_df <- get_nrc_sentiment(texto_palavras, lang="portuguese") -``` - -Quando o código terminar de ser executado, um aviso aparecerá porque o `syuzhet` usa uma função que é descontinuada dentro de sua função `get_nrc_sentiment`: - -```R -Warning message: -`data_frame()` is deprecated as of tibble 1.1.0. -Please use `tibble()` instead. -This warning is displayed once every 8 hours. -Call `lifecycle::last_warnings()` to see where this warning was generated. -``` - -Quando o processo terminar, se desejarmos, podemos ler os resultados no novo objeto, simplesmente selecionando o objeto e executando-o. Mas para evitar “imprimir” milhares de linhas no console, também podemos usar a função `head()` para ver os primeiros seis *tokens*. 
No caso do texto que estamos usando, quando executarmos essa função, devemos ver o seguinte, que não é nada interessante: - -```R -> head(sentimientos_df) - anger anticipation disgust fear joy sadness surprise trust negative positive -1 0 0 0 0 0 0 0 1 0 1 -2 0 0 0 0 0 0 0 0 0 0 -3 0 0 0 0 0 0 0 0 0 0 -4 0 0 0 0 0 0 0 0 0 0 -5 0 0 0 0 0 0 0 0 0 0 -6 0 0 0 0 0 0 0 0 0 0 -``` - -## Resumo do texto - -O que é interessante é ver um resumo de cada um dos valores que obtivemos utilizando a função geral `summary()`. Isto pode ser muito útil ao comparar vários textos, pois permite ver diferentes medidas, tais como a média dos resultados para cada uma das emoções e os dois sentimentos. Por exemplo, podemos ver que o romance *Dom Casmurro* é, em [média](https://pt.wikipedia.org/wiki/M%C3%A9dia) (*mean*), um pouco mais positivo (0,03892) do que negativo (0,03559). Mas se olharmos para as emoções, parece que a tristeza (0,02116) aparece em mais momentos do que a alegria (0,01593). Vários dos valores fornecidos pela função de resumo do texto aparecem com um valor igual a 0, incluindo a [mediana](https://pt.wikipedia.org/wiki/Mediana_(estat%C3%ADstica)) (*median*). Isto indica que poucas palavras do romance aparecem no dicionário que estamos usando (NRC) ou, inversamente, que poucas têm uma atribuição de sentimento ou emoção no dicionário. - -```R -> summary(sentimentos_df) - anger anticipation disgust fear joy - Min. :0.00000 Min. :0.00000 Min. :0.000000 Min. :0.00000 Min. :0.00000 - 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000 - Median :0.00000 Median :0.00000 Median :0.000000 Median :0.00000 Median :0.00000 - Mean :0.01116 Mean :0.01337 Mean :0.008815 Mean :0.01288 Mean :0.01593 - 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.00000 - Max. :5.00000 Max. :2.00000 Max. :3.000000 Max. :4.00000 Max. :7.00000 - sadness surprise trust negative positive - Min. :0.00000 Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.00000 - 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 - Median :0.00000 Median :0.000000 Median :0.00000 Median :0.00000 Median :0.00000 - Mean :0.02116 Mean :0.008965 Mean :0.02299 Mean :0.03559 Mean :0.03892 - 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 - Max. :4.00000 Max. :2.000000 Max. :3.00000 Max. :5.00000 Max. :7.00000 -``` - -> Parabéns! Já temos os resultados da análise de sentimentos! E, agora, o que podemos fazer com esses números? - - - -# Análise das emoções em um texto - -## Gráfico de barras - -Para ver quais as emoções que estão mais presentes no texto, a maneira mais simples é criar um *barplot*. Para isso, usamos a função `barplot()` com o resumo das colunas 1 a 8, ou seja, as colunas de raiva (*anger*), antecipação (*antecipation*), desgosto (*disgust*), medo (*fear*), alegria (*joy*), tristeza (*sadness*), surpresa (*surprise*) e confiança (*trust*). Os resultados obtidos vêm do processamento da função `prop.table()` dos resultados das oito colunas com cada uma das palavras da tabela. - -> Para cada barra, todos os valores da coluna de emoções correspondentes são somados. Então, o resultado de todas as emoções que adicionamos na saída do gráfico é somado. No final, a soma de cada emoção é dividida pelo total de todas as colunas ou emoções. Isto não acrescenta as colunas negativas e positivas. 
[^1] - -```R -barplot( -colSums(prop.table(sentimentos_df[, 1:8])), -space = 0.2, -horiz = FALSE, -las = 1, -cex.names = 0.7, -col = brewer.pal(n = 8, name = "Set3"), -main = "'Dom Casmurro' de Machado de Assis", -sub = "Análise realizada por Diana Rebelo Rodriguez", -xlab="emoções", ylab = NULL) -``` -O resto dos parâmetros que vemos no código são “extras”, pois são uma forma de configurar o formato visual do gráfico. Assim, indicamos um espaço (*space*) de 0,2 entre as barras, que estará na posição vertical ao indicar falsamente (*FALSE*) sua horizontalidade (*horiz*) e, ao contrário, a horizontalidade para os valores no eixo Y com `las = 1`. Além disso, reduzimos o tamanho do nome de cada barra (*cex.names*) para 0,7 para evitar que elas desapareçam, por exemplo, se fizermos um pequeno gráfico. Graças ao pacote que instalamos no início, `RColorBrewer`, podemos dar cor às colunas automaticamente, neste caso, com a paleta de cores (*brewer.pal*) do conjunto número 3 do pacote, com oito cores, uma para cada coluna. Finalmente, vamos colocar um título e subtítulo em nosso gráfico com os parâmetros `main` e `sub`, assim como a palavra “emoções” no eixo X e nada no eixo Y. - -Gráfico de barras com os valores das seis emoções capturadas em Dom Casmurro por Machado de Assis - -Se esses parâmetros não forem do seu interesse, basta executar o seguinte código para obter o gráfico padrão: - -```R -barplot(colSums(prop.table(sentimentos_df[, 1:8]))) -``` - -> Certifique-se de que há espaço suficiente na seção de exibição de gráficos do R para poder ver os nomes de cada coluna. - -Estas informações já indicam que as emoções de tristeza e confiança prevalecem mais do que as de desgosto ou surpresa. Mas quais são as palavras usadas por Machado na expressão dessa tristeza? Com que frequência cada uma aparece no romance como um todo? - -## Contando o número de palavras com cada emoção - -A fim de realizar uma análise do texto, é muito interessante saber quais são as palavras usadas com mais frequência no texto em relação à sua identificação com cada emoção. Para isso, primeiro temos que criar um objeto de caracteres com todas as palavras que tenham um valor maior que 0 na coluna “tristeza” (*sadness*). Para selecionar somente essa coluna, usamos o sinal de dólar após o nome do *data frame*: -```R -palavras_tristeza <- texto_palavras[sentimentos_df$sadness > 0] -``` - -O conteúdo de `palavras_tristeza` nos indica que esta lista não diz muito, pois retorna apenas a listagem de palavras sem maiores informações. 
Para obter a contagem das vezes que cada palavra relacionada à tristeza aparece no romance, geramos uma tabela do primeiro conjunto de caracteres com as funções `unlist` e `table`, que depois ordenamos em ordem decrescente (se quisermos uma ordem ascendente mudamos TRUE para FALSE); criamos um novo objeto de tipo tabela e imprimimos as primeiras 12 palavras da lista com sua frequência: - -```R -palavras_tristeza_ordem <- sort(table(unlist(palavras_tristeza)), decreasing = TRUE) -head(palavras_tristeza_ordem, n = 12) -head(palavras_tristeza_ordem, n = 12) - - nada mal tarde entre - 135 80 53 50 - caso morte sair medo - 34 34 32 23 - amor pecado pena defunto - 20 17 17 14 -``` - -Se quisermos saber quantas palavras únicas foram relacionadas à tristeza, basta usar a função `length` no objeto que agora agrupa as palavras em ordem: - -```R -length(palabras_tristeza_orden) -[1] 163 -``` - -Podemos repetir a mesma operação com o resto das emoções ou com aquela que nos interessa, assim como com os sentimentos positivos e negativos. Tente obter os resultados para a emoção “alegria” e compare os resultados [^2]. - -Dependendo do tipo de análise que se deseje fazer, tal resultado é eficiente. Agora, para o propósito introdutório da lição, vamos gerar uma nuvem de palavras que ajuda a visualizar facilmente os termos associados a cada emoção (embora iremos visualizar aqui apenas quatro para facilitar a leitura). - -## Nuvem de emoções - -A fim de gerar uma nuvem com as palavras que correspondem a cada emoção em *Dom Casmurro*, criaremos primeiro um vetor no qual armazenaremos todas as palavras que, nas colunas que indicamos após o símbolo `$`, têm um valor maior que 0. É gerado um novo objeto do tipo vetor, que contém um elemento para a lista de cada emoção. - -Neste caso, devemos indicar novamente à função que temos caracteres acentuados se for uma máquina Windows. - -**Em Mac e Linux** - -```R -nuvem_emocoes_vetor <- c( -paste(texto_palavras[sentimentos_df$sadness> 0], collapse = " "), -paste(texto_palavras[sentimentos_df$joy > 0], collapse = " "), -paste(texto_palavras[sentimentos_df$anger > 0], collapse = " "), -paste(texto_palavras[sentimentos_df$fear > 0], collapse = " ")) -``` -**Em Windows** - -Uma vez gerado o vetor, deve convertê-lo em caracteres UTF-8 utilizando a função `iconv`. - -```R -nuvem_emocoes_vetor <- c( -paste(texto_palavras[sentimentos_df$sadness> 0], collapse = " "), -paste(texto_palavras[sentimentos_df$joy > 0], collapse = " "), -paste(texto_palavras[sentimentos_df$anger > 0], collapse = " "), -paste(texto_palavras[sentimentos_df$fear > 0], collapse = " ")) - -nuvem_emocoes_vetor <- iconv(nuvem_emocoes_vetor, "latin1", "UTF-8") -``` -Agora que temos o vetor, criamos um _corpus_ de palavras com quatro “documentos” para a nuvem: - -```R -nuvem_corpus <- Corpus(VectorSource(nuvem_emocoes_vetor)) -``` - -Em seguida, transformamos este corpus em uma matriz termo-documento com a função `TermDocumentMatrix()`. Com isto, agora usamos a função `as.matrix()` para converter o TDM em uma matriz que, como podemos ver, lista os termos no texto com um valor maior que zero para cada uma das quatro emoções que extraímos aqui. 
Para ver o início desta informação, use novamente a função `head`: - -```R -nuvem_tdm <- TermDocumentMatrix(nuvem_corpus) -nuvem_tdm <- as.matrix(nuvem_tdm) -head(nuvem_tdm) - Docs -Terms 1 2 3 4 - abismo 1 0 0 1 - acidente 1 0 1 1 - afligir 3 0 0 3 - agonia 1 0 1 1 - amargamente 1 0 1 0 - amor 20 20 0 0 -``` - -Agora, atribua um nome a cada um dos grupos de palavras ou documentos (*Docs*) em nossa matriz. Aqui vamos usar o termo em português para as colunas que selecionamos para exibir na nuvem. Mais uma vez, podemos ver a mudança feita ao executar a função `head`. - -```R -colnames(nuvem_tdm) <- c('tristeza', 'felicidade', 'raiva', 'confiança') -head(nuvem_tdm) - Docs -Terms tristeza felicidade raiva confiança - abismo 1 0 0 1 - acidente 1 0 1 1 - afligir 3 0 0 3 - agonia 1 0 1 1 - amargamente 1 0 1 0 - amor 20 20 0 0 -``` - - -Finalmente, podemos visualizar a nuvem de palavras que estamos acostumados a ver na mídia ou em estudos académicos. O tamanho e a localização da palavra correspondem à sua maior ou menor ocorrência com valor emocional atribuído no texto. Primeiro, executamos a função `set.seed()` para que quando reproduzirmos o resultado visual seja o mesmo que o nosso (se não o fizer, será o mesmo, mas as palavras aparecerão em posições diferentes). E, para gerar a nuvem, vamos usar a função `comparison.cloud` do pacote `wordcloud`. Indicamos o objeto a representar, aqui ‘nuvem_tdm’, indicamos uma ordem não aleatória das palavras, atribuímos uma cor para cada grupo de palavras e damos tamanhos ao título e à escala geral, e atribuímos um número máximo de termos que serão exibidos. -```R -set.seed(757) # pode ser qualquer número -comparison.cloud(nuvem_tdm, random.order = FALSE, - colors = c("green", "red", "orange", "blue"), - title.size = 1, max.words = 50, scale = c(2.5, 1), rot.per = 0.4) -``` - -O resultado deve ser semelhante à imagem abaixo, mas a localização das palavras pode ser diferente uma vez que a figura é gerada segundo o tamanho da tela: - -Nuvem das palavras mais frequentes correspondentes às emoções de tristeza, felicidade, raiva e confiança no romance Dom Casmurro de Machado de Assis. - -O que sugere o resultado desta nuvem? Ficamos impressionados com o aparecimento de palavras como “entre” no conjunto da tristeza e “cavalo” no conjunto da raiva. Este “disparate” está relacionado com o aviso já anunciado no início da lição. O vocabulário para análise de sentimentos que estamos usando aqui é traduzido do inglês, um tradutor automático que não é “perfeito”. - -# Visualizando a evolução dos sentimentos em um texto - -Para complementar a leitura isolada das emoções, estudando a flutuação dos sentimentos positivos e negativos ao longo de um texto, há uma maneira de normalizar e visualizar estas informações. Como a análise da função de extração de sentimento atribui um valor positivo tanto ao sentimento positivo quanto ao negativo, precisamos gerar dados entre um intervalo de -1 para o momento mais negativo e 1 para o mais positivo, e onde 0 é neutro. Para isso, calculamos a valência do texto multiplicando os valores na coluna de valores negativos de nosso *data frame* com os resultados por -1 e adicionamos o valor na coluna de valores positivos. 
- -```R -sentimentos_valencia <- (sentimentos_df$negative * -1) + sentimentos_df$positive -``` - -Finalmente, podemos gerar um gráfico com a função `simple_plot()` integrada no pacote `syuzhet`, que nos dará duas imagens diferentes; a primeira, tem todas as medidas que o algoritmo calcula e, a segunda, é uma normalização das mesmas. O eixo horizontal apresenta o texto em 100 fragmentos normalizados e o eixo vertical nos informa sobre a valência do sentimento no texto. Dependendo das características de seu computador, este gráfico pode levar até 20-30 minutos para ser gerado. - -```R -simple_plot(sentimentos_valencia) -``` - -> Assegure-se de possuir espaço suficiente no espaço de visualização de gráficos do R para que ele seja gerado. Caso contrário, aparecerá o erro: *Error in plot.new() : figure margins too large* - -Evolução das emoções ao longo do texto - -Assim, neste caso, podemos interpretar que o romance *Dom Casmurro* varia bastante entre momentos positivos e negativos. Começa de forma mais negativa, fica mais positivo, sendo seguido por um novo momento negativo e um segundo positivo (porém menos do que o primeiro) para um desfecho negativo. Qualquer pessoa que tenha lido o romance pode confirmar esta variação de sentimentos vivida pelo protagonista. - -## Salvar seus dados - -Se quiser salvar seus dados para retornar a eles mais tarde, é possível fazê-lo em um ficheiro de valores separados por vírgula (CSV) com a função `write.csv()`. Aqui dizemos para salvar o *data frame*, que contém o resultado das oito emoções e os dois sentimentos de texto em um ficheiro com uma extensão `.csv`. Além disso, podemos acrescentar a palavra à qual cada linha de resultados corresponde, em uma coluna à esquerda usando a palavra vetor feita no início da análise. - -```R -write.csv(sentimentos_df, file = "analise_sent_domCasmurro.csv", row.names = texto_palavras) -``` - -Agora, pode começar a analisar seus próprios textos e compará-los uns com os outros! - -# Outras funcionalidades e suas limitações - -Talvez esteja trabalhando em um projeto onde já tem um dicionário de sentimentos criado, ou talvez precise personalizar o vocabulário e sua valência sentimental por razões culturais ou temporais, ou talvez esteja procurando melhorar os resultados traduzidos automaticamente do NRC usado aqui. Em qualquer um destes casos, a partir do final de 2020, também é possível usar o seu próprio conjunto de dados no *script* graças à função `custom` e realizar algumas das operações que foram aprendidas nesta lição. 
- -Para carregar seu próprio “dicionário de sentimentos”, é preciso primeiro criar (ou modificar) uma tabela contendo, pelo menos, uma coluna para palavras e uma coluna para sua valência, por exemplo: - -|word|value| -|---|---| -|amor|1| -|cólera|-1| -|tapete|0| -|catástrofe|-2| - - -Em seguida, carregue os seus dados salvos como um CSV com a função `read.csv`, que criará um novo conjunto disponível como `data.frame`, no qual é possível verificar seu texto: -```R -vocabulario_personalizado <- read.csv("ficheiro.csv") -method <- "custom" -sentimentos_oracoes <- get_sentiment(oracoes_vetor, method = method, lexicon = vocabulario_personalizado) -``` -Se quiser visualizar o progresso do sentimento ao longo de seu texto, podemos usar a função `plot` com outros parâmetros que já vimos: - -```R -plot(sentimentos_oracoes, - type = "l", - main = "'Dom Casmurro' de Machado de Assis", - sub = "Análise realizada por Diana Rebelo Rodriguez", - xlab="emocoes", ylab = " " - ) -``` -Entretanto, tenha em mente que esta forma de análise será limitada e não será possível realizar as mesmas operações como explicado acima. Por exemplo, seguindo o modelo do exemplo, não conseguimos as informações sobre emoções, portanto não somos capazes de fazer uma nuvem de palavras. - -# Referências -Assis, Machado de. _Dom Casmurro_. São Paulo: Editora Ática, 1996. - -Jockers, Matthew L. _Syuzhet: Extract Sentiment and Plot Arcs from Text_, 2015. [https://github.com/mjockers/syuzhet](https://github.com/mjockers/syuzhet) - -Jockers, Matthew L. "Introduction to the Syuzhet Package", CRAN R Project, 2017. https://mran.microsoft.com/snapshot/2017-12-31/web/packages/syuzhet/vignettes/syuzhet-vignette.html - -Damasio, Antonio R. *El error de Descartes: La razón de las emociones*. Barcelona: Andres Bello, 1999. - -Mohammad, Saif, and Peter D. Turney. "Crowdsourcing a Word–Emotion Association Lexicon". *Computational intelligence* 29 (2013): 436-465, doi: 10.1111/j.1467-8640.2012.00460.x - -Pereira Zazo, Óscar. *El analisis de la comunicación en español*. Iowa: Kendal Hunt, 2015. - -Rodríguez Aldape, Fernando Manuel. *Cuantificación del Interés de un usuario en un tema mediante minería de texto y análisis de sentimiento.* Tese de Mestrado, Universidad Autónoma de Nuevo León, 2013. - -# Notas - -[^1]:Agradecemos Mounika Puligurthi, estagiária da Universidade do Texas (UT), pelo seu auxílio na compreensão deste cálculo (durante a primavera de 2019). - -[^2]:Perceba que a palavra “amor”, por exemplo, aparece em ambas as emoções com um valor de 20 pontos. O que será que isso significa? +--- +title: Análise de sentimentos em R com 'syuzhet' +layout: lesson +slug: analise-sentimento-R-syuzhet +date: 2021-03-23 +translation_date: 2022-03-02 +authors: +- Jennifer Isasi +editors: +- Maria José Afanador-Llach +reviewers: +- Riva Quiroga +translator: +- Diana Rebelo Rodriguez +translation-editor: +- Jimmy Medeiros +translation-reviewer: +- Ana Giulia Aldgeire +- Ian Araujo +original: analisis-de-sentimientos-r +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/467 +difficulty: 2 +activity: analyzing +topics: [distant-reading, r, data-visualization] +abstract: "Esta lição ensina uma maneira de obter e analisar dados sobre emoções e sentimentos em uma narrativa." 
+avatar_alt: "Gravura com três rostos que expressam emoções distintas" +doi: 10.46430/phpt0022 +--- + +{% include toc.html %} + +# Objetivos + +Esta lição usa a metodologia de análise de sentimentos e emoções através da linguagem de programação R para investigar documentos textuais de modo individual. Embora a lição não seja destinada a usuários avançados de R, é necessário que se tenha algum conhecimento dessa linguagem; assumimos que se tenha o R instalado e saiba como importar pacotes. Também recomendamos o download do RStudio. Se não estiver familiarizado com R, é melhor trabalhar primeiro através das lições [Processamento básico de texto em R](/pt/licoes/processamento-basico-texto-r), [Noções básicas de R com dados tabulares](/pt/licoes/nocoes-basicas-R-dados-tabulares) ou [Data Wrangling and Management in R](/en/lessons/data-wrangling-and-management-in-r) (em inglês). Ao final desta lição, o(a) pesquisador(a) será capaz de: + +- Colocar perguntas de pesquisa com base na análise quantitativa de sentimentos em textos de tipo ensaístico e/ou narrativo. +- Usar a linguagem de programação R, o ambiente RStudio e o pacote `syuzhet` com o dicionário NRC para gerar o indicador de sentimento de um texto em diferentes linguagens. +- Analisar criticamente os resultados do processamento de texto. +- Visualizar os dados gerais e sua evolução ao longo de um texto. + +Esta lição foi construída com a versão R 4.0.2, mas acreditamos que funcionará corretamente em versões futuras do programa. + +> O uso do R é geralmente o mesmo para Windows, Mac e Linux. Entretanto, como vamos trabalhar com textos em português, precisaremos escrever algum código extra para indicar o formato UTF-8 em máquinas Windows. Nesses casos, o código para o sistema operacional correspondente é exibido. + +# Antes de começar + +## Análise de sentimentos + +A [análise dos sentimentos ou a mineração de opinião](https://myrabr.com/blog/analise-de-sentimento/) é utilizada para extrair automaticamente informações sobre a conotação negativa ou positiva da linguagem de um documento. Embora seja uma tarefa que vem sendo utilizada há muito tempo no campo do marketing ou da política, em estudos literários ainda é uma abordagem recente e não há um método único. Além disso, há a possibilidade de extrair a polaridade dos sentimentos e também das emoções. + +É importante especificar o que estamos procurando com os termos “sentimento” e “emoções”, pois eles são frequentemente usados de forma intercambiável, de modo geral, mas são diferentes. Para Antonio R. Damasio, as emoções são reações corporais instigantes de nosso corpo, determinadas por estímulos ambientais e derivadas do desenvolvimento da regulamentação biológica (12). Elas podem ser divididas em primárias e secundárias. Embora não haja um acordo final sobre o número de emoções básicas, geralmente são seis: raiva, alegria, repugnância, medo, tristeza e surpresa, embora Damasio considere esta última como sendo secundária. Além disso, no caso do sistema automático que utilizaremos, as emoções secundárias de antecipação e confiança também aparecem. + +Por outro lado, podemos definir sentimento como a ação e o efeito de sentir uma emoção ou, em outras palavras, é o resultado do fato de que “quando um objeto, uma pessoa, uma situação ou um pensamento provoca em nós a emoção da alegria, começa um processo que pode concluir no sentimento de estar alegre ou feliz” (Pereira Zazo 32) porque é uma emoção positiva. 
Durante a lição faremos uma distinção entre os dois termos, pois usaremos o resultado do sentimento para observar sua evolução ao longo do texto, e as emoções para examinar o uso das palavras em geral.

## Dicionário de léxicos NRC

O pacote `syuzhet` funciona com quatro dicionários de sentimentos: Bing, Afinn, Stanford e NRC. Nesta lição, trabalharemos com este último, pois é o único disponível em vários idiomas, incluindo o português. Este vocabulário, com valores de sentimentos negativos ou positivos e oito emoções, foi desenvolvido por Saif M. Mohammad, cientista do Conselho Nacional de Pesquisa do Canadá (NRC). O conjunto de dados foi construído manualmente por meio de pesquisas usando a técnica Maximum Difference Scaling, ou MaxDiff, que avalia a preferência por uma série de alternativas (Mohammad e Turney). Assim, o léxico tem 14.182 palavras com as categorias de sentimentos positivos e negativos e as emoções de raiva, antecipação, repugnância, medo, alegria, tristeza, surpresa e confiança. Além disso, está disponível em mais de 100 idiomas (por meio de tradução automática).

Seus termos de uso estabelecem que o vocabulário pode ser usado gratuitamente para fins de pesquisa, portanto todos os dados estão disponíveis para download.

Se trabalharmos com o inglês, podemos interagir com as diferentes categorias no site do [NRC Word-Emotion Association Lexicon](https://saifmohammad.com/WebPages/NRC-Emotion-Lexicon.htm). Lá também podemos encontrar trabalhos publicados sobre a obtenção dos valores para o vocabulário, sua organização, extensão, etc.

## Pacote `syuzhet`

O [pacote `syuzhet`](https://cran.r-project.org/web/packages/syuzhet/vignettes/syuzhet-vignette.html) foi desenvolvido em 2015 por Matthew Jockers, que o mantém até hoje e continua lançando novas versões (no momento da preparação desta lição, foi usada a versão de dezembro de 2017). Uma série de posts de blog acompanha o desenvolvimento do pacote e está disponível no blog do professor desde [5 de junho de 2014](https://www.matthewjockers.net/page/2/) (em inglês).

Naturalmente, o pacote foi desenvolvido com testes em textos escritos ou traduzidos para o inglês, e não sem debate sobre sua utilidade para atribuir valores a textos literários que muitas vezes são, por natureza, bastante subjetivos.

> Atenção: a lista de palavras do dicionário foi preparada tendo o inglês como língua principal, e os dados quantitativos atribuídos a cada palavra são o resultado da avaliação humana por participantes norte-americanos. Portanto, vários fatores devem ser levados em consideração ao utilizar esta metodologia:
>
> - O léxico em português é uma tradução direta realizada por tradução automática (estes sistemas já são muito confiáveis entre o inglês e o português, mas não em outros idiomas que o NRC afirma ser capaz de analisar, como, por exemplo, o basco).
> - A pontuação de cada palavra, ou seja, a valência sentimental e emocional, tem um viés cultural e temporal que deve ser levado em conta, e um termo que pareceu positivo para os participantes da pesquisa pode nos parecer negativo.
> - O uso desta metodologia não é recomendado para textos altamente metafóricos e simbólicos.
> - O método não vai captar a negação de um sentimento positivo como, por exemplo, a frase “Eu não estou feliz”.
>
> Seguindo o espírito de adaptabilidade das lições do *Programming Historian* a outras línguas, foi decidido usar `syuzhet` em sua forma original, mas ao final da lição indicamos algumas funções avançadas para usar seu próprio dicionário de sentimentos com o mesmo pacote.

Como os resultados nos *dataframes* aparecerão em inglês, se achar necessário, dedique um momento para aprender esta tradução:

| anger | anticipation | disgust | fear | joy | sadness | surprise | trust | negative | positive |
| ------ | ------------ | -------- | ----- | ------- | -------- | -------- | --------- | -------- | -------- |
| raiva | antecipação | desgosto | medo | alegria | tristeza | surpresa | confiança | negativo | positivo |

## Um pequeno exemplo

Antes de começar a realizar a análise de nossos textos, é útil conhecer, em linhas gerais, o processo de análise realizado pela função de obtenção de sentimentos do `syuzhet` com o dicionário NRC, bem como os resultados sobre os quais trabalharemos.

O sistema irá processar nosso texto e transformá-lo em um vetor de caracteres (aqui, palavras), para analisá-los individualmente (também é possível fazê-lo por sentenças). Sem entrar ainda no código para realizar a análise, dê uma olhada neste breve exemplo (nota de tradução: para a versão em português foi usado o texto _Dom Casmurro_ de Machado de Assis, mantendo o tipo de exercícios e o código da lição original):

> “Contando aquela crise do meu amor adolescente, sinto uma coisa que não sei se explico bem, e é que as dores daquela quadra, a tal ponto se espiritualizaram com o tempo, que chegam a diluir-se no prazer. Não é claro isto, mas nem tudo é claro na vida ou nos livros. A verdade é que sinto um gosto particular em referir tal aborrecimento, quando é certo que ele me lembra outros que não quisera lembrar por nada.”
>
> *Dom Casmurro* de Machado de Assis.

Este fragmento é transformado em um vetor de caracteres:

```R
> print(exemplo_2)
[1] "contando"    "aquela"      "crise"       "do"          "meu"
[6] "amor"        "adolescente" "sinto"       "uma"         "coisa"
[11] "que"        "não"         "sei"         "se"          "explico" ...
```

Com a função de obtenção de sentimentos, obtém-se a valência positiva e negativa de cada palavra, assim como a valência das oito emoções classificadas pelo NRC. O resultado para este fragmento é o seguinte:

```R
> print(sentimentos_exemplo_df, row.names = exemplo_2)
            anger anticipation disgust fear joy sadness surprise trust negative positive
contando        0            0       0    0   0       0        0     0        0        0
aquela          0            0       0    0   0       0        0     0        0        0
crise           1            0       0    0   0       1        0     0        3        0
do              0            0       0    0   0       0        0     0        0        0
meu             0            0       0    0   0       0        0     0        0        0
amor            0            1       0    0   1       1        0     1        0        1
adolescente     0            0       0    0   0       0        0     0        0        0
sinto           0            0       0    0   0       0        0     0        0        0
uma             0            0       0    0   0       0        0     0        0        0
coisa           0            0       0    0   0       0        0     0        0        0
que             0            0       0    0   0       0        0     0        0        0
não             0            0       0    0   0       0        0     0        0        0
sei             0            0       0    0   0       0        0     0        0        0
se              0            0       0    0   0       0        0     0        0        0
explico         0            0       0    0   0       0        0     0        0        0
bem             0            0       0    0   0       0        0     0        0        0
...
```

> Nota de tradução: na lição original, os autores não explicaram o passo a passo para se obter esses resultados em um primeiro momento. Apesar de a lição explicar detalhadamente o processo, julguei ser interessante demonstrar aqui como obtive esses outputs:

```R
exemplo <- "Contando aquela crise do meu amor adolescente, sinto uma coisa que não sei se explico bem, e é que as dores daquela quadra, a tal ponto se espiritualizaram com o tempo, que chegam a diluir-se no prazer. Não é claro isto, mas nem tudo é claro na vida ou nos livros.
A verdade é que sinto um gosto particular em referir tal aborrecimento, quando é certo que ele me lembra outros que não quisera lembrar por nada."

exemplo_2 <- get_tokens(exemplo)

print(exemplo_2)

sentimentos_exemplo_df <- get_nrc_sentiment(exemplo_2, lang="portuguese")

print(sentimentos_exemplo_df, row.names = exemplo_2)
```

Como podemos ver nos resultados deste objeto do tipo *data frame*, ou tabela, cada palavra ou *token* tem um valor padrão de 0 nas dez colunas. Se houver um valor maior que 0, isso significa, em primeiro lugar, que o termo existe no dicionário NRC e, em segundo lugar, que ele tem um valor atribuído para alguma emoção e/ou sentimento. Neste exemplo, podemos ver que a palavra “amor” é entendida de forma positiva, ainda que represente tristeza (*sadness*). Por outro lado, a palavra “crise” possui uma conotação negativa muito forte, sem deixar muita margem para dúvida.

As possibilidades de explorar, analisar e visualizar estes resultados dependem, em grande parte, das suas habilidades de programação, mas, acima de tudo, da sua questão de pesquisa. Para ajudar o(a) pesquisador(a), nesta lição introdutória aprenderemos como analisar os dados utilizando várias formas de visualização.

## Pergunta de pesquisa

Para essa lição, vamos utilizar o romance *Dom Casmurro*, escrito por [Machado de Assis](https://pt.wikipedia.org/wiki/Machado_de_Assis), publicado em 1899, de caráter realista e ambientado no Rio de Janeiro na segunda metade do século XIX. O protagonista e narrador é Bento Santiago (também conhecido como Bentinho ou Dom Casmurro), que apresenta relatos desde a sua juventude até a sua vida adulta, quando escreve. Nesse intervalo de tempo, passa por experiências como viver em um seminário e se preparar para ser padre, mas também desistir dessa vida ao se apaixonar por Capitu. O enredo central da trama é o ciúme envolvido nessa relação.

É possível observar a queda emocional desta trama ao se extrair automaticamente os valores de sentimento do romance? Ou, em outras palavras, nossa recepção dos ciúmes de Bentinho coincide com os resultados desse cálculo automático? Além disso, quais são as palavras mais usadas na descrição das emoções do texto?

# Obter valores de sentimentos e emoções

## Instalar e carregar pacotes

A primeira coisa que precisamos fazer para poder obter o sentimento de nosso texto é instalar e carregar o pacote R correspondente, neste caso, o `syuzhet`. Além disso, para facilitar a visualização dos dados, vamos utilizar os pacotes `RColorBrewer`, `wordcloud`, `tm` e `NLP`. Para isso, digite e execute os dois conjuntos de comandos seguintes em seu console: o primeiro para instalar os pacotes e o segundo para carregá-los (se já os tiver instalado, só precisa carregá-los). Note que a instalação destes pacotes pode levar alguns minutos.

```R
# Instale os pacotes:
install.packages("syuzhet")
install.packages("RColorBrewer")
install.packages("wordcloud")
install.packages("tm")

# Carregue os pacotes
library(syuzhet)
library(RColorBrewer)
library(wordcloud)
library(tm)
```

## Carregar e preparar o texto

Faça o download do texto do romance [Dom Casmurro](/assets/analise-sentimento-R-syuzhet/domCasmurro.txt). Como podemos ver, o documento está em formato de texto simples, o que é essencial para realizar seu processamento e análise em R.

Com o texto em mãos, a primeira coisa que vamos fazer é carregá-lo como um objeto de _string_. Certifique-se de mudar o caminho para o texto para corresponder ao seu computador.
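Um esboço mínimo para conferir esse caminho antes de carregar o texto (supondo, hipoteticamente, que guardou uma cópia local chamada `domCasmurro.txt` na sua pasta de trabalho):

```R
# Verifica a pasta de trabalho atual do R
getwd()

# Se necessário, aponte para a pasta onde o ficheiro foi salvo
# (caminho hipotético; substitua pelo seu)
# setwd("~/Documents/sentimentos")

# Confirma que o ficheiro está acessível a partir da pasta de trabalho
file.exists("domCasmurro.txt")
```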
**Em Mac e Linux**

Em sistemas Mac e Linux, podemos usar a função `get_text_as_string`, integrada no pacote `syuzhet`:

```R
texto <- get_text_as_string("https://raw.githubusercontent.com/programminghistorian/jekyll/gh-pages/assets/analise-sentimento-R-syuzhet/domCasmurro.txt")
```

**Em Windows**

Os sistemas Windows não leem diretamente caracteres com acentos ou outras marcações típicas do espanhol, do português ou do francês, então temos que dizer ao sistema que o nosso texto está no formato UTF-8, usando a função `scan`.

```R
texto <- scan(file = "https://raw.githubusercontent.com/programminghistorian/jekyll/gh-pages/assets/analise-sentimento-R-syuzhet/domCasmurro.txt", fileEncoding = "UTF-8", what = character(), sep = "\n", allowEscapes = T)
```

Como a análise que vamos realizar precisa de uma lista, seja de palavras ou de frases (aqui só prestaremos atenção a palavras individuais), precisamos de um passo intermediário entre o carregamento do texto e a extração dos valores de sentimento. Assim, vamos dividir o texto (*string*) em uma lista de palavras (*tokens*). Isto é muito comum na análise distante de textos.

Para isso, usamos a função `get_tokens()` do pacote e geramos um novo objeto, neste caso um vetor de *tokens* (palavras). Conforme veremos, com esta função nos livramos da pontuação do texto e obtemos uma lista de palavras.

```R
texto_palavras <- get_tokens(texto)
head(texto_palavras)
[1] "dom"        "casmurro"   "texto"      "de"         "referência" "obras"
```

Agora podemos ver quantas palavras ou *tokens* há neste texto com a função `length()`:

```R
length(texto_palavras)
[1] 66931
```

Se quiser realizar a análise por orações, utilize a função `get_sentences()` e siga o mesmo processo, com exceção da criação da nuvem de palavras:

```R
oracoes_vetor <- get_sentences(texto)
length(oracoes_vetor)
[1] 8637
```

## Extração de dados com o NRC Sentiment Lexicon

Agora podemos executar a função `get_nrc_sentiment` para obter os sentimentos no romance *Dom Casmurro*. Como a função usa por padrão o vocabulário em inglês, acrescentamos o argumento “lang” (de *language*, idioma) para usar o vocabulário português (“portuguese”). Por sua vez, criamos um novo objeto para armazenar os dados extraídos. Este será um objeto do tipo *data frame*. Esta função procura a presença das oito emoções e dos dois sentimentos para cada palavra em nosso vetor e atribui um número maior que 0 se elas existirem. Dependendo do desempenho de seu computador e das características de nosso texto, este processo pode levar de 15 a 30 minutos.

```R
sentimentos_df <- get_nrc_sentiment(texto_palavras, lang="portuguese")
```

Quando o código terminar de ser executado, um aviso aparecerá porque o `syuzhet` usa uma função descontinuada dentro de sua função `get_nrc_sentiment`:

```R
Warning message:
`data_frame()` is deprecated as of tibble 1.1.0.
Please use `tibble()` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.
```

Quando o processo terminar, se desejarmos, podemos ler os resultados no novo objeto, simplesmente selecionando o objeto e executando-o. Mas, para evitar “imprimir” milhares de linhas no console, também podemos usar a função `head()` para ver os primeiros seis *tokens*.
No caso do texto que estamos usando, quando executarmos essa função, veremos o seguinte, o que não é nada interessante:

```R
> head(sentimentos_df)
  anger anticipation disgust fear joy sadness surprise trust negative positive
1     0            0       0    0   0       0        0     1        0        1
2     0            0       0    0   0       0        0     0        0        0
3     0            0       0    0   0       0        0     0        0        0
4     0            0       0    0   0       0        0     0        0        0
5     0            0       0    0   0       0        0     0        0        0
6     0            0       0    0   0       0        0     0        0        0
```

## Resumo do texto

O que é interessante é ver um resumo de cada um dos valores que obtivemos, utilizando a função geral `summary()`. Isto pode ser muito útil ao comparar vários textos, pois permite ver diferentes medidas, tais como a média dos resultados para cada uma das emoções e os dois sentimentos. Por exemplo, podemos ver que o romance *Dom Casmurro* é, em [média](https://pt.wikipedia.org/wiki/M%C3%A9dia) (*mean*), um pouco mais positivo (0,03892) do que negativo (0,03559). Mas, se olharmos para as emoções, parece que a tristeza (0,02116) aparece em mais momentos do que a alegria (0,01593). Vários dos valores fornecidos pela função de resumo do texto aparecem com um valor igual a 0, incluindo a [mediana](https://pt.wikipedia.org/wiki/Mediana_(estat%C3%ADstica)) (*median*). Isto indica que poucas palavras do romance aparecem no dicionário que estamos usando (NRC) ou, inversamente, que poucas têm uma atribuição de sentimento ou emoção no dicionário.

```R
> summary(sentimentos_df)
     anger          anticipation       disgust             fear             joy
 Min.   :0.00000   Min.   :0.00000   Min.   :0.000000   Min.   :0.00000   Min.   :0.00000
 1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000
 Median :0.00000   Median :0.00000   Median :0.000000   Median :0.00000   Median :0.00000
 Mean   :0.01116   Mean   :0.01337   Mean   :0.008815   Mean   :0.01288   Mean   :0.01593
 3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000
 Max.   :5.00000   Max.   :2.00000   Max.   :3.000000   Max.   :4.00000   Max.   :7.00000
    sadness           surprise            trust            negative          positive
 Min.   :0.00000   Min.   :0.000000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000
 1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000
 Median :0.00000   Median :0.000000   Median :0.00000   Median :0.00000   Median :0.00000
 Mean   :0.02116   Mean   :0.008965   Mean   :0.02299   Mean   :0.03559   Mean   :0.03892
 3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000
 Max.   :4.00000   Max.   :2.000000   Max.   :3.00000   Max.   :5.00000   Max.   :7.00000
```

> Parabéns! Já temos os resultados da análise de sentimentos! E, agora, o que podemos fazer com esses números?

# Análise das emoções em um texto

## Gráfico de barras

Para ver quais emoções estão mais presentes no texto, a maneira mais simples é criar um *barplot*. Para isso, usamos a função `barplot()` com o resumo das colunas 1 a 8, ou seja, as colunas de raiva (*anger*), antecipação (*anticipation*), desgosto (*disgust*), medo (*fear*), alegria (*joy*), tristeza (*sadness*), surpresa (*surprise*) e confiança (*trust*). Os valores exibidos resultam de aplicar a função `prop.table()` aos resultados das oito colunas para cada uma das palavras da tabela.

> Para cada barra, somam-se todos os valores da coluna da emoção correspondente. Em seguida, soma-se o resultado de todas as emoções exibidas no gráfico. No final, a soma de cada emoção é dividida pelo total de todas as colunas ou emoções. As colunas de valores negativos e positivos não entram nesse cálculo.[^1]

```R
barplot(
  colSums(prop.table(sentimentos_df[, 1:8])),
  space = 0.2,
  horiz = FALSE,
  las = 1,
  cex.names = 0.7,
  col = brewer.pal(n = 8, name = "Set3"),
  main = "'Dom Casmurro' de Machado de Assis",
  sub = "Análise realizada por Diana Rebelo Rodriguez",
  xlab = "emoções", ylab = NULL)
```

O resto dos parâmetros que vemos no código são “extras”, uma forma de configurar o formato visual do gráfico. Assim, indicamos um espaço (*space*) de 0,2 entre as barras, que ficarão na posição vertical ao indicarmos como falsa (*FALSE*) sua horizontalidade (*horiz*), enquanto os valores do eixo Y ficam na horizontal com `las = 1`. Além disso, reduzimos o tamanho do nome de cada barra (*cex.names*) para 0,7, para evitar que eles desapareçam se, por exemplo, fizermos um gráfico pequeno. Graças ao pacote `RColorBrewer`, que instalamos no início, podemos dar cor às colunas automaticamente, neste caso com a paleta de cores (*brewer.pal*) do conjunto “Set3” do pacote, com oito cores, uma para cada coluna. Finalmente, colocamos um título e um subtítulo em nosso gráfico com os parâmetros `main` e `sub`, assim como a palavra “emoções” no eixo X e nada no eixo Y.

Gráfico de barras com os valores das oito emoções capturadas em *Dom Casmurro* de Machado de Assis

Se esses parâmetros não forem do seu interesse, basta executar o seguinte código para obter o gráfico padrão:

```R
barplot(colSums(prop.table(sentimentos_df[, 1:8])))
```

> Certifique-se de que há espaço suficiente na seção de exibição de gráficos do R para poder ver os nomes de cada coluna.

Estas informações já indicam que as emoções de tristeza e confiança prevalecem mais do que as de desgosto ou surpresa. Mas quais são as palavras usadas por Machado na expressão dessa tristeza? Com que frequência cada uma aparece no romance como um todo?

## Contando o número de palavras com cada emoção

A fim de realizar uma análise do texto, é muito interessante saber quais são as palavras usadas com mais frequência no texto em relação à sua identificação com cada emoção. Para isso, primeiro temos que criar um objeto de caracteres com todas as palavras que tenham um valor maior que 0 na coluna de tristeza (*sadness*). Para selecionar somente essa coluna, usamos o sinal de dólar após o nome do *data frame*:

```R
palavras_tristeza <- texto_palavras[sentimentos_df$sadness > 0]
```

O conteúdo de `palavras_tristeza` nos indica que esta lista, por si só, não diz muito, pois retorna apenas a listagem de palavras, sem maiores informações.
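Podemos conferir isso rapidamente com um pequeno esboço (supondo que os objetos criados acima ainda estejam carregados na sessão):

```R
# As primeiras ocorrências da lista: apenas palavras repetidas, sem contagem
head(palavras_tristeza, n = 12)

# O total de ocorrências de palavras associadas à tristeza
length(palavras_tristeza)
```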
Para obter a contagem das vezes que cada palavra relacionada à tristeza aparece no romance, geramos uma tabela do primeiro conjunto de caracteres com as funções `unlist` e `table`, que depois ordenamos em ordem decrescente (se quisermos uma ordem ascendente, mudamos TRUE para FALSE); criamos um novo objeto do tipo tabela e imprimimos as primeiras 12 palavras da lista com sua frequência:

```R
palavras_tristeza_ordem <- sort(table(unlist(palavras_tristeza)), decreasing = TRUE)
head(palavras_tristeza_ordem, n = 12)

   nada     mal   tarde   entre
    135      80      53      50
   caso   morte    sair    medo
     34      34      32      23
   amor  pecado    pena defunto
     20      17      17      14
```

Se quisermos saber quantas palavras únicas foram relacionadas à tristeza, basta usar a função `length` no objeto que agora agrupa as palavras em ordem:

```R
length(palavras_tristeza_ordem)
[1] 163
```

Podemos repetir a mesma operação com o resto das emoções ou com aquela que nos interessar, assim como com os sentimentos positivos e negativos. Tente obter os resultados para a emoção “alegria” e compare-os.[^2]

Dependendo do tipo de análise que se deseje fazer, tal resultado é suficiente. Agora, para o propósito introdutório da lição, vamos gerar uma nuvem de palavras que ajuda a visualizar facilmente os termos associados a cada emoção (embora iremos visualizar aqui apenas quatro, para facilitar a leitura).

## Nuvem de emoções

A fim de gerar uma nuvem com as palavras que correspondem a cada emoção em *Dom Casmurro*, criaremos primeiro um vetor no qual armazenaremos todas as palavras que, nas colunas que indicamos após o símbolo `$`, têm um valor maior que 0. É gerado um novo objeto do tipo vetor, que contém um elemento com a lista de palavras de cada emoção.

Neste caso, devemos indicar novamente à função que temos caracteres acentuados, se estivermos em uma máquina Windows.

**Em Mac e Linux**

```R
nuvem_emocoes_vetor <- c(
  paste(texto_palavras[sentimentos_df$sadness > 0], collapse = " "),
  paste(texto_palavras[sentimentos_df$joy > 0], collapse = " "),
  paste(texto_palavras[sentimentos_df$anger > 0], collapse = " "),
  paste(texto_palavras[sentimentos_df$fear > 0], collapse = " "))
```

**Em Windows**

Uma vez gerado o vetor, deve-se convertê-lo em caracteres UTF-8 utilizando a função `iconv`.

```R
nuvem_emocoes_vetor <- c(
  paste(texto_palavras[sentimentos_df$sadness > 0], collapse = " "),
  paste(texto_palavras[sentimentos_df$joy > 0], collapse = " "),
  paste(texto_palavras[sentimentos_df$anger > 0], collapse = " "),
  paste(texto_palavras[sentimentos_df$fear > 0], collapse = " "))

nuvem_emocoes_vetor <- iconv(nuvem_emocoes_vetor, "latin1", "UTF-8")
```

Agora que temos o vetor, criamos um _corpus_ de palavras com quatro “documentos” para a nuvem:

```R
nuvem_corpus <- Corpus(VectorSource(nuvem_emocoes_vetor))
```

Em seguida, transformamos este corpus em uma matriz termo-documento com a função `TermDocumentMatrix()`. Com isto, usamos então a função `as.matrix()` para converter o TDM em uma matriz que, como podemos ver, lista os termos do texto com um valor maior que zero para cada uma das quatro emoções que extraímos aqui.
Para ver o início desta informação, use novamente a função `head`:

```R
nuvem_tdm <- TermDocumentMatrix(nuvem_corpus)
nuvem_tdm <- as.matrix(nuvem_tdm)
head(nuvem_tdm)
             Docs
Terms          1  2 3 4
  abismo       1  0 0 1
  acidente     1  0 1 1
  afligir      3  0 0 3
  agonia       1  0 1 1
  amargamente  1  0 1 0
  amor        20 20 0 0
```

Agora, atribua um nome a cada um dos grupos de palavras ou documentos (*Docs*) em nossa matriz. Aqui vamos usar o termo em português para as colunas que selecionamos exibir na nuvem. Mais uma vez, podemos ver a mudança feita ao executar a função `head`.

```R
colnames(nuvem_tdm) <- c('tristeza', 'felicidade', 'raiva', 'confiança')
head(nuvem_tdm)
             Docs
Terms         tristeza felicidade raiva confiança
  abismo             1          0     0         1
  acidente           1          0     1         1
  afligir            3          0     0         3
  agonia             1          0     1         1
  amargamente        1          0     1         0
  amor              20         20     0         0
```

Finalmente, podemos visualizar a nuvem de palavras que estamos acostumados a ver na mídia ou em estudos acadêmicos. O tamanho e a localização de cada palavra correspondem à sua maior ou menor ocorrência com valor emocional atribuído no texto. Primeiro, executamos a função `set.seed()` para que, ao reproduzir o código, o resultado visual seja o mesmo que o nosso (sem ela, o conteúdo será o mesmo, mas as palavras aparecerão em posições diferentes). Para gerar a nuvem, usamos a função `comparison.cloud` do pacote `wordcloud`: indicamos o objeto a representar (aqui `nuvem_tdm`), uma ordem não aleatória das palavras, uma cor para cada grupo de palavras, os tamanhos do título e da escala geral e o número máximo de termos que serão exibidos.

```R
set.seed(757) # pode ser qualquer número
comparison.cloud(nuvem_tdm, random.order = FALSE,
                 colors = c("green", "red", "orange", "blue"),
                 title.size = 1, max.words = 50, scale = c(2.5, 1), rot.per = 0.4)
```

O resultado deve ser semelhante à imagem abaixo, mas a localização das palavras pode ser diferente, uma vez que a figura é gerada segundo o tamanho da tela:

Nuvem das palavras mais frequentes correspondentes às emoções de tristeza, felicidade, raiva e confiança no romance *Dom Casmurro* de Machado de Assis

O que sugere o resultado desta nuvem? Chama a atenção o aparecimento de palavras como “entre” no conjunto da tristeza e “cavalo” no conjunto da raiva. Este “disparate” está relacionado com o aviso feito no início da lição: o vocabulário de análise de sentimentos que estamos usando aqui foi traduzido do inglês por um tradutor automático, que não é “perfeito”.

# Visualizando a evolução dos sentimentos em um texto

Para complementar a leitura isolada das emoções com o estudo da flutuação dos sentimentos positivos e negativos ao longo de um texto, há uma maneira de normalizar e visualizar estas informações. Como a função de extração de sentimentos atribui um valor positivo tanto ao sentimento positivo quanto ao negativo, precisamos gerar dados em um intervalo entre -1, para o momento mais negativo, e 1, para o mais positivo, onde 0 é neutro. Para isso, calculamos a valência do texto multiplicando por -1 os valores da coluna de sentimentos negativos de nosso *data frame* e somando-lhes os valores da coluna de sentimentos positivos.

```R
sentimentos_valencia <- (sentimentos_df$negative * -1) + sentimentos_df$positive
```

Finalmente, podemos gerar um gráfico com a função `simple_plot()`, integrada no pacote `syuzhet`, que nos dará duas imagens diferentes: a primeira tem todas as medidas que o algoritmo calcula e a segunda é uma normalização delas. O eixo horizontal apresenta o texto em 100 fragmentos normalizados e o eixo vertical nos informa sobre a valência do sentimento no texto. Dependendo das características de seu computador, este gráfico pode levar de 20 a 30 minutos para ser gerado.

```R
simple_plot(sentimentos_valencia)
```

> Assegure-se de ter espaço suficiente na área de visualização de gráficos do R para que o gráfico seja gerado. Caso contrário, aparecerá o erro: *Error in plot.new() : figure margins too large*

Evolução das emoções ao longo do texto

Assim, neste caso, podemos interpretar que o romance *Dom Casmurro* varia bastante entre momentos positivos e negativos. Começa de forma mais negativa, fica mais positivo, é seguido por um novo momento negativo e um segundo momento positivo (porém menos intenso que o primeiro), até um desfecho negativo. Qualquer pessoa que tenha lido o romance pode confirmar esta variação de sentimentos vivida pelo protagonista.

## Salvar seus dados

Se quiser salvar seus dados para retornar a eles mais tarde, é possível fazê-lo em um ficheiro de valores separados por vírgula (CSV), com a função `write.csv()`. Aqui indicamos que o *data frame*, que contém o resultado das oito emoções e dos dois sentimentos do texto, seja salvo em um ficheiro com extensão `.csv`. Além disso, podemos acrescentar, em uma coluna à esquerda, a palavra à qual cada linha de resultados corresponde, usando o vetor de palavras criado no início da análise.

```R
write.csv(sentimentos_df, file = "analise_sent_domCasmurro.csv", row.names = texto_palavras)
```

Agora, pode começar a analisar seus próprios textos e compará-los uns com os outros!

# Outras funcionalidades e suas limitações

Talvez esteja trabalhando em um projeto no qual já tem um dicionário de sentimentos criado, ou talvez precise personalizar o vocabulário e sua valência sentimental por razões culturais ou temporais, ou, ainda, talvez esteja procurando melhorar os resultados traduzidos automaticamente do NRC usados aqui. Em qualquer um destes casos, desde o final de 2020 também é possível usar o seu próprio conjunto de dados no *script*, graças ao método `custom` da função `get_sentiment`, e realizar algumas das operações aprendidas nesta lição.

Para carregar seu próprio “dicionário de sentimentos”, é preciso primeiro criar (ou modificar) uma tabela contendo, pelo menos, uma coluna para as palavras e uma coluna para a sua valência, por exemplo:

|word|value|
|---|---|
|amor|1|
|cólera|-1|
|tapete|0|
|catástrofe|-2|

Em seguida, carregue os seus dados salvos como um CSV com a função `read.csv`, que criará um novo conjunto disponível como `data.frame`, com o qual poderá analisar seu texto:

```R
vocabulario_personalizado <- read.csv("ficheiro.csv")
method <- "custom"
sentimentos_oracoes <- get_sentiment(oracoes_vetor, method = method, lexicon = vocabulario_personalizado)
```

Se quisermos visualizar a progressão do sentimento ao longo do texto, podemos usar a função `plot` com os parâmetros que já vimos:

```R
plot(sentimentos_oracoes,
  type = "l",
  main = "'Dom Casmurro' de Machado de Assis",
  sub = "Análise realizada por Diana Rebelo Rodriguez",
  xlab = "emoções", ylab = " "
  )
```

Entretanto, tenha em mente que esta forma de análise é limitada e não permite realizar todas as operações explicadas acima. Por exemplo, seguindo esse modelo, não obtemos as informações sobre as emoções e, portanto, não podemos fazer uma nuvem de palavras.

# Referências

Assis, Machado de. _Dom Casmurro_. São Paulo: Editora Ática, 1996.

Damasio, Antonio R. _El error de Descartes: La razón de las emociones_. Barcelona: Andres Bello, 1999.

Jockers, Matthew L. _Syuzhet: Extract Sentiment and Plot Arcs from Text_, 2015. [https://github.com/mjockers/syuzhet](https://github.com/mjockers/syuzhet)

Jockers, Matthew L. "Introduction to the Syuzhet Package", CRAN R Project, 2017. [https://mran.microsoft.com/snapshot/2017-12-31/web/packages/syuzhet/vignettes/syuzhet-vignette.html](https://mran.microsoft.com/snapshot/2017-12-31/web/packages/syuzhet/vignettes/syuzhet-vignette.html)

Mohammad, Saif, and Peter D. Turney. "Crowdsourcing a Word–Emotion Association Lexicon". _Computational Intelligence_ 29 (2013): 436-465. doi: 10.1111/j.1467-8640.2012.00460.x

Pereira Zazo, Óscar. _El analisis de la comunicación en español_. Iowa: Kendal Hunt, 2015.

Rodríguez Aldape, Fernando Manuel. _Cuantificación del Interés de un usuario en un tema mediante minería de texto y análisis de sentimiento_. Tese de Mestrado, Universidad Autónoma de Nuevo León, 2013.

# Notas

[^1]: Agradecemos a Mounika Puligurthi, estagiária da Universidade do Texas (UT), pelo seu auxílio na compreensão deste cálculo (durante a primavera de 2019).

[^2]: Perceba que a palavra “amor”, por exemplo, aparece em ambas as emoções com um valor de 20 pontos. O que será que isso significa?
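Para explorar a pergunta da nota 2, segue um esboço hipotético, supondo que os objetos `texto_palavras` e `sentimentos_df` da lição ainda estejam carregados; ele relaciona os pontos de cada emoção ao número de ocorrências da palavra no romance:

```R
# Quantas vezes "amor" aparece no romance
sum(texto_palavras == "amor")

# A valência NRC atribuída a uma ocorrência da palavra (1 ponto por emoção)
head(sentimentos_df[texto_palavras == "amor", ], n = 1)
```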
diff --git a/pt/licoes/analise-sentimento-exploracao-dados.md b/pt/licoes/analise-sentimento-exploracao-dados.md index 0ed4de6d40..c93a38d2ae 100644 --- a/pt/licoes/analise-sentimento-exploracao-dados.md +++ b/pt/licoes/analise-sentimento-exploracao-dados.md @@ -1,426 +1,426 @@ ---- -title: Análise de sentimento para exploração de dados -layout: lesson -slug: analise-sentimento-exploracao-dados -date: 2018-01-15 -translation_date: 2021-06-14 -authors: -- Zoë Wilkinson Saldaña -reviewers: -- Anandi Silva Knuppel -- Puteri Zarina Megat Khalid -editors: -- Adam Crymble -translator: -- Caio Mello -translation-editor: -- Josir Cardoso Gomes -translation-reviewer: -- Bruno Ponne -- Ian Araujo -original: sentiment-analysis -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/375 -difficulty: 2 -activity: analyzing -topics: [distant-reading] -abstract: "Nesta lição, você aprenderá a conduzir uma 'análise de sentimento' em textos e a interpretar os resultados. Esta é uma forma de análise exploratória de dados baseada no processamento de linguagem natural (PLN). Você aprenderá a instalar todos os softwares apropriados e a construir um programa reutilizável que pode ser aplicado aos seus próprios textos." -avatar_alt: Um homem sorridente e um homem rabugento -doi: 10.46430/phpt0017 ---- - -{% include toc.html %} - - -# Objetivos da lição - -Esta lição usa a análise de sentimento como base para uma análise exploratória de dados de um grande corpus textual. Portanto, é indicada para leitores com alguma experiência prévia em programação utilizando Python. Caso não tenha experiência com Python ou programação, a autora recomenda trabalhar nas primeiras lições da série “Introdução ao Python”. Ao final desta lição, você terá o conhecimento necessário para: - -* Elaborar questões de pesquisa que usem Processamento de Linguagem Natural (PLN) em um corpus textual. -* Utilizar Python e o Natural Language Processing Toolkit (NLTK) para gerar medidas de sentimento para um texto. -* Avaliar criticamente os resultados da análise de sentimento e ajustar os parâmetros e a metodologia conforme necessário. -* Identificar as próximas etapas para continuar o aprendizado sobre análise exploratória de dados e abordagens programáticas para dados qualitativos. - -Nota do tradutor: Devido à falta de uma biblioteca de código que funcione bem com os textos em português, optamos por manter os textos dos exercícios na língua original. - -## O que é análise exploratória de dados? - -A análise exploratória de dados é um conjunto de estratégias que trazem à tona características importantes num conjunto de dados que normalmente não são facilmente identificadas por meio da leitura tradicional. Com os insights da análise exploratória de dados em mãos, os pesquisadores podem tomar decisões mais adequadas ao selecionar um método ou abordagem para sua questão de pesquisa, ou até mesmo, identificar novas questões. - -Em 1977, o matemático John Tukey descreveu a análise exploratória de dados como uma forma de trabalho de detetive, sem a qual, os estudiosos muitas vezes perderiam descobertas interessantes, porém menos óbvias: - -> “A menos que o detetive encontre pistas, o juiz ou júri não terá como julgar. Caso a análise exploratória de dados não revele indícios, geralmente quantitativos, é provável que se considere não haver nada a ser comprovado. 
” (Tukey 1977: 3, tradução livre) - -## Explorando Texto com Análise de Sentimento - -Quando confrontado com um corpus promissor, porém muito grande, como o pesquisador pode encontrar aquilo de mais importante, que pode levar às descobertas de pesquisa mais interessantes? - -O Processamento de Linguagem Natural (PLN) abrange uma ampla gama de técnicas que se baseiam na aplicação de métodos analíticos computacionais ao conteúdo textual, fornecendo meios de categorizar e quantificar o texto. Essas abordagens de PLN, que incluem análise de sentimento, podem ajudar os pesquisadores a explorar seus textos. Nas palavras de Tukey, podem ajudar o pesquisador a encontrar “pistas” sobre seus textos e “indícios” de que pode valer a pena investigar algo mais a fundo. - -Nesta lição, vamos nos concentrar numa ferramenta do kit de ferramentas do PLN: a análise de sentimento. A análise de sentimento busca quantificar a intensidade emocional de palavras e frases num texto. Algumas ferramentas de análise de sentimento levam em consideração, inclusive, o peso emocional de sinais linguísticos como a pontuação ou mesmo os emojis. As ferramentas de análise de sentimento geralmente processam uma unidade de texto (uma frase, um parágrafo, um livro, etc.) e produzem pontuações (“scores”, em inglês) ou classificações quantitativas para indicar se o algoritmo considera que aquele texto transmite emoções positivas ou negativas. Algumas ferramentas também podem quantificar o *grau de positividade* ou o *grau de negatividade* num texto. Combinada com outros métodos de PLN, como modelagem de tópicos (“topic modelling”, em inglês), a análise de sentimento fornece meios de caracterizar as emoções expressas sobre diferentes tópicos de uma conversa. Quando usada em conjunto com a análise de rede, pode lançar luz sobre as maneiras como os indivíduos interagem uns com os outros. Um pesquisador interessado em interações sobre um evento político pode usar a análise de sentimento para estudar como os indivíduos descrevem aquele evento nas redes sociais. Com os dados certos para inserir na ferramenta, pode ser possível fazer comparações regionais ou entender como diferentes grupos demográficos vêem o evento de forma diferente. Como a ferramenta pode processar muitos dados sequencialmente, é até possível analisar o sentimento em centenas de milhares ou até milhões de eventos discursivos. - -Para começar, esta lição fornece uma introdução à análise de sentimento tanto prática quanto crítica. Como qualquer ferramenta computacional, a análise de sentimento tem uma série de limitações e vieses que os pesquisadores devem levar em consideração. Os pesquisadores devem ser especialmente cautelosos ao fazer afirmações empíricas com base nos resultados da análise de sentimento. Você poderá ser melhor atendido usando a análise de sentimento em situações provisórias e exploratórias, como meio de orientar o processo de pesquisa. Ao manejar essas ferramentas com ceticismo e eficácia, é possível realizar um trabalho de detetive bastante notável. - -## Análise de grandes coleções de correspondência textual - -Correspondências escritas como cartas, e-mails, registros de bate-papo, tweets e históricos de mensagens de texto podem fornecer aos pesquisadores uma visão inestimável de seus autores. Os textos geralmente são ricos em emoções e informações que não estão disponibilizadas em nenhum outro lugar. Um pesquisador pode aprender sobre as opiniões que as pessoas, objetos de seu estudo, tiveram sobre vários tópicos ou sobre determinados eventos. 
Também poderia ser possível aprender sobre os relacionamentos que os indivíduos desenvolveram e mantiveram em organizações ou redes complexas. - -Embora metodologias como etnografia, leitura “manual” e análise do discurso ajudem os pesquisadores a analisar a correspondência histórica, esses métodos trazem desafios significativos quando o número de textos cresce de dezenas ou centenas para milhares ou milhões. A análise textual computacional fornece um conjunto de métodos para tornar visíveis as tendências, dinâmicas e relacionamentos que podem estar ocultos para o leitor humano por problemas de escala. Além disso, muitos métodos de computação produzem descobertas que podem ser expressas quantitativamente e que podem subsequentemente permitir que o pesquisador realize modelagem estatística, visualização de informações e aprendizado de máquina (Machine Learning) para fazer outras análises. - -## Estudo de caso: corpus de e-mails da Enron - -Este tutorial usa a correspondência de e-mail da falida empresa americana de energia Enron. A Enron ocultou uma ampla variedade de práticas contábeis ilegais até que uma investigação federal em 2001 a levou à falência. Na época, o Escândalo Enron foi o maior colapso de uma empresa de capital aberto da história. Em 2001, a empresa começou a mostrar sinais de problemas financeiros que não se alinhavam com as divulgações financeiras da empresa até aquele momento. As ações da Enron negociadas em bolsa caíram de US$ 90,75 em meados de 2000 para menos de um dólar em novembro de 2001, o que levou os acionistas a processar a empresa. Uma investigação subsequente da Comissão de Valores Mobiliários dos Estados Unidos (SEC) revelou que os executivos da Enron cometeram fraude e negligência contábil em grande escala. A Enron declarou falência em dezembro daquele ano. Nos anos que se seguiram, vários executivos enfrentaram condenações criminais por sua participação no escândalo. Para os pesquisadores, o Escândalo Enron resultou na criação de um dos maiores (e mais infames) corpus de texto por correspondência já coletado: - -> “Um dos escândalos corporativos mais infames das últimas décadas deixou curiosamente em seu rastro um dos conjuntos de dados mais valiosos disponíveis publicamente. No final de 2001, o encobrimento de fraude contábil da Enron Corporation levou à falência da gigante da energia. A Federal Energy Regulatory Commission requereu todos os registros de e-mail da Enron como parte da investigação que se seguiu. Nos dois anos seguintes, a comissão divulgou, escondeu e depois divulgou novamente o corpus de e-mail para o público após excluir e-mails que continham informações pessoais, como números de previdência social. O corpus da Enron contém e-mails cujos assuntos variam de planejamento de férias de fim de semana a tópicos de discussão de estratégia política, e continua sendo o único grande exemplo de conjuntos de dados de e-mail do mundo real disponíveis para pesquisa ”. (Hardin, Sarkis e Urc, 2015) - -Quando o conjunto de dados de e-mail da Enron - organizado e editado - foi lançado em 2004, os pesquisadores descobriram uma oportunidade sem precedentes: acesso direto à maneira espontânea e sem censura como os funcionários de uma empresa condenada se comunicavam. De repente, os pesquisadores tiveram acesso a como as pessoas se comunicam no trabalho em uma escala sem precedentes. 
Isso era importante para pesquisadores interessados no caso especial do escândalo e colapso da Enron, mas também para pesquisadores interessados em um amplo espectro de questões sobre a comunicação cotidiana no trabalho.

Na década seguinte, centenas de novos estudos surgiram a partir desses e-mails, realizados em diversos campos como teoria das redes sociais, detecção de comunidades e de anomalias, gênero e comunicação dentro das organizações, mudança de comportamento durante uma crise organizacional, insularidade e formação de comunidade. O uso da teoria das redes sociais nas humanidades oferece algumas possibilidades fascinantes, mas não é tão simples.

Além da grande quantidade de mensagens incluídas (o corpus contém mais de 600.000 mensagens), o corpus de e-mails da Enron também inclui os metadados necessários para que os pesquisadores investiguem uma série de questões de pesquisa. Assim como a presença de envelopes com endereços legíveis do remetente e do destinatário seria um excelente trunfo para pesquisadores de correspondências de cartas históricas, a presença de endereços de e-mail do remetente e do destinatário permite que os pesquisadores associem os e-mails a determinados indivíduos conhecidos dentro da corporação. Como alguns indivíduos tinham vários endereços de e-mail, ou mais de um indivíduo pode ter compartilhado o mesmo endereço, os metadados não são de uso muito óbvio, mas são potencialmente elucidativos. O restante do tutorial explicará como aplicar e interpretar a análise de sentimento de e-mails neste corpus.

# Usando Python com o Natural Language Toolkit (NLTK)

    -Programando pela primeira vez? Esta lição é destinada a iniciantes, mas pode ser conveniente revisar outras lições de Python no Programming Historian. No entanto, observe que, embora muitas lições usem o Python versão 2, esta lição requer o Python versão 3. As instruções de instalação do Python 3 serão apresentadas a seguir. -
Neste tutorial, Python será usado junto com algumas ferramentas do Natural Language Toolkit (NLTK) para gerar indicadores de sentimento a partir de transcrições de e-mail. Para isso, você primeiro aprenderá como carregar os dados textuais no Python, selecionar as ferramentas de PLN apropriadas para análise de sentimento e escrever um algoritmo que calcula pontuações de sentimento para um determinado texto. Também exploraremos como ajustar seu algoritmo para melhor atender a seu objetivo de pesquisa. Ao final, você irá arquivar seu algoritmo de solução de problemas como um pacote de código conhecido como *função*, que poderá ser reutilizado e reaproveitado (inclusive na parte 2 deste tutorial).

## Instalação

Para continuar, as seguintes instalações serão necessárias:

* Python 3 (preferivelmente 3.5 ou superior) - [Instruções para baixar e instalar Python](https://wiki.python.org/moin/BeginnersGuide/Download)
* NLTK (3.2.5 ou superior) - [Instruções para baixar e instalar NLTK](http://www.nltk.org/install.html)

## Primeiros passos com NLTK

O Natural Language Toolkit (NLTK) é uma coleção de ferramentas Python reutilizáveis (também conhecida como uma biblioteca Python) que ajuda os pesquisadores a aplicar um conjunto de métodos computacionais a textos. As ferramentas variam desde métodos que ajudam a quebrar o texto em pedaços menores, passando por aqueles que identificam se uma palavra pertence a um determinado idioma, até textos de amostra que os pesquisadores podem usar para fins de treinamento e desenvolvimento (como o texto completo de *Moby Dick*).

Se você precisar de ajuda para baixar e instalar o módulo para [Python 3](https://www.python.org/download/releases/3.0/), dê uma olhada na lição [Instalando Módulos Python com pip](/en/lessons/installing-python-modules-pip), de Fred Gibbs (em inglês).

Em nosso caso, usaremos duas ferramentas NLTK em particular:

* A ferramenta ["Análise de sentimento VADER"](http://www.nltk.org/_modules/nltk/sentiment/vader.html) (que gera pontuações de sentimento positivas, negativas e neutras para uma determinada entrada)
* A ferramenta de tokenização ‘word_tokenize’ (divide um texto grande em uma sequência de unidades menores, como frases ou palavras)

Para usar VADER e word_tokenize, primeiro precisamos baixar e instalar alguns dados extras para o NLTK. O NLTK é um kit de ferramentas muito grande e várias de suas ferramentas requerem uma segunda etapa de download para reunir a coleção de dados necessária (geralmente léxicos codificados) para funcionar corretamente.

Para instalar a análise de sentimento e o tokenizador de palavras que usaremos neste tutorial, escreva um novo script em Python com as três linhas a seguir:

```python
import nltk
nltk.download('vader_lexicon')
nltk.download('punkt')
```

Você pode salvar este arquivo como `“installation.py”`. Se você não tiver certeza de como salvar e executar scripts em Python, reveja o tutorial sobre como configurar um ‘Ambiente de Desenvolvimento Integrado’ usando Python, substituindo o comando '% (python)% f' por '% (python3)% f' quando você chegar a essa parte do tutorial.

* Configurando um ambiente de desenvolvimento integrado para Python no [Windows](/pt/licoes/instalacao-windows).
* Configurando um ambiente de desenvolvimento integrado para Python no [Mac](/pt/licoes/instalacao-mac).
* Configurando um ambiente de desenvolvimento integrado para Python no [Linux](/pt/licoes/instalacao-linux).
Se você sabe como executar scripts Python, execute o arquivo usando Python 3.

[VADER](http://www.nltk.org/_modules/nltk/sentiment/vader.html) (Valence Aware Dictionary and sEntiment Reasoner) é uma ferramenta de atribuição de intensidade de sentimento acrescentada ao NLTK em 2014. Ao contrário de outras técnicas que exigem treinamento em textos parecidos antes do uso, o VADER está pronto para ser usado sem qualquer configuração especial. O VADER se destaca por fazer distinções refinadas entre vários graus de positividade e negatividade. Por exemplo, o VADER pontua “conforto” como moderadamente positivo e “euforia” como extremamente positivo. Ele também tenta capturar e pontuar características textuais comuns em texto online informal, como letras maiúsculas, pontos de exclamação e emoticons, conforme mostrado na tabela abaixo:

{% include figure.html filename="analise-sentimento1.png" caption="Vader captura pequenas gradações de entusiasmo. (Hutto e Gilbert, 2014). **Versão do tradutor**. Acesse a original [aqui](/en/lessons/sentiment-analysis)" %}

Como qualquer ferramenta de análise de texto, o VADER deve ser avaliado com criticidade e de forma contextualizada. O VADER foi desenvolvido em meados da década de 2010 principalmente para analisar microblogs em inglês e sites de rede social (especialmente o Twitter). Esse tipo de texto tende a ser muito mais informal do que o e-mail profissional e contém padrões de linguagem e de uso de recursos que diferem dos padrões de 1999-2002, quando os e-mails da Enron foram escritos. No entanto, o VADER também foi desenvolvido como uma ferramenta de análise de sentimento de propósito geral, e o estudo inicial dos autores mostra que ele se compara favoravelmente com ferramentas que foram treinadas para domínios específicos, usam léxicos especializados ou técnicas de aprendizado de máquina com muitos recursos (Hutto e Gilbert, 2014). A sensibilidade da ferramenta em relação aos graus de afeto se mostrou útil para descrever as sutilezas das emoções expressas nos e-mails profissionais - como pesquisadores, podemos estar especialmente interessados em capturar os momentos em que a emoção surge em um texto formal. No entanto, a análise de sentimento ainda tem dificuldade em capturar sentimentos complexos como ironia, sarcasmo e zombaria, casos em que o leitor médio seria capaz de fazer a distinção entre o texto literal e seu significado pretendido.

Embora o VADER seja uma boa ferramenta de uso geral para textos contemporâneos e históricos em inglês, a ferramenta fornece apenas suporte nativo parcial para textos em outras línguas (detecta emojis / maiúsculas / etc., mas não a escolha de palavras). No entanto, os desenvolvedores incentivam os usuários a usar a tradução automática para pré-processar textos que não sejam em inglês e, em seguida, inserir os resultados no VADER. O “VADER demo” inclui um código para enviar o texto de entrada automaticamente para o serviço web ‘My Memory Translation Service’ (leitores avançados podem encontrá-lo no [Github](https://github.com/cjhutto/vaderSentiment/blob/master/vaderSentiment/vaderSentiment.py) a partir da linha 554 - no momento da escrita deste artigo). A implementação deste método de tradução é mais indicada para usuários intermediários de Python.
Você pode aprender mais sobre o estado da arte da análise de sentimento multilíngue (que infelizmente quase sempre requer uma etapa de tradução) em ["Análise de sentimento multilíngue: o estado da arte e comparação independente de técnicas"](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4981629/), de Kia Dashtipour et al. (2016).

## Calculando Sentimento para um Parágrafo

Leia o seguinte trecho:

> “Like you, I am getting very frustrated with this process. I am genuinely trying to be as reasonable as possible. I am not trying to “hold up” the deal at the last minute. I’m afraid that I am being asked to take a fairly large leap of faith after this company (I don’t mean the two of you – I mean Enron) has screwed me and the people who work for me.”

Este é o primeiro parágrafo do e-mail de janeiro de 2001 de Timothy Belden para Louise Kitchen e John Lavorato sobre o “Acordo de Contratos de Trabalho”. Belden dirigiu os Serviços de Energia da Enron e mais tarde seria condenado por conspirar para aumentar os custos de energia na Califórnia, o que levou a uma crise energética em todo o estado.

Apesar do sentimento de frustração e ansiedade que se pode deduzir do parágrafo como um todo, observe a ambivalência das frases específicas dentro dele. Algumas parecem expressar esforços de boa fé, por exemplo: “Não estou tentando ‘atrasar’ o negócio” e “genuinamente tentando”. E, no entanto, há declarações negativas ainda mais fortes sobre “ficar frustrado”, “Receio” e “esta empresa [...] ferrou comigo e com as pessoas que trabalham para mim”.

Vamos calcular as pontuações de sentimento para este e-mail usando o VADER, para ter uma ideia do que a ferramenta pode fazer. Para começar, crie um novo diretório de trabalho (pasta) em seu computador chamado `“sentimento”`, em algum lugar onde possa encontrá-lo. Dentro dessa pasta, crie um novo arquivo de texto e salve-o como `“sentimento.py”`. É aqui que escreveremos o código para esta tarefa.

Primeiro, temos que dizer ao Python onde o código NLTK para a análise de sentimento VADER está localizado. No início do nosso arquivo, importaremos o código do VADER:

```python
# primeiro, importamos os módulos relevantes da biblioteca NLTK
from nltk.sentiment.vader import SentimentIntensityAnalyzer
```

Também devemos habilitar o Python a usar este código com o nosso conjunto particular de código. Embora tenhamos todas as instruções de que precisamos na biblioteca NLTK, o Python gosta de agrupar essas instruções em um único `objeto` (nossa ferramenta de análise de sentimentos) que nosso programa pode acessar. *SentimentIntensityAnalyzer* é uma `classe`, que é um “modelo” que instrui o Python a construir um `objeto` com um conjunto especial de `funções` e `variáveis`. No nosso caso, queremos construir um único `objeto`, nosso analisador de sentimento, que segue este “modelo”. Para isso, executamos *SentimentIntensityAnalyzer()* e atribuímos a saída - nosso novo analisador de sentimento - a uma variável, que chamaremos de *‘sid’*.

```python
# em seguida, inicializamos o VADER para que possamos usá-lo em nosso script Python
sid = SentimentIntensityAnalyzer()
```

Fazendo isso, fornecemos à nossa nova variável *sid* todos os recursos do código de análise de sentimento VADER. Assim, *sid* se tornou nossa ferramenta de análise de sentimento, mas com um nome mais curto.

Em seguida, precisamos armazenar o texto que queremos analisar em um lugar que o *sid* possa acessar.
Em Python, podemos armazenar uma única sequência de texto como uma variável de `string` (Nota do tradutor: Optamos por manter a palavra 'string' como no original em inglês para facilitar o entendimento de seu uso mais comum em códigos ['str']). - -```python -# a variável 'message_text' agora contém o texto que iremos analisar. -message_text = '''Like you, I am getting very frustrated with this process. I am genuinely trying to be as reasonable as possible. I am not trying to "hold up" the deal at the last minute. I'm afraid that I am being asked to take a fairly large leap of faith after this company (I don't mean the two of you -- I mean Enron) has screwed me and the people who work for me.''' -``` - -Como este texto inclui aspas e apóstrofos, é necessário circundar todo o texto com três aspas (“”” ou ’’’). Isso significa que quaisquer aspas e apóstrofos no texto serão reconhecidos como tal. Essa abordagem também mantém qualquer espaçamento que nosso texto já inclua. - -Agora você está pronto para processar o texto. - -Para fazer isso, o texto *(message_text)* deve ser inserido na ferramenta *(sid)* e o programa deve ser executado. Estamos interessados na "pontuação de polaridade" do analisador de sentimento, que nos dá uma pontuação positiva ou negativa. Este recurso é integrado ao VADER e pode ser solicitado sob demanda. - -Queremos ter certeza de capturar a saída de sid.polarity_scores () atribuindo-a a uma variável que chamaremos de *scores*: - -```python -print(message_text) - -# Utilizar método polarity_scores no sid e passar dentro dele o message_text produz um dicionário com pontuações negativas, neutras, positivas e compostas para o texto de entrada -scores = sid.polarity_scores(message_text) -``` - -Quando você executa este código, os resultados da análise de sentimento agora são armazenados no `dicionário` de *pontuação* (scores). Um dicionário, muito parecido com o tipo que você usa para pesquisar a definição de palavras, é uma variável que armazena informações conhecidas como 'valores' que são acessíveis dando ao programa a 'chave' para a entrada que você deseja ler. Isso significa que um dicionário como *scores* pode armazenar muitos `pares de valores-chave`. Para solicitar os dados, você só precisa conhecer as `chaves`. Mas não sabemos as `chaves`. Felizmente, Python nos dará uma lista de todas as `chaves`, classificadas em ordem alfabética, se usarmos a função `sorted(scores)`. - -Para imprimir cada `chave` e `valor` armazenado no dicionário, precisamos de um `for loop`, que aplica o mesmo código sequencialmente a todas as `chaves` do dicionário. - -Aqui está o código para imprimir cada par de `valores-chave` dentro da variável de pontuação (score): - -```python -# Aqui, percorremos as chaves contidas nas pontuações (pos, neu, neg e pontuações compostas) e imprimimos os pares de valores-chave na tela para digitação classificada (pontuações): -for key in sorted(scores): - print('{0}: {1}, '.format(key, scores[key]), end='') -``` - -Aqui está todo o código em um único programa: - -```python -# primeiro, importamos os módulos relevantes da biblioteca NLTK -from nltk.sentiment.vader import SentimentIntensityAnalyzer - -# a seguir, inicializamos o VADER para que possamos usá-lo em nosso script Python -sid = SentimentIntensityAnalyzer() - -# a variável 'message_text' agora contém o texto que iremos analisar. -message_text = '''Like you, I am getting very frustrated with this process. I am genuinely trying to be as reasonable as possible. 
Para imprimir cada `chave` e `valor` armazenado no dicionário, precisamos de um laço `for`, que aplica o mesmo código, sequencialmente, a todas as `chaves` do dicionário.

Aqui está o código para imprimir cada par de `chave-valor` dentro da variável *scores*:

```python
# aqui, percorremos as chaves contidas em scores (pos, neu, neg e compound) e imprimimos os pares de chave-valor na tela, em ordem alfabética (sorted(scores)):
for key in sorted(scores):
    print('{0}: {1}, '.format(key, scores[key]), end='')
```

Aqui está todo o código em um único programa:

```python
# primeiro, importamos os módulos relevantes da biblioteca NLTK
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# a seguir, inicializamos o VADER para que possamos usá-lo em nosso script Python
sid = SentimentIntensityAnalyzer()

# a variável 'message_text' agora contém o texto que iremos analisar
message_text = '''Like you, I am getting very frustrated with this process. I am genuinely trying to be as reasonable as possible. I am not trying to "hold up" the deal at the last minute. I'm afraid that I am being asked to take a fairly large leap of faith after this company (I don't mean the two of you -- I mean Enron) has screwed me and the people who work for me.'''

print(message_text)

# utilizar o método polarity_scores de sid, passando message_text como argumento, produz um dicionário com pontuações negativas, neutras, positivas e compostas para o texto de entrada
scores = sid.polarity_scores(message_text)

# aqui, percorremos as chaves contidas em scores (pos, neu, neg e compound) e imprimimos os pares de chave-valor na tela
for key in sorted(scores):
    print('{0}: {1}, '.format(key, scores[key]), end='')
```

Salve seu arquivo Python. Agora estamos prontos para executar o código. Usando seu método preferido (seu Ambiente de Desenvolvimento Integrado ou a linha de comando), execute o arquivo `sentimento.py`.

O resultado deve ser semelhante a este:

```
Like you, I am getting very frustrated with this process. I am genuinely trying to be as reasonable as possible. I am not trying to "hold up" the deal at the last minute. I'm afraid that I am being asked to take a fairly large leap of faith after this company (I don't mean the two of you -- I mean Enron) has screwed me and the people who work for me.

compound: -0.3804, neg: 0.093, neu: 0.836, pos: 0.071,
```
Lembre-se de usar três aspas simples para envolver a string *message_text* acima. Se você usar aspas duplas, a string terminará mais cedo, por causa das aspas presentes no próprio texto.

O VADER coleta e pontua palavras e características negativas, neutras e positivas (e leva em conta, ao longo do caminho, fatores como a negação). Os valores “neg”, “neu” e “pos” descrevem a fração das pontuações ponderadas que se enquadra em cada categoria. O VADER também soma todas as pontuações ponderadas para calcular um valor “composto” (“compound”), normalizado entre -1 e 1; esse valor tenta descrever o afeto geral de todo o texto, de fortemente negativo (-1) a fortemente positivo (1). Neste caso, a análise com o VADER descreve a passagem como ligeira a moderadamente negativa (-0,3804). Podemos pensar nesse valor como uma estimativa da impressão geral de um leitor médio ao considerar o e-mail como um todo, apesar de alguma ambiguidade e ambivalência ao longo do caminho.

Ao ler o texto, eu estaria inclinado a concordar com essa avaliação geral. O valor de saída de -0,3804 é negativo, mas não fortemente negativo. Os pesquisadores podem desejar definir um limite mínimo de positividade ou negatividade antes de declarar um texto definitivamente positivo ou negativo - por exemplo, a documentação oficial do VADER sugere os limites de -0,5 e 0,5, que este trecho específico não alcançaria (em outras palavras, este texto é negativo, mas não extremamente negativo).
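Na prática, esse corte pode ser implementado com uma comparação simples. O esboço abaixo é uma sugestão nossa (a função `classificar` e seus nomes são hipotéticos, apenas para ilustrar a ideia de limite mínimo):

```python
# esboço: classifica um valor 'compound' usando os limiares de -0,5 e 0,5
# sugeridos pela documentação do VADER (função e nomes hipotéticos)
def classificar(compound, limiar=0.5):
    if compound >= limiar:
        return 'positivo'
    if compound <= -limiar:
        return 'negativo'
    return 'indeterminado'

print(classificar(-0.3804))  # 'indeterminado': negativo, mas não extremamente negativo
```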
O que isso implica, para você, sobre a maneira como esse sentimento pode ser expresso num contexto de e-mail profissional? Como você definiria seus valores-limite quando o texto expressa emoções de maneira mais sutil ou cortês? Você acha que a análise de sentimento é uma ferramenta apropriada para a nossa análise exploratória de dados?

Desafio: tente substituir o conteúdo de *message_text* pelas seguintes cadeias de caracteres e execute novamente o programa. Não se esqueça de envolver cada texto com três aspas simples ao atribuí-lo à variável *message_text* (como em: *message_text* = '''algumas palavras'''). Antes de executar o programa, tente adivinhar o resultado da análise de sentimento: positivo ou negativo? Quão positivo ou negativo?

Texto 1:

```
Looks great. I think we should have a least 1 or 2 real time traders in Calgary.
```

Texto 2:

```
I think we are making great progress on the systems side. I would like to
set a deadline of November 10th to have a plan on all North American projects
(I'm ok if fundementals groups are excluded) that is signed off on by
commercial, Sally's world, and Beth's world. When I say signed off I mean
that I want signitures on a piece of paper that everyone is onside with the
plan for each project. If you don't agree don't sign. If certain projects
(ie. the gas plan) are not done yet then lay out a timeframe that the plan
will be complete. I want much more in the way of specifics about objectives
and timeframe.

Thanks for everyone's hard work on this.
```

Experimente uma terceira vez com algum texto de uma de suas próprias fontes de pesquisa. Que resultados você obteve para cada um? Você concorda com eles?

# Determine o escopo apropriado para o e-mail

Quando analisado por meio da ferramenta de análise de sentimento VADER, um texto produz um conjunto de pontuações positivas, neutras e negativas, que são então agregadas e escalonadas como uma “pontuação composta”. Embora seja útil sabê-lo em teoria, como esse método pode ser aplicado aos dados do exemplo da Enron - isto é, a uma coleção de dados e metadados de e-mail? E o que isso pode nos dizer sobre as emoções, os relacionamentos e as mudanças ao longo do tempo entre os funcionários da Enron?
Nesta seção, apresentaremos o processo de seleção do escopo de análise para a nossa ferramenta de análise de sentimento. Considere os seguintes dados brutos, pertencentes a um e-mail de 23 de outubro de 2000 escrito por Jeffrey Shankman, então presidente de mercados globais da Enron (Quinn, 2006):

```
Message-ID: <3764632.1075857565248.JavaMail.evans@thyme>
Date: Mon, 23 Oct 2000 09:14:00 -0700 (PDT)
From: jeffrey.shankman@enron.com
To: john.nowlan@enron.com, don.schroeder@enron.com, david.botchlett@enron.com,
	chris.mahoney@enron.com, ross.koller@enron.com
Subject:
Mime-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
X-From: Jeffrey A Shankman
X-To: John L Nowlan, Don Schroeder, David J Botchlett, Chris Mahoney, Ross Koller
X-cc:
X-bcc:
X-Folder: \Jeffrey_Shankman_Jun2001\Notes Folders\Sent
X-Origin: Shankman-J
X-FileName: jshankm.nsf

It seems to me we are in the middle of no man's land with respect to the
following: Opec production speculation, Mid east crisis and renewed
tensions, US elections and what looks like a slowing economy (?), and no
real weather anywhere in the world. I think it would be most prudent to play
the markets from a very flat price position and try to day trade more
aggressively. I have no intentions of outguessing Mr. Greenspan, the US.
electorate, the Opec ministers and their new important roles, The Israeli and
Palestinian leaders, and somewhat importantly, Mother Nature. Given that,
and that we cannot afford to lose any more money, and that Var seems to be a
problem, let's be as flat as possible. I'm ok with spread risk (not front to
backs, but commodity spreads).


The morning meetings are not inspiring, and I don't have a real feel for
everyone's passion with respect to the markets. As such, I'd like to ask
John N. to run the morning meetings on Mon. and Wed.


Thanks. Jeff
```

No texto da mensagem, Shankman traça uma estratégia corporativa para avançar no que ele percebe como um contexto geopolítico ambíguo. A mensagem descreve uma série de situações difíceis, bem como exasperação (“As reuniões matinais não são inspiradoras”) e incerteza (“não tenho uma noção real da paixão de todos”). Ao mesmo tempo, Shankman descreve um conjunto de etapas de ação, junto com pedidos educados (“Eu gostaria de pedir...”) e expressões de gratidão (“Obrigado”).

Antes de prosseguirmos, pare um minuto para refletir sobre a mensagem. Como você acha que um leitor típico descreveria a intensidade emocional deste e-mail? Considerando o que você sabe agora sobre o VADER, que proporção de positividade, negatividade e neutralidade você espera que a ferramenta de análise de sentimento encontre na mensagem? Finalmente, o que você acha que a pontuação composta irá sugerir sobre o afeto geral da mensagem?

Como discutimos acima, a análise de sentimento não fornece uma saída objetiva, mas sim indicadores orientadores que refletem nossa escolha e calibração das ferramentas analíticas. Talvez o elemento mais importante dessa calibração seja selecionar o escopo do texto que está sendo analisado, ou seja, quanto de uma mensagem colocamos na ferramenta de uma vez. Em nosso caso, podemos determinar o escopo da análise decidindo entre analisar a mensagem inteira como uma única unidade ou dividi-la em unidades menores, como frases, e analisar cada uma separadamente.
Primeiro, vamos considerar uma abordagem no nível da mensagem, na qual analisamos a mensagem como um único bloco:

```python
# continue com o mesmo código da seção anterior, mas substitua o valor da variável message_text pelo novo texto do e-mail:

message_text = '''It seems to me we are in the middle of no man's land with respect to the following: Opec production speculation, Mid east crisis and renewed tensions, US elections and what looks like a slowing economy (?), and no real weather anywhere in the world. I think it would be most prudent to play the markets from a very flat price position and try to day trade more aggressively. I have no intentions of outguessing Mr. Greenspan, the US. electorate, the Opec ministers and their new important roles, The Israeli and Palestinian leaders, and somewhat importantly, Mother Nature. Given that, and that we cannot afford to lose any more money, and that Var seems to be a problem, let's be as flat as possible. I'm ok with spread risk (not front to backs, but commodity spreads). The morning meetings are not inspiring, and I don't have a real feel for everyone's passion with respect to the markets. As such, I'd like to ask John N. to run the morning meetings on Mon. and Wed. Thanks. Jeff'''
```

Atualize `sentimento.py` com o código acima, salve o arquivo e execute-o. A saída deve ser semelhante a esta:

```
It seems to me we are in the middle of no man's land with respect to the following: Opec production speculation, Mid east crisis and renewed tensions, US elections and what looks like a slowing economy (?), and no real weather anywhere in the world. I think it would be most prudent to play the markets from a very flat price position and try to day trade more aggressively. I have no intentions of outguessing Mr. Greenspan, the US. electorate, the Opec ministers and their new important roles, The Israeli and Palestinian leaders, and somewhat importantly, Mother Nature. Given that, and that we cannot afford to lose any more money, and that Var seems to be a problem, let's be as flat as possible. I'm ok with spread risk (not front to backs, but commodity spreads). The morning meetings are not inspiring, and I don't have a real feel for everyone's passion with respect to the markets. As such, I'd like to ask John N. to run the morning meetings on Mon. and Wed. Thanks. Jeff
compound: 0.889, neg: 0.096, neu: 0.765, pos: 0.14,
```

Aqui você pode ver que, ao analisar o e-mail como um todo, o VADER retorna valores que sugerem que a mensagem é majoritariamente neutra (neu: 0,765), mas que mais características parecem ser positivas (pos: 0,14) do que negativas (neg: 0,096). O VADER calcula uma pontuação composta de 0,889 para a mensagem (em uma escala de -1 a 1), o que sugere um afeto fortemente positivo para a mensagem como um todo.

Isso atendeu às suas expectativas? Se não, por que você acha que o VADER encontrou mais características positivas do que negativas?

No nível da mensagem como um todo, não há como destacar os sentimentos particularmente positivos ou negativos dentro dela. Essa perda de detalhes pode ser irrelevante, ou pode ser vital ao conduzir uma análise exploratória - isso depende das necessidades de pesquisa do seu estudo. Por exemplo, identificar frases negativas em e-mails de resto cordiais pode ser especialmente importante ao procurar explosões emocionais ou trocas abusivas, que podem ocorrer muito raramente, mas revelar algo essencial sobre a natureza de um relacionamento. Se quisermos capturar esse nível de nuance, precisamos de um método para passar da análise no nível da mensagem para a análise no nível da frase.

Felizmente, o NLTK oferece uma coleção de ferramentas para dividir o texto em componentes menores. Os tokenizadores dividem sequências de texto em pedaços menores, como frases; alguns conseguem ainda dividir uma frase em classes gramaticais específicas, como substantivo, adjetivo e assim por diante. No nosso caso, usaremos o tokenizador english.pickle do NLTK para dividir parágrafos em frases.
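Para ver o tokenizador isoladamente antes de integrá-lo ao script, experimente o teste mínimo abaixo (as frases de exemplo são nossas):

```python
# demonstração rápida: o tokenizador punkt divide uma string em uma lista de frases
import nltk.data

tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
print(tokenizer.tokenize("This is one sentence. And here is another!"))
# saída esperada: ['This is one sentence.', 'And here is another!']
```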
Agora podemos reescrever o script de análise de sentimento para analisar cada frase separadamente:

```python
# abaixo está o código de análise de sentimento reescrito para uma análise frase a frase
# observe o novo módulo -- nltk.data, usado para carregar o tokenizador de frases
import nltk.data
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# em seguida, inicializamos o VADER para utilizá-lo em nosso script Python
sid = SentimentIntensityAnalyzer()

# vamos também inicializar nosso tokenizador 'english.pickle' e atribuir a ele um nome curto
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

message_text = '''It seems to me we are in the middle of no man's land with respect to the following: Opec production speculation, Mid east crisis and renewed tensions, US elections and what looks like a slowing economy (?), and no real weather anywhere in the world. I think it would be most prudent to play the markets from a very flat price position and try to day trade more aggressively. I have no intentions of outguessing Mr. Greenspan, the US. electorate, the Opec ministers and their new important roles, The Israeli and Palestinian leaders, and somewhat importantly, Mother Nature. Given that, and that we cannot afford to lose any more money, and that Var seems to be a problem, let's be as flat as possible. I'm ok with spread risk (not front to backs, but commodity spreads). The morning meetings are not inspiring, and I don't have a real feel for everyone's passion with respect to the markets. As such, I'd like to ask John N. to run the morning meetings on Mon. and Wed. Thanks. Jeff'''

# o método de tokenização quebra o parágrafo em uma lista de frases (strings). Neste exemplo, observe que o tokenizador se confunde com a pontuação atípica (o '(?)' e abreviações como 'US.' e 'Mon.') e acaba quebrando algumas frases de forma equivocada. Como poderíamos consertar isso?
sentences = tokenizer.tokenize(message_text)

# vamos adicionar um passo para percorrer a lista de frases, calculando e imprimindo a pontuação de polaridade de cada uma
for sentence in sentences:
    print(sentence)
    scores = sid.polarity_scores(sentence)
    for key in sorted(scores):
        print('{0}: {1}, '.format(key, scores[key]), end='')
    print()
```

O resultado deve ser semelhante a este:

```
It seems to me we are in the middle of no man's land with respect to the following: Opec production speculation, Mid east crisis and renewed tensions, US elections and what looks like a slowing economy (?
compound: -0.5267, neg: 0.197, neu: 0.68, pos: 0.123,
), and no real weather anywhere in the world.
compound: -0.296, neg: 0.216, neu: 0.784, pos: 0.0,
I think it would be most prudent to play the markets from a very flat price position and try to day trade more aggressively.
compound: 0.0183, neg: 0.103, neu: 0.792, pos: 0.105,
I have no intentions of outguessing Mr. Greenspan, the US.
compound: -0.296, neg: 0.216, neu: 0.784, pos: 0.0,
electorate, the Opec ministers and their new important roles, The Israeli and Palestinian leaders, and somewhat importantly, Mother Nature.
compound: 0.4228, neg: 0.0, neu: 0.817, pos: 0.183,
Given that, and that we cannot afford to lose any more money, and that Var seems to be a problem, let's be as flat as possible.
compound: -0.1134, neg: 0.097, neu: 0.823, pos: 0.081,
I'm ok with spread risk (not front to backs, but commodity spreads).
compound: -0.0129, neg: 0.2, neu: 0.679, pos: 0.121,
The morning meetings are not inspiring, and I don't have a real feel for everyone's passion with respect to the markets.
compound: 0.5815, neg: 0.095, neu: 0.655, pos: 0.25,
As such, I'd like to ask John N. to run the morning meetings on Mon.
compound: 0.3612, neg: 0.0, neu: 0.848, pos: 0.152,
and Wed.
compound: 0.0, neg: 0.0, neu: 1.0, pos: 0.0,
Thanks.
compound: 0.4404, neg: 0.0, neu: 0.0, pos: 1.0,
Jeff
compound: 0.0, neg: 0.0, neu: 1.0, pos: 0.0,
```

Aqui, você nota uma visualização muito mais detalhada do sentimento neste e-mail. O VADER identifica com sucesso as frases moderada a fortemente negativas, especialmente as descrições de crises no início. A análise no nível da frase permite identificar frases e tópicos específicos nos extremos do sentimento, o que pode ser útil mais tarde.

Mas, mesmo nesse nível, o VADER comete vários erros. A frase que começa com “The morning meetings are not inspiring” (“As reuniões matinais não são inspiradoras”) resulta em uma pontuação surpreendentemente positiva - talvez por causa de uma leitura equivocada dos termos “passion” (“paixão”) e “respect” (“respeito”). Observe também que o ponto de interrogação próximo ao início do e-mail e o ponto da abreviação *Mon.* (segunda-feira), próximo ao final, fazem com que o tokenizador english.pickle quebre frases por engano. Esse é um risco constante quando o texto traz pontuação informal ou complexa.

O que você percebe sobre a distribuição das pontuações de sentimento? Como você poderia coletá-las de uma maneira que o ajudasse a entender melhor os seus dados e as questões de pesquisa do seu interesse? (Sinta-se à vontade para experimentar diferentes tipos de texto na variável *message_text* para ver como a ferramenta responde a diferentes construções de linguagem.) O código que você acabou de escrever pode ser reaproveitado para qualquer texto.
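Uma forma simples de começar a coletá-las - um esboço nosso, sob a suposição de que resumir o valor 'compound' por frase é um ponto de partida útil - é acumular as pontuações em uma lista e calcular estatísticas básicas:

```python
# esboço: acumula a pontuação 'compound' de cada frase em uma lista
# e resume a distribuição com mínimo, máximo e média
import nltk.data
from nltk.sentiment.vader import SentimentIntensityAnalyzer

sid = SentimentIntensityAnalyzer()
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

# substitua pelo texto do seu interesse (aqui, um exemplo curto, nosso)
message_text = '''The morning meetings are not inspiring. Thanks for everyone's hard work on this.'''

compound_scores = []
for sentence in tokenizer.tokenize(message_text):
    compound_scores.append(sid.polarity_scores(sentence)['compound'])

print('mínimo:', min(compound_scores))
print('máximo:', max(compound_scores))
print('média:', sum(compound_scores) / len(compound_scores))
```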
# Agradecimentos

Meus sinceros agradecimentos a Justin Joque, Bibliotecário de Visualização da Biblioteca da Universidade de Michigan e do Digital Projects Studio, pelo apoio na formulação das ideias e da abordagem por trás desta lição. Muito obrigado também a Adam Crymble, que forneceu diversas ideias e apoio durante todo o processo editorial. E obrigado a Anandi Silva Knuppel e Puteri Zarina Megat Khalid por seus comentários atenciosos.

# Referências

Barton, D., & Hall, N. (Eds.). (2000). Letter writing as a social practice (Vol. 9). John Benjamins Publishing.

Hardin, J., Sarkis, G., & Urc, P. C. (2015). Network Analysis with the Enron Email Corpus. Journal of Statistics Education, 23(2). https://doi.org/10.1080/10691898.2015.11889734

Hutto, C.J., & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text. Eighth International Conference on Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014. https://www.aaai.org/ocs/index.php/ICWSM/ICWSM14/paper/viewPaper/8109

Klimt, B., & Yang, Y. (2004, July). Introducing the Enron Corpus. In CEAS. https://bklimt.com/papers/2004_klimt_ceas.pdf

Klimt, B., & Yang, Y. (2004). The Enron corpus: A new dataset for email classification research. Machine Learning: ECML 2004, 217-226. https://bklimt.com/papers/2004_klimt_ecml.pdf

Tukey, J.W. (1977). Exploratory Data Analysis. Addison-Wesley Publishing Company.

Quinn, J. (2006, November 14). Ex-Enron man goes back into energy. Retrieved January 10, 2018, from http://www.telegraph.co.uk/finance/2950645/Ex-Enron-man-goes-back-into-energy.html
Caso a análise exploratória de dados não revele indícios, geralmente quantitativos, é provável que se considere não haver nada a ser comprovado. ” (Tukey 1977: 3, tradução livre) + +## Explorando Texto com Análise de Sentimento + +Quando confrontado com um corpus promissor, porém muito grande, como o pesquisador pode encontrar aquilo de mais importante, que pode levar às descobertas de pesquisa mais interessantes? + +O Processamento de Linguagem Natural (PLN) abrange uma ampla gama de técnicas que se baseiam na aplicação de métodos analíticos computacionais ao conteúdo textual, fornecendo meios de categorizar e quantificar o texto. Essas abordagens de PLN, que incluem análise de sentimento, podem ajudar os pesquisadores a explorar seus textos. Nas palavras de Tukey, podem ajudar o pesquisador a encontrar “pistas” sobre seus textos e “indícios” de que pode valer a pena investigar algo mais a fundo. + +Nesta lição, vamos nos concentrar numa ferramenta do kit de ferramentas do PLN: a análise de sentimento. A análise de sentimento busca quantificar a intensidade emocional de palavras e frases num texto. Algumas ferramentas de análise de sentimento levam em consideração, inclusive, o peso emocional de sinais linguísticos como a pontuação ou mesmo os emojis. As ferramentas de análise de sentimento geralmente processam uma unidade de texto (uma frase, um parágrafo, um livro, etc.) e produzem pontuações (“scores”, em inglês) ou classificações quantitativas para indicar se o algoritmo considera que aquele texto transmite emoções positivas ou negativas. Algumas ferramentas também podem quantificar o *grau de positividade* ou o *grau de negatividade* num texto. Combinada com outros métodos de PLN, como modelagem de tópicos (“topic modelling”, em inglês), a análise de sentimento fornece meios de caracterizar as emoções expressas sobre diferentes tópicos de uma conversa. Quando usada em conjunto com a análise de rede, pode lançar luz sobre as maneiras como os indivíduos interagem uns com os outros. Um pesquisador interessado em interações sobre um evento político pode usar a análise de sentimento para estudar como os indivíduos descrevem aquele evento nas redes sociais. Com os dados certos para inserir na ferramenta, pode ser possível fazer comparações regionais ou entender como diferentes grupos demográficos vêem o evento de forma diferente. Como a ferramenta pode processar muitos dados sequencialmente, é até possível analisar o sentimento em centenas de milhares ou até milhões de eventos discursivos. + +Para começar, esta lição fornece uma introdução à análise de sentimento tanto prática quanto crítica. Como qualquer ferramenta computacional, a análise de sentimento tem uma série de limitações e vieses que os pesquisadores devem levar em consideração. Os pesquisadores devem ser especialmente cautelosos ao fazer afirmações empíricas com base nos resultados da análise de sentimento. Você poderá ser melhor atendido usando a análise de sentimento em situações provisórias e exploratórias, como meio de orientar o processo de pesquisa. Ao manejar essas ferramentas com ceticismo e eficácia, é possível realizar um trabalho de detetive bastante notável. + +## Análise de grandes coleções de correspondência textual + +Correspondências escritas como cartas, e-mails, registros de bate-papo, tweets e históricos de mensagens de texto podem fornecer aos pesquisadores uma visão inestimável de seus autores. Os textos geralmente são ricos em emoções e informações que não estão disponibilizadas em nenhum outro lugar. 
Um pesquisador pode aprender sobre as opiniões que as pessoas, objetos de seu estudo, tiveram sobre vários tópicos ou sobre determinados eventos. Também poderia ser possível aprender sobre os relacionamentos que os indivíduos desenvolveram e mantiveram em organizações ou redes complexas. + +Embora metodologias como etnografia, leitura “manual” e análise do discurso ajudem os pesquisadores a analisar a correspondência histórica, esses métodos trazem desafios significativos quando o número de textos cresce de dezenas ou centenas para milhares ou milhões. A análise textual computacional fornece um conjunto de métodos para tornar visíveis as tendências, dinâmicas e relacionamentos que podem estar ocultos para o leitor humano por problemas de escala. Além disso, muitos métodos de computação produzem descobertas que podem ser expressas quantitativamente e que podem subsequentemente permitir que o pesquisador realize modelagem estatística, visualização de informações e aprendizado de máquina (Machine Learning) para fazer outras análises. + +## Estudo de caso: corpus de e-mails da Enron + +Este tutorial usa a correspondência de e-mail da falida empresa americana de energia Enron. A Enron ocultou uma ampla variedade de práticas contábeis ilegais até que uma investigação federal em 2001 a levou à falência. Na época, o Escândalo Enron foi o maior colapso de uma empresa de capital aberto da história. Em 2001, a empresa começou a mostrar sinais de problemas financeiros que não se alinhavam com as divulgações financeiras da empresa até aquele momento. As ações da Enron negociadas em bolsa caíram de US$ 90,75 em meados de 2000 para menos de um dólar em novembro de 2001, o que levou os acionistas a processar a empresa. Uma investigação subsequente da Comissão de Valores Mobiliários dos Estados Unidos (SEC) revelou que os executivos da Enron cometeram fraude e negligência contábil em grande escala. A Enron declarou falência em dezembro daquele ano. Nos anos que se seguiram, vários executivos enfrentaram condenações criminais por sua participação no escândalo. Para os pesquisadores, o Escândalo Enron resultou na criação de um dos maiores (e mais infames) corpus de texto por correspondência já coletado: + +> “Um dos escândalos corporativos mais infames das últimas décadas deixou curiosamente em seu rastro um dos conjuntos de dados mais valiosos disponíveis publicamente. No final de 2001, o encobrimento de fraude contábil da Enron Corporation levou à falência da gigante da energia. A Federal Energy Regulatory Commission requereu todos os registros de e-mail da Enron como parte da investigação que se seguiu. Nos dois anos seguintes, a comissão divulgou, escondeu e depois divulgou novamente o corpus de e-mail para o público após excluir e-mails que continham informações pessoais, como números de previdência social. O corpus da Enron contém e-mails cujos assuntos variam de planejamento de férias de fim de semana a tópicos de discussão de estratégia política, e continua sendo o único grande exemplo de conjuntos de dados de e-mail do mundo real disponíveis para pesquisa ”. (Hardin, Sarkis e Urc, 2015) + +Quando o conjunto de dados de e-mail da Enron - organizado e editado - foi lançado em 2004, os pesquisadores descobriram uma oportunidade sem precedentes: acesso direto à maneira espontânea e sem censura como os funcionários de uma empresa condenada se comunicavam. De repente, os pesquisadores tiveram acesso a como as pessoas se comunicam no trabalho em uma escala sem precedentes. 
Isso era importante para pesquisadores interessados ​​no caso especial do escândalo e colapso da Enron, mas também para pesquisadores interessados ​​em um amplo espectro de questões sobre a comunicação cotidiana no trabalho. + +Na década seguinte, centenas de novos estudos surgiram a partir desses e-mails, realizados em diversos campos como teoria das redes sociais, comunidade e detecção de anomalias, gênero e comunicação dentro das organizações, mudança de comportamento durante uma crise organizacional, insularidade e formação de comunidade. O uso da teoria das redes sociais nas humanidades oferece algumas possibilidades fascinantes, mas não é tão simples. + +Além da grande quantidade de mensagens incluídas (o corpus contém mais de 600.000 mensagens), o corpus de e-mails da Enron também inclui os metadados necessários para que os pesquisadores realizem uma série de questões de pesquisa. Assim como a presença de envelopes com endereços legíveis do remetente e do destinatário seria um excelente trunfo para pesquisadores de correspondências de cartas históricas, a presença de endereços de e-mail do remetente e do destinatário permite que os pesquisadores associem os e-mails a determinados indivíduos conhecidos dentro da corporação. Como alguns indivíduos tinham vários endereços de e-mail, ou mais de um indivíduo pode ter compartilhado o mesmo endereço, os metadados não são de uso muito óbvio, mas são potencialmente elucidativos. O restante do tutorial explicará como aplicar e interpretar a análise de sentimento de e-mails neste corpus. + +# Usando Python com o Natural Language Toolkit (NLTK) + +
    +Programando pela primeira vez? Esta lição é destinada a iniciantes, mas pode ser conveniente revisar outras lições de Python no Programming Historian. No entanto, observe que, embora muitas lições usem o Python versão 2, esta lição requer o Python versão 3. As instruções de instalação do Python 3 serão apresentadas a seguir. +
    + +Neste tutorial, Python será usado junto com algumas ferramentas do Natural Language Toolkit (NLTK) para gerar indicadores de sentimento a partir de transcrições de e-mail. Para fazer isso, você primeiro aprenderá como carregar os dados textuais no Python, selecionar as ferramentas de PLN apropriadas para análise de sentimento e escrever um algoritmo que calcula pontuações de sentimento para um determinado texto. Também exploraremos como ajustar seu algoritmo para melhor atender a seu objetivo de pesquisa. Ao final, você irá arquivar seu algoritmo de solução de problemas como um pacote de código conhecido como *função*, que poderá ser reutilizado e reaproveitado (inclusive na parte 2 deste tutorial) + +## Instalação + +Para continuar, as seguintes instalações serão necessárias: + +* Python 3 (preferivelmente 3.5 ou superior) - [Instruções para baixar e instalar Python](https://wiki.python.org/moin/BeginnersGuide/Download) +* NLTK (3.2.5 or superior) - [Instruções para baixar e instalar NLTK](https://www.nltk.org/install.html) + +## Primeiros passos com NLTK + +O Natural Language Toolkit (NLTK) é uma coleção de ferramentas Python reutilizáveis (também conhecido como uma biblioteca Python) que ajuda os pesquisadores a aplicar um conjunto de métodos computacionais a textos. As ferramentas variam desde métodos que ajudam a quebrar o texto em pedaços menores, alguns que identificam se uma palavra pertence a um determinado idioma, até aqueles textos de amostra que os pesquisadores podem usar para fins de treinamento e desenvolvimento (como o texto completo de *Moby Dick*). + +Se você precisar de ajuda para baixar e instalar o módulo para [Python 3](https://www.python.org/download/releases/3.0/), dê uma olhada na lição Instalando Módulos Python com pip de [Fred Gibbs](/en/lessons/installing-python-modules-pip) (em inglês). + +Em nosso caso, usaremos duas ferramentas NLTK em particular: + +* A ferramenta ["Análise de sentimento VADER"](https://www.nltk.org/_modules/nltk/sentiment/vader.html) (que gera pontuações de sentimento positivas, negativas e neutras para uma determinada entrada) +* A ferramenta de toquenização ‘word_tokenize’ (divide um texto grande em uma sequência de unidades menores, como frases ou palavras) + +Para usar VADER e word_tokenize, primeiro precisamos baixar e instalar alguns dados extras para NLTK. O NLTK é um kit de ferramentas muito grande e várias de suas ferramentas requerem uma segunda etapa de download para reunir a coleção de dados necessária (geralmente léxicos codificados) para funcionar corretamente. + +Para instalar a análise de sentimento e o tokenizador de palavras que usaremos neste tutorial, escreva um novo script em Python com as três linhas a seguir: + +```python +import nltk +nltk.download('vader_lexicon') +nltk.download('punkt') +``` +Você pode salvar este arquivo como `“installation.py”`. Se você não tiver certeza de como salvar e executar scripts em Python, reveja o tutorial sobre como configurar um 'Ambiente de Desenvolvimento Integrado' usando Python, substituindo o comando '% (python)% f' por '% (python3)% f' quando você chegar a esse parte no tutorial. + +* Configurando um ambiente de desenvolvimento integrado para Python no [Windows](/pt/licoes/instalacao-windows). +* Configurando um ambiente de desenvolvimento integrado para Python no [Mac](/pt/licoes/instalacao-mac). +* Configurando um ambiente de desenvolvimento integrado para Python no [Linux](/pt/licoes/instalacao-linux). 
+ + Se você sabe como executar scripts Python, execute o arquivo usando Python 3. + + [VADER](https://www.nltk.org/_modules/nltk/sentiment/vader.html) (Valence Aware Dictionary and sEntiment Reasoner) é uma ferramenta de atribuição de intensidade de sentimento acrescentada ao NLTK em 2014. Ao contrário de outras técnicas que exigem treinamento em textos parecidos antes do uso, o VADER está pronto para ser usado sem qualquer configuração especial. O VADER é o único que faz distinções refinadas entre vários graus de positividade e negatividade. Por exemplo, VADER pontua “conforto” como moderadamente positivo e “euforia” como extremamente positivo. Ele também tenta capturar e pontuar características textuais comuns em texto online informal, como letras maiúsculas, pontos de exclamação e emoticons, conforme mostrado na tabela abaixo: + + {% include figure.html filename="analise-sentimento1.png" caption="Vader captura pequenas gradações de entusiasmo. (Hutto e Gilbert, 2014). **Versão do tradutor**. Acesse a original [aqui](/en/lessons/sentiment-analysis)" %} + + Como qualquer ferramenta de análise de texto, o VADER deve ser avaliado com criticidade e de forma contextualizada. O VADER foi desenvolvido em meados da década de 2010 principalmente para analisar microblogs em inglês e sites de rede social (especialmente o Twitter). Esse tipo de texto tende a ser muito mais informal do que o e-mail profissional, e contém padrões de linguagem e de uso de recursos que diferem dos padrões de 1999-2002 quando os e-mails da Enron foram escritos. No entanto, VADER também foi desenvolvido como uma ferramenta de análise de sentimento de propósito geral, e o estudo inicial dos autores mostra que ele se compara favoravelmente com ferramentas que foram treinadas para domínios específicos, usam léxicos especializados ou técnicas de aprendizado de máquina com muitos recursos (Hutto e Gilbert, 2014 ). A sensibilidade da ferramenta em relação aos graus de afeto se mostrou útil para descrever as sutilezas das emoções expressas nos e-mails profissionais - como pesquisadores, podemos estar especialmente interessados ​​em capturar os momentos em que a emoção surge em um texto formal. No entanto, a análise de sentimento continua se dedicando a encontrar soluções para capturar sentimentos complexos como ironia, sarcasmo e zombaria, quando o leitor médio seria capaz de fazer a distinção entre o texto literal e seu significado pretendido. + + Embora o VADER seja uma boa ferramenta de uso geral para textos contemporâneos e históricos em inglês, a ferramenta fornece apenas suporte nativo parcial para textos em outras línguas (detecta emojis / maiúsculas / etc., mas não a escolha de palavras). No entanto, os desenvolvedores incentivam os usuários a usar a tradução automática para pré-processar textos que não sejam em inglês e, em seguida, inserir os resultados no VADER. O "VADER demo" inclui um código para enviar o texto de entrada automaticamente para o serviço web ‘My Memory Translation Service’, (leitores avançados podem encontrar no [Github](https://github.com/cjhutto/vaderSentiment/blob/master/vaderSentiment/vaderSentiment.py) a partir da linha 554 - no momento da escrita deste artigo). A implementação deste método de tradução é mais indicada para usuários intermediários de Python. 
Você pode aprender mais sobre o estado da arte da análise de sentimento multilíngue (que infelizmente quase sempre requer uma etapa de tradução) em ["Análise de sentimento multilíngue: o estado da arte e comparação independente de técnicas"](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4981629/), de Kia Dashtipour, et al (2016). + + +## Calculando Sentimento para um Parágrafo + +Leia o seguinte trecho: + +>“Like you, I am getting very frustrated with this process. I am genuinely trying to be as reasonable as possible. I am not trying to “hold up” the deal at the last minute. I’m afraid that I am being asked to take a fairly large leap of faith after this company (I don’t mean the two of you – I mean Enron) has screwed me and the people who work for me.” + +Este é o primeiro parágrafo do e-mail de janeiro de 2012 de Timothy Belden para Louise Kitchen e John Lavorato sobre o “Acordo de Contratos de Trabalho”. Belden dirigiu os Serviços de Energia da Enron e mais tarde seria condenado por conspiração a fim de aumentar os custos de energia na Califórnia, o que levou a uma crise energética em todo o estado. + +Apesar do sentimento de frustração e ansiedade que você pode deduzir do parágrafo como um todo, observe a ambivalência das frases específicas dentro do parágrafo. Alguns parecem expressar esforços de boa fé, por exemplo: “Não estou tentando ‘atrasar’ o negócio” e “genuinamente tentando”. E, no entanto, há declarações negativas ainda mais fortes sobre "ficar frustrado", "Receio" e "esta empresa [...] ferrou comigo e com as pessoas que trabalham para mim". + +Vamos calcular as pontuações de sentimento para este e-mail usando o VADER para ter uma ideia do que a ferramenta pode fazer. Para começar, crie um novo diretório de trabalho (pasta) em seu computador chamado `“sentimento”` em algum lugar onde você possa encontrá-lo. Dentro dessa pasta, crie um novo arquivo de texto e salve-o como `“sentimento.py”`. É aqui que escreveremos o código para esta tarefa. + +Primeiro, temos que dizer ao Python onde o código NLTK para a análise de sentimento VADER está localizado. No início do nosso arquivo, importaremos o código do VADER: + +```python +# primeiro, importamos os módulos relevantes da biblioteca NLTK +from nltk.sentiment.vader import SentimentIntensityAnalyzer +``` + +Também devemos habilitar o Python para usar este código com nosso conjunto particular de código. Embora tenhamos todas as instruções de que precisamos na biblioteca NLTK, o Python gosta de agrupar essas instruções em um único `objeto` (nossa ferramenta de Análise de Sentimentos) que nosso programa pode acessar. *SentimentIntensityAnalyzer* é uma `classe`, que é um “modelo” que instrui o Python a construir um `objeto` com um conjunto especial de `funções` e `variáveis`. No nosso caso, queremos construir um único `objeto`: nosso analisador de sentimento, que segue este “modelo”. Para fazer isso, executamos *SentimentIntensityAnalyzer( )* e atribuímos a saída - nosso novo analisador de sentimento - a uma variável, que chamaremos de *‘sid’*. + +```python +# em seguida, inicializamos o VADER para que possamos usá-lo em nosso script Python +sid = SentimentIntensityAnalyzer() +``` + +Fazendo isso, fornecemos à nossa nova variável *sid* todos os recursos do código de análise de sentimento VADER. Assim, *sid* se tornou nossa ferramenta de análise de sentimento, mas com um nome mais curto. + +Em seguida, precisamos armazenar o texto que queremos analisar em um lugar que o *sid* possa acessar. 
Em Python, podemos armazenar uma única sequência de texto como uma variável de `string` (Nota do tradutor: Optamos por manter a palavra 'string' como no original em inglês para facilitar o entendimento de seu uso mais comum em códigos ['str']). + +```python +# a variável 'message_text' agora contém o texto que iremos analisar. +message_text = '''Like you, I am getting very frustrated with this process. I am genuinely trying to be as reasonable as possible. I am not trying to "hold up" the deal at the last minute. I'm afraid that I am being asked to take a fairly large leap of faith after this company (I don't mean the two of you -- I mean Enron) has screwed me and the people who work for me.''' +``` + +Como este texto inclui aspas e apóstrofos, é necessário circundar todo o texto com três aspas (“”” ou ’’’). Isso significa que quaisquer aspas e apóstrofos no texto serão reconhecidos como tal. Essa abordagem também mantém qualquer espaçamento que nosso texto já inclua. + +Agora você está pronto para processar o texto. + +Para fazer isso, o texto *(message_text)* deve ser inserido na ferramenta *(sid)* e o programa deve ser executado. Estamos interessados na "pontuação de polaridade" do analisador de sentimento, que nos dá uma pontuação positiva ou negativa. Este recurso é integrado ao VADER e pode ser solicitado sob demanda. + +Queremos ter certeza de capturar a saída de sid.polarity_scores () atribuindo-a a uma variável que chamaremos de *scores*: + +```python +print(message_text) + +# Utilizar método polarity_scores no sid e passar dentro dele o message_text produz um dicionário com pontuações negativas, neutras, positivas e compostas para o texto de entrada +scores = sid.polarity_scores(message_text) +``` + +Quando você executa este código, os resultados da análise de sentimento agora são armazenados no `dicionário` de *pontuação* (scores). Um dicionário, muito parecido com o tipo que você usa para pesquisar a definição de palavras, é uma variável que armazena informações conhecidas como 'valores' que são acessíveis dando ao programa a 'chave' para a entrada que você deseja ler. Isso significa que um dicionário como *scores* pode armazenar muitos `pares de valores-chave`. Para solicitar os dados, você só precisa conhecer as `chaves`. Mas não sabemos as `chaves`. Felizmente, Python nos dará uma lista de todas as `chaves`, classificadas em ordem alfabética, se usarmos a função `sorted(scores)`. + +Para imprimir cada `chave` e `valor` armazenado no dicionário, precisamos de um `for loop`, que aplica o mesmo código sequencialmente a todas as `chaves` do dicionário. + +Aqui está o código para imprimir cada par de `valores-chave` dentro da variável de pontuação (score): + +```python +# Aqui, percorremos as chaves contidas nas pontuações (pos, neu, neg e pontuações compostas) e imprimimos os pares de valores-chave na tela para digitação classificada (pontuações): +for key in sorted(scores): + print('{0}: {1}, '.format(key, scores[key]), end='') +``` + +Aqui está todo o código em um único programa: + +```python +# primeiro, importamos os módulos relevantes da biblioteca NLTK +from nltk.sentiment.vader import SentimentIntensityAnalyzer + +# a seguir, inicializamos o VADER para que possamos usá-lo em nosso script Python +sid = SentimentIntensityAnalyzer() + +# a variável 'message_text' agora contém o texto que iremos analisar. +message_text = '''Like you, I am getting very frustrated with this process. I am genuinely trying to be as reasonable as possible. 
I am not trying to "hold up" the deal at the last minute. I'm afraid that I am being asked to take a fairly large leap of faith after this company (I don't mean the two of you -- I mean Enron) has screwed me and the people who work for me.''' + +print(message_text) + +# Utilizar método polarity_scores no sid e passar dentro dele o message_text produz um dicionário com pontuações negativas, neutras, positivas e compostas para o texto de entrada +scores = sid.polarity_scores(message_text) + +# Aqui, percorremos as chaves contidas nas pontuações (pos, neu, neg e pontuações compostas) e imprimimos os pares de valores-chave na tela +for key in sorted(scores): + print('{0}: {1}, '.format(key, scores[key]), end='') +``` + +Salve seu arquivo Python. Agora estamos prontos para executar o código. Usando seu método preferido (ou seu Ambiente de Desenvolvimento Integrado ou a linha de comando), execute seu arquivo Python, `sentimento.py`. + +O resultado deve ser semelhante a este: + +```python +Like you, I am getting very frustrated with this process. I am genuinely trying to be as reasonable as possible. I am not trying to "hold up" the deal at the last minute. I'm afraid that I am being asked to take a fairly large leap of faith after this company (I don't mean the two of you -- I mean Enron) has screwed me and the people who work for me. + +compound: -0.3804, neg: 0.093, neu: 0.836, pos: 0.071, +``` +
    +Lembre-se de usar três aspas simples para envolver a string *message_text* acima. Se você usar aspas duplas, a string terminará mais cedo devido às aspas dentro do texto. +
    + +O VADER coleta e pontua palavras e características negativas, neutras e positivas (e é responsável por fatores como negação ao longo do caminho). Os valores “neg”, “neu” e “pos” descrevem a fração das pontuações ponderadas que se enquadram em cada categoria. VADER também soma todas as pontuações ponderadas para calcular um valor “composto” normalizado entre -1 e 1; este valor tenta descrever o efeito geral de todo o texto de fortemente negativo (-1) a fortemente positivo (1). Neste caso, a análise com VADER descreve a passagem como ligeiramente a moderadamente negativa (-0,3804). Podemos pensar nesse valor como uma estimativa da impressão geral de um leitor médio ao considerar o e-mail como um todo, apesar de alguma ambiguidade e ambivalência ao longo do caminho. + +Ao ler o texto, estaria inclinado a concordar com essa avaliação geral. O valor de saída de -0,3804 é negativo, mas não fortemente negativo. Os pesquisadores podem desejar definir um limite mínimo para positividade ou negatividade antes de declarar um texto definitivamente positivo ou negativo - por exemplo, a documentação oficial do VADER sugere um limite de -0,5 e 0,5, que este trecho específico não alcançaria (em outras palavras , este texto é negativo, mas não extremamente negativo). + +O que isso implica, para você, sobre a maneira como esse sentimento pode ser expresso em um contexto de e-mail profissional? Como você definiria seus valores limite quando o texto expressa emoções de maneira mais sutil ou cortês? Você acha que a análise de sentimento é uma ferramenta apropriada para nossa análise exploratória de dados? + +Desafio: tente substituir o conteúdo de *message_text* pelas seguintes cadeias de caracteres e execute novamente o programa. Não se esqueça de cercar cada texto com três aspas simples ao atribuí-lo à variável *message_text* (como em: *message_text* = ''' algumas palavras '''). Antes de executar o programa, tente adivinhar o resultado da análise de sentimento: positivo ou negativo? Quão positivo ou negativo? + +Texto 1: + +``` +Looks great. I think we should have a least 1 or 2 real time traders in Calgary. +``` + +Texto 2: + +``` +I think we are making great progress on the systems side. I would like to +set a deadline of November 10th to have a plan on all North American projects +(I'm ok if fundementals groups are excluded) that is signed off on by +commercial, Sally's world, and Beth's world. When I say signed off I mean +that I want signitures on a piece of paper that everyone is onside with the +plan for each project. If you don't agree don't sign. If certain projects +(ie. the gas plan) are not done yet then lay out a timeframe that the plan +will be complete. I want much more in the way of specifics about objectives +and timeframe. + +Thanks for everyone's hard work on this. +``` + +Experimente uma terceira vez com algum texto de uma de suas próprias fontes de pesquisa. Que resultados você obteve para cada um? Você concorda com os resultados? + +# Determine o escopo apropriado para e-mail + +Quando analisado por meio da ferramenta de análise de sentimento VADER, o texto produz um conjunto de pontuações positivas, neutras e negativas, que são então agregadas e dimensionadas como uma "pontuação composta". Embora seja útil saber em teoria, como esse método pode ser aplicado aos dados no exemplo da Enron - isto é, uma coleção de dados de e-mail e metadados? E o que isso pode nos dizer sobre as emoções, relacionamentos e mudanças ao longo do tempo dos funcionários da Enron? 
+ +Nesta seção, apresentaremos a você o processo de seleção do escopo de análise para nossa ferramenta de análise de sentimento. Considere os seguintes dados brutos pertencentes a um e-mail de 3 de outubro de 2000 escrito por Jeffrey Shankman, então presidente de mercados globais da Enron (Quinn, 2006): + +``` +Message-ID: <3764632.1075857565248.JavaMail.evans@thyme> +Date: Mon, 23 Oct 2000 09:14:00 -0700 (PDT) +From: jeffrey.shankman@enron.com +To: john.nowlan@enron.com, don.schroeder@enron.com, david.botchlett@enron.com, + chris.mahoney@enron.com, ross.koller@enron.com +Subject: +Mime-Version: 1.0 +Content-Type: text/plain; charset=us-ascii +Content-Transfer-Encoding: 7bit +X-From: Jeffrey A Shankman +X-To: John L Nowlan, Don Schroeder, David J Botchlett, Chris Mahoney, Ross Koller +X-cc: +X-bcc: +X-Folder: \Jeffrey_Shankman_Jun2001\Notes Folders\Sent +X-Origin: Shankman-J +X-FileName: jshankm.nsf + +It seems to me we are in the middle of no man's land with respect to the +following: Opec production speculation, Mid east crisis and renewed +tensions, US elections and what looks like a slowing economy (?), and no +real weather anywhere in the world. I think it would be most prudent to play +the markets from a very flat price position and try to day trade more +aggressively. I have no intentions of outguessing Mr. Greenspan, the US. +electorate, the Opec ministers and their new important roles, The Israeli and +Palestinian leaders, and somewhat importantly, Mother Nature. Given that, +and that we cannot afford to lose any more money, and that Var seems to be a +problem, let's be as flat as possible. I'm ok with spread risk (not front to +backs, but commodity spreads). + + +The morning meetings are not inspiring, and I don't have a real feel for +everyone's passion with respect to the markets. As such, I'd like to ask +John N. to run the morning meetings on Mon. and Wed. + + +Thanks. Jeff +``` + +No texto da mensagem do e-mail, Shankman traça uma estratégia corporativa para avançar no que ele percebe como um contexto geopolítico ambíguo. A mensagem descreve uma série de situações difíceis, bem como exasperação ("As reuniões matinais não são inspiradoras") e incerteza ("Não tenho um sentimento real de paixão de todos"). Ao mesmo tempo, Shankman descreve um conjunto de etapas de ação junto com pedidos educados ("Eu gostaria de pedir ...") e expressões de gratidão ("Obrigado"). + +Antes de prosseguirmos, pare um minuto para refletir sobre a mensagem. Como você acha que um leitor típico descreveria a intensidade emocional deste e-mail? Considerando o que você sabe agora sobre VADER, que proporção de positividade, negatividade e neutralidade você espera que a ferramenta de análise de sentimento encontre na mensagem? Finalmente, o que você acha que a pontuação composta irá sugerir sobre o efeito geral na mensagem? + +Como discutimos acima, a análise de sentimento não fornece uma saída objetiva, mas sim indicadores de orientação que refletem nossa escolha e calibração de ferramentas analíticas. Talvez o elemento mais importante da calibração seja selecionar o escopo do texto que está sendo analisado, ou seja, quanto de uma mensagem colocamos na ferramenta de uma vez. Em nosso caso, podemos determinar o escopo da análise decidindo entre analisar a mensagem inteira como uma única unidade ou, em vez disso, dividir a mensagem em unidades menores como frases e analisar cada uma separadamente. 
+ +Primeiro, vamos considerar uma abordagem no nível da mensagem, na qual analisamos a mensagem como um único bloco: + +```python +# Continue com o mesmo código da seção anterior, mas substitua a variável *message_text* pelo novo texto do e-mail: + +message_text = '''It seems to me we are in the middle of no man's land with respect to the following: Opec production speculation, Mid east crisis and renewed tensions, US elections and what looks like a slowing economy (?), and no real weather anywhere in the world. I think it would be most prudent to play the markets from a very flat price position and try to day trade more aggressively. I have no intentions of outguessing Mr. Greenspan, the US. electorate, the Opec ministers and their new important roles, The Israeli and Palestinian leaders, and somewhat importantly, Mother Nature. Given that, and that we cannot afford to lose any more money, and that Var seems to be a problem, let's be as flat as possible. I'm ok with spread risk (not front to backs, but commodity spreads). The morning meetings are not inspiring, and I don't have a real feel for everyone's passion with respect to the markets. As such, I'd like to ask John N. to run the morning meetings on Mon. and Wed. Thanks. Jeff''' + +``` + +Substitua `sentimento.py` pelo código acima, salve-o e execute-o. A saída deve ser semelhante a esta: + +```python +It seems to me we are in the middle of no man's land with respect to the following: Opec production speculation, Mid east crisis and renewed tensions, US elections and what looks like a slowing economy (?), and no real weather anywhere in the world. I think it would be most prudent to play the markets from a very flat price position and try to day trade more aggressively. I have no intentions of outguessing Mr. Greenspan, the US. electorate, the Opec ministers and their new important roles, The Israeli and Palestinian leaders, and somewhat importantly, Mother Nature. Given that, and that we cannot afford to lose any more money, and that Var seems to be a problem, let's be as flat as possible. I'm ok with spread risk (not front to backs, but commodity spreads). The morning meetings are not inspiring, and I don't have a real feel for everyone's passion with respect to the markets. As such, I'd like to ask John N. to run the morning meetings on Mon. and Wed. Thanks. Jeff +compound: 0.889, neg: 0.096, neu: 0.765, pos: 0.14, +``` + +Aqui você pode ver que, ao analisar o e-mail como um todo, VADER retorna valores que sugerem que a mensagem é principalmente neutra (neu: 0,765), mas que mais recursos parecem ser positivos (pos: 0,14) em vez de negativos (0,096). VADER calcula uma pontuação geral de sentimento de 0,889 para a mensagem (em uma escala de -1 a 1), o que sugere um efeito fortemente positivo para a mensagem como um todo. + +Isso atendeu às suas expectativas? Se não, por que você acha que o VADER encontrou mais características positivas do que negativas? + +No nível da entidade da mensagem, não há como destacar sentimentos particularmente positivos ou negativos na mensagem. Essa perda de detalhes pode ser irrelevante ou pode ser vital ao conduzir uma análise exploratória. Isso depende das necessidades de pesquisa de seu estudo. Por exemplo, identificar frases negativas em e-mails de outra forma adequados pode ser especialmente importante ao procurar explosões emocionais ou trocas abusivas que podem ocorrer muito raramente, mas revelam algo essencial sobre a natureza de um relacionamento. 
Se quisermos capturar esse nível de nuance, precisamos de um método para passar da análise no nível da mensagem para a análise do sentimento no nível da frase.
+
+Felizmente, o NLTK oferece uma coleção de ferramentas para dividir o texto em componentes menores. Os tokenizadores dividem sequências de texto em pedaços menores, como frases. Alguns podem ainda dividir uma frase em partes específicas do discurso, como substantivos, adjetivos e assim por diante. No nosso caso, usaremos o tokenizer english.pickle do NLTK para dividir os parágrafos em sentenças.
+
+Agora podemos reescrever o script de análise de sentimento para analisar cada frase separadamente:
+
+```python
+# Abaixo está o código de análise de sentimento reescrito para uma análise por frase
+# observe o novo módulo -- word_tokenize!
+import nltk.data
+from nltk.sentiment.vader import SentimentIntensityAnalyzer
+from nltk import sentiment
+from nltk import word_tokenize
+
+# Em seguida, inicializamos VADER para utilizá-lo em nosso script Python
+sid = SentimentIntensityAnalyzer()
+
+# Vamos também inicializar nosso tokenizador 'english.pickle' e atribuir a ele um nome curto
+
+tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
+
+message_text = '''It seems to me we are in the middle of no man's land with respect to the following: Opec production speculation, Mid east crisis and renewed tensions, US elections and what looks like a slowing economy (?), and no real weather anywhere in the world. I think it would be most prudent to play the markets from a very flat price position and try to day trade more aggressively. I have no intentions of outguessing Mr. Greenspan, the US. electorate, the Opec ministers and their new important roles, The Israeli and Palestinian leaders, and somewhat importantly, Mother Nature. Given that, and that we cannot afford to lose any more money, and that Var seems to be a problem, let's be as flat as possible. I'm ok with spread risk (not front to backs, but commodity spreads). The morning meetings are not inspiring, and I don't have a real feel for everyone's passion with respect to the markets. As such, I'd like to ask John N. to run the morning meetings on Mon. and Wed. Thanks. Jeff'''
+
+# O método de tokenização quebra o parágrafo em uma lista de frases (strings). Neste exemplo, observe que o tokenizer se confunde com pontuação atípica, como o ponto de interrogação entre parênteses e os pontos de abreviação, e acaba por quebrar algumas frases de forma equivocada. Como podemos consertar isso?
+
+sentences = tokenizer.tokenize(message_text)
+
+# Vamos adicionar um passo para percorrer a lista de frases, calcular e imprimir a pontuação de polaridade para cada uma.
+
+for sentence in sentences:
+    print(sentence)
+    scores = sid.polarity_scores(sentence)
+    for key in sorted(scores):
+        print('{0}: {1}, '.format(key, scores[key]), end='')
+    print()
+```
+
+
+O resultado deve ser semelhante a este:
+
+```python
+It seems to me we are in the middle of no man's land with respect to the following: Opec production speculation, Mid east crisis and renewed tensions, US elections and what looks like a slowing economy (?
+compound: -0.5267, neg: 0.197, neu: 0.68, pos: 0.123,
+), and no real weather anywhere in the world.
+compound: -0.296, neg: 0.216, neu: 0.784, pos: 0.0,
+I think it would be most prudent to play the markets from a very flat price position and try to day trade more aggressively.
+compound: 0.0183, neg: 0.103, neu: 0.792, pos: 0.105,
+I have no intentions of outguessing Mr. Greenspan, the US.
+compound: -0.296, neg: 0.216, neu: 0.784, pos: 0.0,
+electorate, the Opec ministers and their new important roles, The Israeli and Palestinian leaders, and somewhat importantly, Mother Nature.
+compound: 0.4228, neg: 0.0, neu: 0.817, pos: 0.183,
+Given that, and that we cannot afford to lose any more money, and that Var seems to be a problem, let's be as flat as possible.
+compound: -0.1134, neg: 0.097, neu: 0.823, pos: 0.081,
+I'm ok with spread risk (not front to backs, but commodity spreads).
+compound: -0.0129, neg: 0.2, neu: 0.679, pos: 0.121,
+The morning meetings are not inspiring, and I don't have a real feel for everyone's passion with respect to the markets.
+compound: 0.5815, neg: 0.095, neu: 0.655, pos: 0.25,
+As such, I'd like to ask John N. to run the morning meetings on Mon.
+compound: 0.3612, neg: 0.0, neu: 0.848, pos: 0.152,
+and Wed.
+compound: 0.0, neg: 0.0, neu: 1.0, pos: 0.0,
+Thanks.
+compound: 0.4404, neg: 0.0, neu: 0.0, pos: 1.0,
+Jeff
+compound: 0.0, neg: 0.0, neu: 1.0, pos: 0.0,
+```
+
+Aqui, você notará uma visualização muito mais detalhada do sentimento neste e-mail. O VADER identifica com sucesso sentenças moderadas a fortemente negativas no e-mail, especialmente as principais descrições de crises. A análise no nível da frase permite que você identifique frases e tópicos específicos nos extremos do sentimento, o que pode ser útil mais tarde.
+
+Mas, mesmo nesse nível, o VADER também comete vários erros. A frase que começa com “As reuniões matinais não são inspiradoras” resulta em uma pontuação surpreendentemente positiva - talvez por causa de uma leitura incorreta dos termos “paixão” e “respeito”.
+
+Observe também que o ponto de interrogação no início do e-mail e o ponto de abreviação após *Mon* (Segunda-feira: *seg.*) próximo ao final fazem com que o tokenizador english.pickle quebre as frases por engano. Este é um risco constante quando o texto traz pontuação informal e complexa.
+
+O que você nota sobre a distribuição das pontuações de sentimento? Como você poderia coletá-las de uma maneira que o ajude a entender melhor seus dados e as questões de pesquisa de seu interesse? (Sinta-se à vontade para experimentar diferentes tipos de texto na variável *message_text* para ver como a ferramenta responde a diferentes tipos de construções de linguagem). O código que você acabou de escrever pode ser reaproveitado para qualquer texto.
+
+# Agradecimentos
+
+Meus sinceros agradecimentos a Justin Joque, Bibliotecário de Visualização da Biblioteca da Universidade de Michigan e do Digital Projects Studio, pelo apoio na formulação das ideias e abordagem por trás desta lição. Muito obrigado também a Adam Crymble, que forneceu diversas ideias e apoio durante todo o processo editorial. E obrigado a Anandi Silva Knuppel e Puteri Zarina Megat Khalid por seus comentários atenciosos.
+
+# Referências
+
+Barton, D., & Hall, N. (Eds.). (2000). Letter writing as a social practice (Vol. 9). John Benjamins Publishing.
+
+Hardin, J., Sarkis, G., & Urc, P. C. (2015). Network Analysis with the Enron Email Corpus. Journal of Statistics Education, 23:2. https://doi.org/10.1080/10691898.2015.11889734
+
+Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text. Eighth International Conference on Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014. https://www.aaai.org/ocs/index.php/ICWSM/ICWSM14/paper/viewPaper/8109
+
+Klimt, B., & Yang, Y. (2004, July). Introducing the Enron Corpus. In CEAS.
https://bklimt.com/papers/2004_klimt_ceas.pdf + +Klimt, B., & Yang, Y. (2004). The Enron corpus: A new dataset for email classification research. Machine learning: ECML 2004, 217-226. https://bklimt.com/papers/2004_klimt_ecml.pdf + +Tukey, J.W. (1977). Exploratory Data Analysis. Addison-Wesley Publishing Company + +Quinn, J. (2006, November 14). Ex-Enron man goes back into energy. Retrieved January 10, 2018, from https://www.telegraph.co.uk/finance/2950645/Ex-Enron-man-goes-back-into-energy.html diff --git a/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.md b/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.md index 411d8206b0..f95b2a4771 100644 --- a/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.md +++ b/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown.md @@ -1,352 +1,352 @@ ---- -title: Autoria Sustentável em Texto Simples usando Pandoc e Markdown -layout: lesson -collection: lessons -slug: autoria-sustentavel-texto-simples-pandoc-markdown -date: 2014-03-19 -translation_date: 2022-11-27 -authors: -- Dennis Tenen -- Grant Wythoff -lesson-testers: -- Pao-Chuan Ma -tested-date: 2021-06-10 -editors: -- Fred Gibbs -translator: -- Gabriela Kucuruza -translation-editor: -- Jimmy Medeiros -translation-reviewer: -- Daniel Bonatto Seco -- André Salvo -difficulty: 2 -activity: sustaining -topics: [website, data-management] -abstract: "Neste tutorial, você aprenderá primeiro o básico do Markdown - uma sintaxe de marcação fácil de ler e escrever para texto simples - bem como Pandoc, uma ferramenta de linha de comando que converte texto simples em vários tipos de ficheiros formatados: PDF, docx, HTML, LaTeX, apresentação de slides e muito mais." -exclude_from_check: - - reviewers - - review-ticket -original: sustainable-authorship-in-plain-text-using-pandoc-and-markdown -avatar_alt: Um homem trabalhando numa mesa de desenho -doi: 10.46430/phpt0036 ---- - -{% include toc.html %} - -{% include figure.html filename="lexoriter.jpg" caption="" %} - -## Objetivos - -Neste tutorial, você aprenderá primeiro o básico do Markdown - uma sintaxe de marcação fácil de ler e de escrever para texto simples - assim como o [Pandoc](https://pandoc.org/), uma ferramenta de linha de comando que converte texto simples em vários tipos de ficheiro belamente formatados: PDF, docx, HTML, LaTeX, apresentações de slides e muito mais.[^1] Com o Pandoc como sua ferramenta de composição digital, você pode usar a sintaxe Markdown para adicionar figuras, bibliografia, formatação e alterar facilmente os estilos de citação de Chicago para MLA (por exemplo), todos usando texto simples. - -Este tutorial não pressupõe nenhum conhecimento técnico prévio, mas aumenta com a experiência, uma vez que vamos sugerir técnicas mais avançadas ao final de cada seção. Elas estão claramente marcadas e podem ser revisitadas após alguma prática e experimentação. - -Ao invés de seguir esse tutorial de maneira mecânica, recomendamos que se esforce para entender as soluções oferecidas aqui como uma _metodologia_, que pode precisar de adaptações para se adequar ao seu ambiente e fluxo de trabalho. A instalação das ferramentas necessárias apresenta talvez a maior barreira à participação. Tenha tempo e paciência suficientes para instalar tudo corretamente, ou faça isso com um/a colega que tenha uma configuração semelhante e ajudem-se mutuamente. 
Consulte a seção [Recursos Úteis](/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown#recursos-uteis) abaixo se ficar preso.[^2] - -## Filosofia -Escrever, armazenar e recuperar documentos são atividades centrais para o fluxo de trabalho de pesquisa das humanidades. Mesmo assim, muitos autores baseiam suas práticas em ferramentas e formatos proprietários que, às vezes, ficam aquém dos requisitos mais básicos da escrita acadêmica. Talvez possa se lembrar de certa frustração com a fragilidade de notas de rodapé, bibliografias, figuras e rascunhos de livros escritos em Microsoft Word ou Google Docs. No entanto, a maioria dos periódicos ainda insiste em submissões no formato .docx. - -Mais do que causar frustração pessoal, essa dependência de ferramentas e de formatos proprietários tem implicações negativas de longo prazo para a comunidade acadêmica. Em tal ambiente, os periódicos devem terceirizar a composição, alienando os autores dos contextos materiais de publicação e adicionando outras barreiras desnecessárias à circulação irrestrita do conhecimento.[^3] - -Quando se usa MS Word, Google Docs ou Open Office para escrever documentos, o que se vê não é o que se obtém. Embaixo da camada visível de palavras, frases e parágrafos, encontra-se uma complicada camada de código compreensível apenas para as máquinas. Por causa dessa camada oculta, os ficheiros .docx e .pdf dependem de ferramentas proprietárias para serem visualizados corretamente. Esses documentos são difíceis de pesquisar, imprimir e converter em outros formatos de ficheiros. - -Além disso, o tempo gasto formatando documentos em MS Word ou Open Office é perdido, pois toda essa formatação é removida pelo editor durante a submissão. Tanto os autores quanto os editores se beneficiariam da troca de ficheiros com formatação mínima, deixando a composição tipográfica para o estágio final de composição do processo de publicação. - -Aqui é onde o Markdown brilha. Markdown é uma sintaxe para marcar explicitamente elementos semânticos dentro de um documento, não em alguma camada oculta. A ideia é identificar as unidades que são significativas para humanos, como títulos, seções, subseções, notas de rodapé e ilustrações. No mínimo, os seus ficheiros sempre permanecerão compreensíveis **para você**, mesmo se o editor de texto que estiver usando parar de funcionar ou "sair do mercado". - -Escrever dessa forma libera o autor da ferramenta. Markdown pode ser escrito em qualquer editor de texto simples e oferece um rico ecossistema de software que pode renderizar o texto em documentos belamente formatados. Por esta razão, o Markdown está atualmente passando por um período de crescimento, não apenas como meio para escrever artigos acadêmicos, mas como uma convenção para edição online em geral. - -Os editores de texto simples de uso geral populares incluem [Atom](https://atom.io/) (todas as plataformas) e [Notepad ++](https://notepad-plus-plus.org/) (somente para Windows). - -É importante entender que o Markdown é apenas uma convenção. Os ficheiros Markdown são armazenados como texto simples, aumentando ainda mais a flexibilidade do formato. Ficheiros de texto simples existem desde a máquina de escrever eletrônica. A longevidade deste padrão torna, de modo inerente, o texto simples mais sustentável e estável do que os formatos proprietários. 
Enquanto os ficheiros produzidos até dez anos atrás no Microsoft Word e no Apple Pages podem causar problemas significativos quando abertos nas versões mais recentes, ainda é possível abrir um ficheiro escrito em qualquer editor de texto simples “morto” nas últimas décadas: AlphaPlus, Perfect Writer, Text Wizard, Spellbinder, WordStar ou o favorito de Isaac Asimov, SCRIPSIT 2.0, feito por Radio Shack. Escrever em texto simples garante que seus ficheiros permanecerão legíveis daqui a dez, quinze, vinte anos. Neste tutorial, descrevemos um fluxo de trabalho que libera o pesquisador de softwares proprietários de processamento de texto e de formatos de ficheiro frágeis.
-
-Agora é possível escrever uma ampla variedade de documentos em um formato - artigos, postagens de blog, wikis, programas de estudos e cartas de recomendação - usando o mesmo conjunto de ferramentas e técnicas para pesquisar, descobrir, fazer backup e distribuir nossos materiais. Suas notas, entradas de blog, documentação de código e wikis podem ser criados no Markdown. Cada vez mais, muitas plataformas como WordPress, Reddit e GitHub suportam a autoria Markdown nativamente. A longo prazo, sua pesquisa se beneficiará desses fluxos de trabalho unificados, tornando mais fácil salvar, pesquisar, compartilhar e organizar seus materiais.
-
-## Princípios
-
-Inspirados pelas melhores práticas em uma variedade de disciplinas, nós fomos guiados pelos seguintes princípios:
-
-1. _Sustentabilidade_. O texto simples garante transparência e atende aos padrões de preservação de longo prazo. O Word pode seguir o caminho do [Word Perfect](https://pt.wikipedia.org/wiki/WordPerfect) no futuro, mas o texto simples sempre permanecerá fácil de ler, catalogar, extrair e transformar. Além disso, o texto simples permite um controle fácil e poderoso do versionamento do documento, o que é útil na colaboração e na organização de rascunhos. Seus ficheiros de texto simples estarão acessíveis em telefones celulares, tablets ou, talvez, em um terminal de baixa potência em alguma biblioteca remota. O texto simples é compatível com versões anteriores e à prova de futuro. Qualquer que seja o software ou hardware que vier a seguir, ele será capaz de entender os seus ficheiros de texto simples.
-2. _Preferência por formatos legíveis por humanos_. Quando escrevemos no Word ou no Google Docs, o que vemos não é o que obtemos. O ficheiro .doc contém uma formatação oculta de caracteres gerados automaticamente, criando uma camada de composição tipográfica ofuscada que é difícil para o usuário solucionar. Algo tão simples como colar uma imagem ou texto do navegador pode ter efeitos imprevisíveis na formatação do seu documento.
-3. _Separação entre forma e conteúdo_. Escrever e formatar ao mesmo tempo é uma distração. A ideia é escrever primeiro e formatar depois, o mais próximo possível da hora da publicação. Uma tarefa como mudar da formatação Chicago para MLA deve ser simples. Os editores de periódicos que desejam economizar tempo na formatação desnecessária e na edição de cópias devem ser capazes de fornecer aos seus autores um modelo de formatação que cuida dos detalhes da composição tipográfica.
-4. _Apoio ao aparato acadêmico_. O fluxo de trabalho precisa lidar com notas de rodapé, figuras, caracteres internacionais e bibliografias com elegância.
-5. _Independência de plataforma_.
Na medida em que os vetores de publicação se multiplicam, precisamos ser capazes de gerar uma multiplicidade de formatos, incluindo projeção de slides, impressão, web e celular. Idealmente, gostaríamos de poder gerar os formatos mais comuns sem quebrar as dependências bibliográficas. Nosso fluxo de trabalho também precisa ser portátil - seria bom poder copiar uma pasta para um pen drive e saber que ela contém tudo o que é necessário para publicação de estudos. Escrever em texto simples significa que é possível facilmente compartilhar, editar e arquivar seus documentos em praticamente qualquer ambiente. Por exemplo, um programa escrito em Markdown pode ser salvo como PDF, impresso como um folheto e convertido em HTML para a web, tudo a partir do mesmo ficheiro. Tanto os documentos da web quanto os impressos devem ser publicados da mesma fonte e ter aparência semelhante, preservando o layout lógico do material.
-
-Markdown e LaTeX cumprem todos esses requisitos. Nós escolhemos Markdown (e não LaTeX) porque ele oferece a sintaxe mais leve e organizada (por isso, _mark down_) e porque, quando unido com Pandoc, permite maior flexibilidade nas saídas (incluindo ficheiros .docx e .tex).[^4]
-
-## Requisitos de Software
-
-Nós omitimos propositalmente alguns dos detalhes menores vinculados à plataforma ou ao sistema operacional de instalação do software listado abaixo. Por exemplo, não faz sentido fornecer instruções de instalação para o LaTeX, quando as instruções online para o seu sistema operacional serão sempre mais atuais e completas. Da mesma forma, o processo de instalação do Pandoc é melhor explorado pesquisando por “instalar o Pandoc” no Google, com o provável primeiro resultado sendo a página inicial do Pandoc.
-
- - **Editor de texto simples**. Entrar no mundo de edição de texto simples expande dramaticamente as suas escolhas de ferramentas inovadoras de autoria. Pesquise online por "editor de texto markdown" e experimente as opções. Não importa qual for usada, contanto que seja explicitamente um editor de texto simples, como Atom e Notepad++. Lembre-se de que nós não estamos presos à ferramenta, é possível trocar de editor a qualquer momento.
- - **Terminal de linha de comando**. Trabalhar na "linha de comando" equivale a escrever comandos no terminal. Em um Mac, apenas pesquise por "Terminal". No Windows, use o [PowerShell](https://pt.wikipedia.org/wiki/PowerShell). Usuários de Linux provavelmente já devem estar familiarizados com seus terminais. Nós iremos cobrir o básico de como procurar e usar a linha de comando abaixo.
- - **Pandoc**. Instruções de instalação detalhadas e para plataformas específicas estão disponíveis no [site do Pandoc](https://pandoc.org/installing.html). _A instalação do Pandoc na sua máquina é crucial para esse tutorial_, então tome o seu tempo navegando pelas instruções. O Pandoc foi criado e é mantido por John MacFarlane, Professor de Filosofia na Universidade da Califórnia, Berkeley. São as humanidades computacionais em sua melhor expressão, e o Pandoc servirá como o motor de nosso fluxo de trabalho. Com o Pandoc, será possível compilar texto e bibliografia em documentos belamente formatados e flexíveis. Depois de seguir as instruções de instalação, verifique se o Pandoc está instalado digitando `pandoc --version` na linha de comando. Presumimos que a sua versão seja ao menos a versão 1.12.3, lançada em janeiro de 2014.
-
-Os próximos dois softwares são recomendados, mas não requisitados para realizar esse tutorial.
-
-* **Zotero ou Endnote**.
Softwares de referência bibliográfica como Zotero e Endnote são ferramentas indispensáveis para organizar e formatar citações em um artigo de pesquisa. Esses programas podem exportar suas bibliotecas como um ficheiro BibTeX (sobre o qual você aprenderá mais no Caso 2 a seguir). Este ficheiro, por si só um documento de texto simples formatado com todas as suas citações, permitirá que você cite referências de forma rápida e fácil usando `@tags`. Deve-se notar que também é possível digitar todas as suas referências bibliográficas à mão, usando [nossa bibliografia](https://github.com/dh-notes/pandoc-workflow/blob/master/pandoctut.bib) como modelo.
-* **LaTeX**. Instruções de instalação detalhadas e específicas da plataforma estão disponíveis no [site do Pandoc](https://pandoc.org/installing.html). Embora o LaTeX não seja abordado neste tutorial, ele é usado pelo Pandoc para a criação de .pdf. Usuários avançados frequentemente irão converter para LaTeX diretamente para ter um controle mais minucioso sobre a composição do .pdf. Os iniciantes podem querer pular esta etapa. Caso contrário, digite `latex -v` para ver se o LaTeX está instalado corretamente (você receberá um erro se não estiver e algumas informações sobre a versão, se estiver).
-
-## Básico do Markdown
-
-O Markdown é uma convenção para estruturar os seus documentos de texto simples semanticamente. A ideia é identificar estruturas lógicas no seu documento (títulos, seções, subseções, notas de rodapé, etc.), marcá-las com caracteres discretos e então "compilar" o texto resultante com um interpretador de composição tipográfica que formatará o documento consistentemente, de acordo com um estilo específico.
-
-As convenções de Markdown vêm em várias “versões” projetadas para uso em contextos específicos, como blogs, wikis ou repositórios de código. O sabor do Markdown usado pelo [Pandoc](https://pandoc.org/MANUAL.html#pandocs-markdown) é voltado para uso acadêmico. Suas convenções são descritas na página Markdown do Pandoc. Elas incluem o bloco “[YAML](https://pandoc.org/MANUAL.html#extension-yaml_metadata_block)”, que contém alguns metadados úteis.
-
-Vamos agora criar um documento simples no Markdown. Abra um editor de texto simples de sua escolha e comece a digitar. Deve ser assim:
-
-```
----
-title: Fluxo de Trabalho em Texto Simples
-author: Gabriela Domingues
-date: 20 de janeiro de 2014
-fontfamily: times
----
-```
-
-A versão do Markdown usada pelo Pandoc armazena cada um dos valores acima, e "imprime-os" na localização apropriada do seu documento de saída quando o documento estiver pronto para a composição tipográfica. Aprenderemos mais tarde a adicionar outros campos mais poderosos ao bloco "YAML". Por enquanto, vamos fingir que estamos escrevendo um artigo que contém três seções, cada uma subdividida em duas subseções. Deixe uma linha em branco após os três últimos traços no bloco "YAML" e cole o seguinte:
-
-```
-
-# Seção 1
-
-## Subseção 1.1
-Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-
-O parágrafo seguinte deve começar sem recuo:
-
-## Subseção 1.2
-Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo.
-
-# Seção 2
-
-## Subseção 2.1
-```
-
-Vá em frente e escreva um texto simulado também. Espaços em branco são significativos em Markdown: não recue os seus parágrafos. Ao invés disso, separe parágrafos usando uma linha vazia. Linhas vazias também devem preceder os cabeçalhos das seções.
-
-Use asteriscos para adicionar ênfases em negrito ou em itálico, assim: `*itálico*` e `**negrito**`. Nós devemos também adicionar um link e uma nota de rodapé no nosso texto para cobrir os componentes básicos de um artigo típico. Digite:
-
-```
-Uma frase que precisa de uma nota.[^1]
-
-[^1]: Essa é a minha primeira nota de rodapé! E um [link](https://www.eff.org/).
-```
-
-Quando o texto do link e o endereço são iguais, é mais rápido escrever `<www.eff.org>` ao invés de `[www.eff.org](www.eff.org)`.
-
-Vamos salvar nosso ficheiro antes de avançar. Crie a nova pasta que irá armazenar esse projeto. É provável que tenha algum sistema de organização de seus documentos, projetos, ilustrações e bibliografias, mas geralmente o seu documento, as suas ilustrações e a bibliografia estão em pastas diferentes, o que os torna mais difíceis de achar. Nosso objetivo é criar uma única pasta para cada projeto, com todos os materiais relevantes incluídos. A regra geral é um projeto, um artigo, uma pasta. Nomeie seu ficheiro como `main.md`, onde “md” significa markdown.
-
-Depois que seu ficheiro for salvo, vamos adicionar uma ilustração. Copie uma imagem (qualquer imagem pequena) para a sua pasta e adicione o seguinte em algum lugar no corpo do texto: `![legenda da imagem](sua_imagem.jpg)`.
-
-Nesse ponto, o seu `main.md` deve parecer com o que está abaixo. É possível baixar esse exemplo de ficheiro teste.md [aqui](/assets/autoria-sustentavel-texto-simples-pandoc-markdown/teste.md).
-
-```
----
-title: Fluxo de trabalho de texto simples
-author: Gabriela Domingues
-date: 20 de Janeiro de 2014
----
-
-# Seção 1
-
-## Subseção 1.1
-
-Lorem *ipsum* dolor sit amet, **consectetur** adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-
-## Subseção 1.2
-
-Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo.
-
-O próximo parágrafo deve começar assim. Não dê recuo.
-
-# Seção 2
-
-## Subseção 2.1
-
-![legenda da imagem](sua_imagem.jpg)
-
-## Subseção 2.2
-
-Uma frase que precisa de uma nota.[^1]
-
-[^1]: Essa é a minha primeira nota de rodapé! E um [link](https://www.eff.org/).
-```
-Como faremos em breve, esse ficheiro de texto simples pode ser renderizado em um belo PDF:
-
-{% include figure.html filename="autoria-sustentavel-texto-simples-pandoc-markdown-01.png" alt="Imagem representando o ficheiro MarkDown e a respectiva versão em Word produzida com o Pandoc" caption="Exemplo de captura de tela de Word renderizado no Pandoc" %}
-
-Se quiser ter uma ideia de como esse tipo de marcação será interpretado como formatação HTML, experimente esse [espaço de teste online](https://daringfireball.net/projects/markdown/dingus) e brinque com vários tipos de sintaxe. Lembre-se de que certos elementos do Markdown com o sabor do Pandoc (como o bloco de título e as notas de rodapé) não funcionarão neste formulário da web, que aceita apenas o básico.
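-
-A título de ilustração, eis um pequeno exemplo nosso (um esboço, não parte do fluxo original da lição) de alguns recursos mencionados logo a seguir, como citações em bloco, listas de marcadores e tabelas, na sintaxe do Markdown do Pandoc:
-
-```
-> Uma citação em bloco começa com o símbolo >.
-
-* Um item de lista de marcadores
-* Outro item
-
-| Coluna A | Coluna B |
-|----------|----------|
-| célula 1 | célula 2 |
-```
-
-Converta o trecho com o Pandoc para ver como cada elemento é renderizado.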
-
-Neste ponto, gaste algum tempo explorando alguns dos outros recursos do Markdown, como citações (referenciadas pelo símbolo `>`), listas de marcadores que começam com `*` ou `-`, quebras de linha textuais que começam com `|` (útil para poesia), tabelas e algumas das outras funções listadas na página de marcação do Pandoc.
-
-Preste bastante atenção em espaços vazios e no fluxo dos parágrafos. A documentação o coloca sucintamente ao definir um parágrafo como "uma ou mais linhas de texto seguidas por uma ou mais linhas vazias". Note que "linhas novas são tratadas como espaços" e que "se precisa de uma quebra de linha forte, coloque dois ou mais espaços no final de uma linha." A melhor maneira de entender o que isso significa é experimentar livremente. Use o modo de visualização do seu editor ou apenas execute o Pandoc para ver os resultados dos seus experimentos.
-
-Acima de tudo, evite a vontade de formatar. Lembre-se de que estamos identificando unidades semânticas: seções, subseções, ênfases, notas de rodapé e figuras. Mesmo *itálico* e **negrito** em Markdown não são realmente marcas de formatação, mas indicam diferentes níveis de *ênfase*. A formatação acontecerá depois, quando souber o lugar e os requisitos da publicação.
-
-Existem programas que permitem que se veja uma pré-visualização em tempo real da saída do Markdown enquanto se edita o ficheiro de texto simples, que nós detalhamos abaixo na seção de Recursos Úteis. Poucos deles suportam, entretanto, notas de rodapé, figuras e bibliografias. Para aproveitar o Pandoc ao máximo, nós recomendamos que use ficheiros de texto simples armazenados localmente, no seu computador.
-
-## Entrando em contato com a linha de comando do seu computador
-
-Antes de começarmos a publicar o nosso ficheiro `main.md` em outros formatos, nós precisamos nos orientar sobre como trabalhar com a linha de comando usando o programa de terminal do seu computador, que é o único (e melhor) modo de usar o Pandoc.
-
-A linha de comando é um lugar amigável, uma vez que se acostuma com ela. Se já estiver familiarizado com o uso da linha de comando, sinta-se à vontade para pular esta seção. Para outros, é importante entender que ser capaz de usar seu programa de terminal diretamente permitirá que se use uma ampla gama de poderosas ferramentas de pesquisa que não poderiam ser usadas de outra forma, e podem servir como base para um trabalho mais avançado. Para os fins deste tutorial, é preciso aprender apenas alguns comandos muito simples.
-
-Primeiro, abra uma janela de linha de comando. Se você estiver usando o macOS, abra o aplicativo Terminal no diretório ‘Aplicativos / Utilitários’. No Windows, recomendamos que use o PowerShell ou, para uma solução mais robusta, instale o subsistema do Windows para Linux e use o terminal que vem com sua distribuição favorita do Linux. Para obter uma excelente introdução à linha de comando, consulte [“Introdução à linha de comando Bash” (em inglês)](/en/lessons/intro-to-bash), de Ian Milligan e James Baker.
-
-No terminal, deve-se ver uma janela de texto e um prompt que parece com isso: `nome-do-computador:~nome-do-usuário$`. O til (~) indica o diretório do usuário, e é possível escrever `$ cd ~` em qualquer ponto para retornar para o seu diretório de usuário. Não escreva o cifrão, ele apenas simboliza o prompt de comando no seu terminal, indicando que se digite algo no terminal (em oposição a digitar algo no seu documento); lembre-se de apertar "Enter" após todo comando.
-
-É bem provável que a sua pasta "Documentos" esteja localizada aqui. Digite `$ pwd` (= _print working directory_, exibe o diretório de trabalho) e aperte "Enter" para exibir o nome do diretório atual. Use `$ pwd` sempre que se sentir perdido.
-
-O comando `$ ls` (= _list_, listar) simplesmente lista os ficheiros no diretório atual. Por fim, pode usar `$ cd` (= _change directory_, mudar diretório) assim: `$ cd NOME_DIRETÓRIO` (em que `NOME_DIRETÓRIO` é o nome do diretório que se quer acessar). Use `$ cd ..` para mover automaticamente um nível para cima na estrutura de diretórios (o diretório-pai do diretório em que se está). Uma vez que começar a digitar o nome do diretório, use a tecla Tab para completar automaticamente o texto - particularmente útil para nomes de diretório longos ou nomes de diretórios que contenham espaços.[^5]
-
-Esses três comandos de terminal: `pwd`, `ls` e `cd` são tudo o que é preciso para esse tutorial. Pratique-os por alguns minutos para navegar pela sua pasta de documentos e pense na forma em que os seus ficheiros estão organizados. Se quiser, acompanhe pelo seu gerenciador gráfico de ficheiros habitual para se manter informado.
-
-## Usando Pandoc para converter Markdown em um documento do MS Word
-
-Nós estamos agora prontos para formatar! Abra a sua janela de terminal, use o `$ pwd` e `$ cd NOME_DIRETÓRIO` para navegar até a pasta correta para o seu projeto. Chegando lá, digite `$ ls` no terminal para listar os ficheiros. Se encontrar o seu ficheiro .md e suas imagens, está no lugar certo. Para converter o .md em um .docx escreva:
-
-```
- $ pandoc main.md -o main.docx
-```
-Abra o ficheiro no MS Word e confira os resultados. Alternativamente, se usa o Open- ou LibreOffice, escreva:
-```
- $ pandoc main.md -o project.odt
-```
-Se não estiver acostumado com a linha de comando, imagine ler o comando acima como se fosse algo como: "Pandoc, crie um ficheiro MS Word a partir do meu ficheiro Markdown". A parte `-o` é uma "bandeira", que nesse caso diz algo como "ao invés de eu lhe dizer explicitamente os formatos de ficheiro de origem e destino, apenas tente adivinhar olhando para a extensão do ficheiro" ou simplesmente "output (saída)". Muitas opções estão disponíveis através desses sinalizadores no Pandoc. É possível ver a lista completa no [site do Pandoc](https://pandoc.org/) ou digitando `$ man pandoc` no terminal.
-
-Tente rodar o comando:
-```
- pandoc main.md -o projeto.html
-```
-Agora navegue de volta para o diretório do seu projeto. O que aconteceu?
-
-Usuários mais avançados que têm o LaTeX instalado podem querer experimentar convertendo o Markdown em .tex ou ficheiros .pdf especialmente formatados. Uma vez que o LaTeX estiver instalado, um ficheiro PDF belamente formatado pode ser criado usando a mesma estrutura de comando:
-```
- pandoc main.md -o main.pdf
-```
-
-<div class="alert alert-warning">
    - Se este comando falhar, você pode precisar adicionar um componente que forneça ao pandoc o caminho completo para o motor LaTeX que deseja usar, especificando onde está armazenado. A localização variará se você estiver trabalhando em Mac, Windows ou Linux. Os leitores são aconselhados a verificar o caminho correto para o motor LaTeX em seu sistema e seguir as instruções de instalação atuais (em inglês). -
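-
-A título de ilustração (um esboço nosso; o caminho exato varia conforme o sistema operacional e a distribuição TeX instalada), a chamada com o caminho completo para o motor LaTeX pode se parecer com isto, usando a localização típica do MacTeX no macOS:
-```
- pandoc main.md --pdf-engine=/Library/TeX/texbin/pdflatex -o main.pdf
-```
-Em outros sistemas, substitua o caminho pelo local onde o seu motor LaTeX está instalado.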
    -
-Se o seu documento estiver escrito em outros idiomas que não o inglês, você provavelmente precisará usar o mecanismo XeLaTeX em vez do LaTeX simples para conversão .pdf:
-```
- pandoc main.md --pdf-engine=xelatex -o main.pdf
-```
-Tenha certeza de que o seu editor de texto suporta a codificação UTF-8. Quando usar XeLaTeX para conversão em .pdf, ao invés do atributo `fontfamily` no "YAML" para mudar fontes, especifique o atributo `mainfont` para produzir algo como isto:
-```
- ---
- title: Fluxo de Trabalho de Texto Simples
- author: Gabriela Domingues
- date: 20 de janeiro de 2014
- mainfont: times
- ---
-```
-
-Por exemplo, estilos de fontes podem ser passados para o Pandoc na forma de `pandoc main.md -V mainfont=times -o destino.pdf`. Nós preferimos, entretanto, usar as opções de cabeçalho do "YAML" sempre que possível, uma vez que os comandos são mais fáceis de lembrar. Usar uma ferramenta de controle de versão como o Git preservará as mudanças no "YAML", ao passo que o que é digitado no terminal é mais efêmero. Consulte a seção de Templates (Modelos) no manual do Pandoc (`man pandoc`) para a lista de variáveis do "YAML" disponíveis.
-
-## Trabalhando com Bibliografias
-
-Nesta seção, adicionaremos uma bibliografia ao nosso documento e, em seguida, converteremos os formatos de Chicago para MLA.
-
-Se não estiver usando um gerenciador de referência como Endnote ou Zotero, comece a usar um. Preferimos o Zotero porque, como o Pandoc, foi criado pela comunidade acadêmica e, como outros projetos de código aberto, é lançado sob a GNU General Public License. O mais importante para nós é que o seu gerenciador de referência deve ter a capacidade de gerar bibliografias em formato de texto simples, para manter o alinhamento com nosso princípio “tudo em texto simples”. Vá em frente e abra um gerenciador de referência de sua escolha e adicione algumas entradas de amostra. Quando estiver pronto, encontre a opção de exportar sua bibliografia no formato BibTeX (.bib). Salve o ficheiro .bib no diretório do projeto e dê a ele um título razoável como “projeto.bib”.
-
-A ideia geral é manter as suas fontes organizadas em um banco de dados bibliográfico centralizado, enquanto geramos ficheiros .bib menores e mais específicos que devem ficar no mesmo diretório que o seu projeto. Vá em frente e abra o seu ficheiro .bib com o editor de texto simples que escolher.[^6]
-
-O seu ficheiro .bib deve conter múltiplas entradas que se parecem com esta:
-
-    @article{fyfe_digital_2011,
-    title = {Digital Pedagogy Unplugged},
-    volume = {5},
-    url = {http://digitalhumanities.org/dhq/vol/5/3/000106/000106.html},
-    number = {3},
-    urldate = {2013-09-28},
-    author = {Fyfe, Paul},
-    year = {2011},
-    file = {fyfe_digital_pedagogy_unplugged_2011.pdf}
-    }
-
-Raramente será necessário editar essas entradas manualmente (embora seja possível). Na maioria dos casos, você simplesmente exportará o ficheiro .bib do Zotero ou de um gerenciador de referências semelhante. Reserve um momento para se orientar aqui. Cada entrada consiste em um tipo de documento, “artigo” em nosso caso, um identificador exclusivo (fyfe_digital_2011) e os metadados relevantes sobre título, volume, autor e assim por diante. O que mais nos interessa é o ID exclusivo que segue imediatamente a chave na primeira linha de cada entrada. O ID único é o que nos permite conectar a bibliografia ao documento principal. Deixe este ficheiro aberto por enquanto e volte para o seu ficheiro `main.md`.
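-
-(Antes de mudarmos de ficheiro, um exemplo ilustrativo nosso, meramente a título de comparação: uma entrada do tipo livro segue o mesmo padrão, mudando apenas o tipo de documento e os campos relevantes.)
-
-    @book{tukey_exploratory_1977,
-    title = {Exploratory Data Analysis},
-    publisher = {Addison-Wesley},
-    author = {Tukey, John W.},
-    year = {1977}
-    }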
- -Edite a nota de rodapé na primeira linha do seu ficheiro `main.md` para se parecer com algo como os seguintes exemplos, em que o `@nome_título_data` pode ser substituído por um dos IDs únicos do seu ficheiro `projeto.bib`. - -* `Uma referência formatada como esta será renderizada apropriadamente como citação no estilo em linha - ou nota de rodapé [@nome_título_data, 67].`[^7] -* `Para citações entre aspas, coloque a vírgula fora das marcas de citação [@nome_título_data, 67]. ` - -Uma vez que rodarmos o Markdown através do Pandoc, "@fyfe_digital_2011" será expandido em uma citação completa no estilo que desejar. É possível usar a sintaxe da `@citação` como preferir: em linha com o seu texto ou em notas de rodapé. Para gerar a bibliografia simplesmente inclua uma seção chamada `# Bibliografia` no fim do documento. - -Agora, retorne para o seu cabeçalho de metadados no topo do seu documento .md, e especifique o ficheiro de bibliografia a ser usado, assim: - -``` ---- -title: Fluxo de Trabalho de Texto Simples -author: Gabriela Domingues -date: 20 de janeiro de 2014 -bibliography: projeto.bib ---- -``` -Isso diz ao Pandoc para procurar pela bibliografia no ficheiro `projeto.bib`, sob o mesmo diretório que o seu `main.md`. Vamos ver se funciona. Salve o ficheiro, mude para a janela do terminal e execute: - -``` -$ pandoc main.md --filter pandoc-citeproc -o main.docx - -``` -O filtro “pandoc-citeproc” analisará quaisquer tags de citação encontradas em seu documento. O resultado deve ser um ficheiro MS Word formatado. Se tiver o LaTeX instalado, converta para .pdf usando a mesma sintaxe para resultados mais bonitos. Não se preocupe se as coisas não estiverem exatamente como prefere - lembre-se de que fará o ajuste refinado da formatação de uma vez mais tarde, o mais próximo possível da data da publicação. Por enquanto, estamos apenas criando rascunhos baseados em padrões razoáveis. - -## Mudando estilos de citação - -O estilo de citação padrão no Pandoc é [Chicago Author-date](https://www.chicagomanualofstyle.org/tools_citationguide/citation-guide-2.html). Podemos especificar um estilo diferente usando a folha de estilo, escrita na “Linguagem de Estilo de Citação” (outra convenção de texto simples, neste caso para descrever estilos de citação) e denotada pela extensão de ficheiro .csl. Felizmente, o projeto CSL mantém um repositório de estilos de citação comuns, alguns até personalizados para periódicos específicos. Visite http://editor.citationstyles.org/about/ para localizar o ficheiro .csl para Modern Language Association (Associação de Linguagem Moderna), baixe `modern-language-association.csl` e salve no diretório do projeto como `mla.csl`. Agora precisamos dizer ao Pandoc para usar a folha de estilo MLA em vez do padrão Chicago. Fazemos isso atualizando o cabeçalho YAML: - -``` ---- -title: Fluxo de trabalho de Texto Simples -author: Gabriela Domingues -date: 20 de janeiro de 2014 -bibliography: projeto.bib -csl: mla.csl ---- -``` - -Então repita o comando Pandoc para carregar seu ficheiro markdown em seu formato de destino (.pdf ou .docx): - -``` -$ pandoc main.md --filter pandoc-citeproc -o main.pdf -``` -Traduza o comando para o Português enquanto digita. Na minha cabeça, eu traduzo o comando acima em algo como: "Pandoc, pegue o meu ficheiro markdown, aplique o filtro de citação sobre ele e retorne um ficheiro PDF". 
Quando ficar mais familiarizado com as páginas de estilo de citação, considere adicionar os seus ficheiros .csl customizados para periódicos do seu campo ao repositório, como um serviço para a comunidade.
-
-## Resumo
-
-Agora, você deve ser capaz de escrever artigos em Markdown, criar rascunhos em múltiplos formatos, adicionar bibliografias e facilmente mudar os estilos de citação. Um último olhar no diretório do projeto revelará vários ficheiros de origem: o ficheiro `main.md`, o ficheiro `projeto.bib`, o ficheiro `mla.csl` e algumas imagens. Além dos ficheiros de origem, deve haver alguns ficheiros "destino" que criamos ao longo desse tutorial: `main.docx` ou `main.pdf`. A sua pasta deve se parecer com isso:
-
-```
-Pandoc-tutorial/
-    main.md
-    projeto.bib
-    mla.csl
-    imagem.jpg
-    main.docx
-```
-
-Trate seus ficheiros de origem como a versão de referência de seu texto e seus ficheiros de destino como “impressões” descartáveis que podem ser geradas facilmente com o Pandoc em tempo real. Todas as revisões devem ser feitas no `main.md`. O ficheiro `main.docx` está lá para formatação e limpeza em estágio final. Se, por exemplo, o periódico requisitar manuscritos com espaçamento duplo, é possível rapidamente colocar o espaçamento duplo no Open Office ou Microsoft Word. Mas não gaste muito tempo formatando. Lembre-se, tudo é retirado quando o seu manuscrito vai para a impressão. O tempo gasto em formatação desnecessária pode ser usado melhorando a prosa do seu rascunho.
-
-## Recursos úteis
-
-Se tiver problemas, não há lugar melhor para começar a procurar ajuda do que o [site do Pandoc](https://pandoc.org/) de John MacFarlane e a [lista de e-mails](https://groups.google.com/forum/#!forum/pandoc-discuss) associados. Pelo menos dois sites do tipo “Pergunta e Resposta” podem responder a perguntas no Pandoc: [Stack Overflow](https://stackoverflow.com/questions/tagged/pandoc) e [Digital Humanities Q&A](https://web.archive.org/web/20190203062832/http://digitalhumanities.org/answers/). As perguntas também podem ser feitas ao vivo, no Freenode IRC, canal #Pandoc, frequentado por um amigável grupo de regulares. Conforme aprender mais sobre o Pandoc, também pode explorar um de seus recursos mais poderosos: [filtros](https://github.com/jgm/pandoc/wiki/Pandoc-Filters).
-
-Embora nossa sugestão seja começar com um editor simples, muitas outras alternativas específicas do Markdown para o MS Word (mais de 70, de acordo com [esta postagem do blog](https://web.archive.org/web/20140120195538/http://mashable.com/2013/06/24/markdown-tools/)) estão disponíveis online, e muitas vezes sem custo. Dos editores autônomos, gostamos de [Write Monkey](http://writemonkey.com/) e [Sublime Text](https://www.sublimetext.com/). Surgiram recentemente várias plataformas baseadas na web que fornecem interfaces gráficas elegantes para escrita colaborativa e controle de versão usando Markdown. Algumas delas são: [prose.io](http://prose.io/), [Authorea](https://www.authorea.com/), [Draft](http://www.draftin.com/) e [StackEdit](https://stackedit.io/).
-
-Mas o ecossistema não é limitado a editores. [Gitit](http://gitit.net/) e [Ikiwiki](https://github.com/dubiousjim/pandoc-iki) suportam autoria em Markdown com Pandoc como analisador.
Podemos incluir nesta lista uma série de ferramentas que geram páginas da Web estáticas e rápidas, [Yst](https://github.com/jgm/yst), [Jekyll](https://github.com/fauno/jekyll-pandoc-multiple-formats), [Hakyll](http://jaspervdj.be/hakyll/) e o [script de shell bash](https://github.com/wcaleb/website) do historiador Caleb McDaniel. - -Por fim, plataformas de publicação completas estão se formando ao redor do uso de Markdown. O Markdown na plataforma de marketplace [Leanpub](https://leanpub.com/) pode ser uma alternativa interessante ao modelo de publicação tradicional. E nós mesmos estamos experimentando o design de periódicos acadêmicos com base no GitHub e [readthedocs.org](https://readthedocs.org/) (ferramentas geralmente usadas para documentação técnica). - - -### Notas -[^1]: Não se preocupe se não entender essa terminologia ainda! -[^2]: Os ficheiros fonte para essa documentação podem ser [baixados no GitHub](https://github.com/dh-notes/pandoc-workflow). Use a opção "raw" quando visualizar no GitHub para ver o Markdown fonte. Os autores gostariam de agradecer a Alex Gil e seus colegas do Digital Humanities Center de Columbia e aos participantes do openLab no Studio na biblioteca Butler por testar o código deste tutorial em uma variedade de plataformas. -[^3]: Veja a excelente discussão de Charlie Stross sobre esse tópico em [Porque Microsoft Word Deve Morrer (em inglês)](http://www.antipope.org/charlie/blog-static/2013/10/why-microsoft-word-must-die.html). -[^4]: Não existem boas soluções para chegar diretamente no MS Word a partir do LaTeX. -[^5]: É uma boa ideia criar o hábito de não usar espaços em nomes de pastas ou ficheiros. Traços ou sublinhados ao invés de espaços nos nomes de seus ficheiros garantem uma duradoura compatibilidade entre plataformas. -[^6]: Note que a extensão .bib pode estar "registrada" no Zotero no seu sistema operacional. Isso significa que quando se clica em um ficheiro .bib é provável que se chame o Zotero para abri-lo, enquanto nós queremos abrir com o editor de texto. Eventualmente, pode querer associar a extensão .bib ao seu editor de texto, -[^7]: Agradeço a [@njbart](https://github.com/njbart) pela correção. Em resposta a nossa sugestão original, `Algumas frases precisam de citação.^[@fyfe_digital_2011 argumenta isso também.]`, [ele escreve](https://github.com/programminghistorian/jekyll/issues/46#issue-45559983): “Isso não é recomendado, pois evita que se alterne facilmente entre os estilos de nota de rodapé e data do autor. É melhor usar o [corrigido] (sem circunflexo, sem ponto final entre colchetes e a pontuação final da frase do texto após os colchetes; com estilos de notas de rodapé, o pandoc ajusta automaticamente a posição da pontuação final). 
” +--- +title: Autoria Sustentável em Texto Simples usando Pandoc e Markdown +layout: lesson +collection: lessons +slug: autoria-sustentavel-texto-simples-pandoc-markdown +date: 2014-03-19 +translation_date: 2022-11-27 +authors: +- Dennis Tenen +- Grant Wythoff +lesson-testers: +- Pao-Chuan Ma +tested-date: 2021-06-10 +editors: +- Fred Gibbs +translator: +- Gabriela Kucuruza +translation-editor: +- Jimmy Medeiros +translation-reviewer: +- Daniel Bonatto Seco +- André Salvo +difficulty: 2 +activity: sustaining +topics: [website, data-management] +abstract: "Neste tutorial, você aprenderá primeiro o básico do Markdown - uma sintaxe de marcação fácil de ler e escrever para texto simples - bem como Pandoc, uma ferramenta de linha de comando que converte texto simples em vários tipos de ficheiros formatados: PDF, docx, HTML, LaTeX, apresentação de slides e muito mais." +exclude_from_check: + - reviewers + - review-ticket +original: sustainable-authorship-in-plain-text-using-pandoc-and-markdown +avatar_alt: Um homem trabalhando numa mesa de desenho +doi: 10.46430/phpt0036 +--- + +{% include toc.html %} + +{% include figure.html filename="lexoriter.jpg" caption="" %} + +## Objetivos + +Neste tutorial, você aprenderá primeiro o básico do Markdown - uma sintaxe de marcação fácil de ler e de escrever para texto simples - assim como o [Pandoc](https://pandoc.org/), uma ferramenta de linha de comando que converte texto simples em vários tipos de ficheiro belamente formatados: PDF, docx, HTML, LaTeX, apresentações de slides e muito mais.[^1] Com o Pandoc como sua ferramenta de composição digital, você pode usar a sintaxe Markdown para adicionar figuras, bibliografia, formatação e alterar facilmente os estilos de citação de Chicago para MLA (por exemplo), todos usando texto simples. + +Este tutorial não pressupõe nenhum conhecimento técnico prévio, mas aumenta com a experiência, uma vez que vamos sugerir técnicas mais avançadas ao final de cada seção. Elas estão claramente marcadas e podem ser revisitadas após alguma prática e experimentação. + +Ao invés de seguir esse tutorial de maneira mecânica, recomendamos que se esforce para entender as soluções oferecidas aqui como uma _metodologia_, que pode precisar de adaptações para se adequar ao seu ambiente e fluxo de trabalho. A instalação das ferramentas necessárias apresenta talvez a maior barreira à participação. Tenha tempo e paciência suficientes para instalar tudo corretamente, ou faça isso com um/a colega que tenha uma configuração semelhante e ajudem-se mutuamente. Consulte a seção [Recursos Úteis](/pt/licoes/autoria-sustentavel-texto-simples-pandoc-markdown#recursos-uteis) abaixo se ficar preso.[^2] + +## Filosofia +Escrever, armazenar e recuperar documentos são atividades centrais para o fluxo de trabalho de pesquisa das humanidades. Mesmo assim, muitos autores baseiam suas práticas em ferramentas e formatos proprietários que, às vezes, ficam aquém dos requisitos mais básicos da escrita acadêmica. Talvez possa se lembrar de certa frustração com a fragilidade de notas de rodapé, bibliografias, figuras e rascunhos de livros escritos em Microsoft Word ou Google Docs. No entanto, a maioria dos periódicos ainda insiste em submissões no formato .docx. + +Mais do que causar frustração pessoal, essa dependência de ferramentas e de formatos proprietários tem implicações negativas de longo prazo para a comunidade acadêmica. 
Em tal ambiente, os periódicos devem terceirizar a composição, alienando os autores dos contextos materiais de publicação e adicionando outras barreiras desnecessárias à circulação irrestrita do conhecimento.[^3]
+
+Quando se usa MS Word, Google Docs ou Open Office para escrever documentos, o que se vê não é o que se obtém. Embaixo da camada visível de palavras, frases e parágrafos, encontra-se uma complicada camada de código compreensível apenas para as máquinas. Por causa dessa camada oculta, os ficheiros .docx e .pdf dependem de ferramentas proprietárias para serem visualizados corretamente. Esses documentos são difíceis de pesquisar, imprimir e converter em outros formatos de ficheiros.
+
+Além disso, o tempo gasto formatando documentos em MS Word ou Open Office é perdido, pois toda essa formatação é removida pelo editor durante a submissão. Tanto os autores quanto os editores se beneficiariam da troca de ficheiros com formatação mínima, deixando a composição tipográfica para o estágio final de composição do processo de publicação.
+
+Aqui é onde o Markdown brilha. Markdown é uma sintaxe para marcar explicitamente elementos semânticos dentro de um documento, não em alguma camada oculta. A ideia é identificar as unidades que são significativas para humanos, como títulos, seções, subseções, notas de rodapé e ilustrações. No mínimo, os seus ficheiros sempre permanecerão compreensíveis **para você**, mesmo se o editor de texto que estiver usando parar de funcionar ou "sair do mercado".
+
+Escrever dessa forma libera o autor da ferramenta. Markdown pode ser escrito em qualquer editor de texto simples e oferece um rico ecossistema de software que pode renderizar o texto em documentos belamente formatados. Por esta razão, o Markdown está atualmente passando por um período de crescimento, não apenas como meio para escrever artigos acadêmicos, mas como uma convenção para edição online em geral.
+
+Os editores de texto simples de uso geral populares incluem [Atom](https://atom.io/) (todas as plataformas) e [Notepad ++](https://notepad-plus-plus.org/) (somente para Windows).
+
+É importante entender que o Markdown é apenas uma convenção. Os ficheiros Markdown são armazenados como texto simples, aumentando ainda mais a flexibilidade do formato. Ficheiros de texto simples existem desde a máquina de escrever eletrônica. A longevidade deste padrão torna, de modo inerente, o texto simples mais sustentável e estável do que os formatos proprietários. Enquanto os ficheiros produzidos até dez anos atrás no Microsoft Word e no Apple Pages podem causar problemas significativos quando abertos nas versões mais recentes, ainda é possível abrir um ficheiro escrito em qualquer editor de texto simples “morto” nas últimas décadas: AlphaPlus, Perfect Writer, Text Wizard, Spellbinder, WordStar ou o favorito de Isaac Asimov, SCRIPSIT 2.0, feito por Radio Shack. Escrever em texto simples garante que seus ficheiros permanecerão legíveis daqui a dez, quinze, vinte anos. Neste tutorial, descrevemos um fluxo de trabalho que libera o pesquisador de softwares proprietários de processamento de texto e de formatos de ficheiro frágeis.
+
+Agora é possível escrever uma ampla variedade de documentos em um formato - artigos, postagens de blog, wikis, programas de estudos e cartas de recomendação - usando o mesmo conjunto de ferramentas e técnicas para pesquisar, descobrir, fazer backup e distribuir nossos materiais. Suas notas, entradas de blog, documentação de código e wikis podem ser criados no Markdown.
Cada vez mais, muitas plataformas como WordPress, Reddit e GitHub suportam a autoria Markdown nativamente. A longo prazo, sua pesquisa se beneficiará desses fluxos de trabalho unificados, tornando mais fácil salvar, pesquisar, compartilhar e organizar seus materiais.
+
+## Princípios
+
+Inspirados pelas melhores práticas em uma variedade de disciplinas, nós fomos guiados pelos seguintes princípios:
+
+1. _Sustentabilidade_. O texto simples garante transparência e atende aos padrões de preservação de longo prazo. O Word pode seguir o caminho do [Word Perfect](https://pt.wikipedia.org/wiki/WordPerfect) no futuro, mas o texto simples sempre permanecerá fácil de ler, catalogar, extrair e transformar. Além disso, o texto simples permite um controle fácil e poderoso do versionamento do documento, o que é útil na colaboração e na organização de rascunhos. Seus ficheiros de texto simples estarão acessíveis em telefones celulares, tablets ou, talvez, em um terminal de baixa potência em alguma biblioteca remota. O texto simples é compatível com versões anteriores e à prova de futuro. Qualquer que seja o software ou hardware que vier a seguir, ele será capaz de entender os seus ficheiros de texto simples.
+2. _Preferência por formatos legíveis por humanos_. Quando escrevemos no Word ou no Google Docs, o que vemos não é o que obtemos. O ficheiro .doc contém uma formatação oculta de caracteres gerados automaticamente, criando uma camada de composição tipográfica ofuscada que é difícil para o usuário solucionar. Algo tão simples como colar uma imagem ou texto do navegador pode ter efeitos imprevisíveis na formatação do seu documento.
+3. _Separação entre forma e conteúdo_. Escrever e formatar ao mesmo tempo é uma distração. A ideia é escrever primeiro e formatar depois, o mais próximo possível da hora da publicação. Uma tarefa como mudar da formatação Chicago para MLA deve ser simples. Os editores de periódicos que desejam economizar tempo na formatação desnecessária e na edição de cópias devem ser capazes de fornecer aos seus autores um modelo de formatação que cuida dos detalhes da composição tipográfica.
+4. _Apoio ao aparato acadêmico_. O fluxo de trabalho precisa lidar com notas de rodapé, figuras, caracteres internacionais e bibliografias com elegância.
+5. _Independência de plataforma_. Na medida em que os vetores de publicação se multiplicam, precisamos ser capazes de gerar uma multiplicidade de formatos, incluindo projeção de slides, impressão, web e celular. Idealmente, gostaríamos de poder gerar os formatos mais comuns sem quebrar as dependências bibliográficas. Nosso fluxo de trabalho também precisa ser portátil - seria bom poder copiar uma pasta para um pen drive e saber que ela contém tudo o que é necessário para publicação de estudos. Escrever em texto simples significa que é possível facilmente compartilhar, editar e arquivar seus documentos em praticamente qualquer ambiente. Por exemplo, um programa escrito em Markdown pode ser salvo como PDF, impresso como um folheto e convertido em HTML para a web, tudo a partir do mesmo ficheiro. Tanto os documentos da web quanto os impressos devem ser publicados da mesma fonte e ter aparência semelhante, preservando o layout lógico do material.
+
+Markdown e LaTeX cumprem todos esses requisitos.
Nós escolhemos o Markdown (e não o LaTeX) porque ele oferece a sintaxe mais leve e organizada (por isso, _mark down_) e porque, quando unido ao Pandoc, permite maior flexibilidade nas saídas (incluindo ficheiros .docx e .tex).[^4]
+
+## Requisitos de Software
+
+Nós omitimos propositalmente alguns dos detalhes menores, vinculados à plataforma ou ao sistema operacional, de instalação dos softwares listados abaixo. Por exemplo, não faz sentido fornecer instruções de instalação para o LaTeX, quando as instruções online para o seu sistema operacional serão sempre mais atuais e completas. Da mesma forma, o processo de instalação do Pandoc é melhor explorado pesquisando por “instalar o Pandoc” no Google, com o provável primeiro resultado sendo a página inicial do Pandoc.
+
+ - **Editor de texto simples**. Entrar no mundo da edição em texto simples expande dramaticamente as suas escolhas de ferramentas inovadoras de autoria. Pesquise online por "editor de texto markdown" e experimente as opções. Não importa qual for usado, contanto que seja explicitamente um editor de texto simples, como o Atom ou o Notepad++. Lembre-se de que nós não estamos presos à ferramenta: é possível trocar de editor a qualquer momento.
+ - **Terminal de linha de comando**. Trabalhar na "linha de comando" equivale a escrever comandos no terminal. Em um Mac, apenas pesquise por "Terminal". No Windows, use o [PowerShell](https://pt.wikipedia.org/wiki/PowerShell). Usuários de Linux provavelmente já estão familiarizados com seus terminais. Nós iremos cobrir o básico de como encontrar e usar a linha de comando abaixo.
+ - **Pandoc**. Instruções de instalação detalhadas e para plataformas específicas estão disponíveis no [site do Pandoc](https://pandoc.org/installing.html). _A instalação do Pandoc na sua máquina é crucial para esse tutorial_, então reserve um tempo para navegar pelas instruções. O Pandoc foi criado e é mantido por John MacFarlane, Professor de Filosofia na Universidade da Califórnia, Berkeley. Isso são as humanidades digitais em sua melhor expressão, e o Pandoc servirá como o motor de nosso fluxo de trabalho. Com o Pandoc, será possível compilar texto e bibliografia em documentos belamente formatados e flexíveis. Depois de seguir as instruções de instalação, verifique se o Pandoc está instalado digitando `pandoc --version` na linha de comando. Presumimos que a sua versão seja ao menos a 1.12.3, lançada em janeiro de 2014.
+
+Os próximos dois softwares são recomendados, mas não obrigatórios para realizar esse tutorial.
+
+* **Zotero ou Endnote**. Softwares de referência bibliográfica como Zotero e Endnote são ferramentas indispensáveis para organizar e formatar citações em um artigo de pesquisa. Esses programas podem exportar suas bibliotecas como um ficheiro BibTeX (sobre o qual você aprenderá mais adiante). Este ficheiro, por si só um documento de texto simples com todas as suas citações, permitirá citar referências de forma rápida e fácil usando `@tags`. Deve-se notar que também é possível digitar todas as suas referências bibliográficas à mão, usando [nossa bibliografia](https://github.com/dh-notes/pandoc-workflow/blob/master/pandoctut.bib) como modelo.
+* **LaTeX**. Instruções de instalação detalhadas e específicas da plataforma estão disponíveis no [site do Pandoc](https://pandoc.org/installing.html). Embora o LaTeX não seja abordado neste tutorial, ele é usado pelo Pandoc para a criação de .pdf.
Usuários avançados frequentemente irão converter para LaTeX diretamente, para ter um controle mais minucioso sobre a composição do .pdf. Os iniciantes podem querer pular esta etapa. Caso contrário, digite `latex -v` para ver se o LaTeX está instalado corretamente (você receberá um erro se não estiver e algumas informações sobre a versão, se estiver).
+
+## Básico do Markdown
+
+O Markdown é uma convenção para estruturar os seus documentos de texto simples semanticamente. A ideia é identificar estruturas lógicas no seu documento (títulos, seções, subseções, notas de rodapé, etc.), marcá-las com caracteres discretos e então "compilar" o texto resultante com um interpretador de composição tipográfica que formatará o documento consistentemente, de acordo com um estilo específico.
+
+As convenções de Markdown vêm em várias “versões” projetadas para uso em contextos específicos, como blogs, wikis ou repositórios de código. A versão do Markdown usada pelo [Pandoc](https://pandoc.org/MANUAL.html#pandocs-markdown) é voltada para uso acadêmico. Suas convenções são descritas na página de Markdown do Pandoc e incluem o bloco “[YAML](https://pandoc.org/MANUAL.html#extension-yaml_metadata_block)”, que contém alguns metadados úteis.
+
+Vamos agora criar um documento simples no Markdown. Abra um editor de texto simples de sua escolha e comece a digitar. Deve ser assim:
+
+```
+---
+title: Fluxo de Trabalho em Texto Simples
+author: Gabriela Domingues
+date: 20 de janeiro de 2014
+fontfamily: times
+---
+```
+
+A versão do Markdown usada pelo Pandoc armazena cada um dos valores acima e "imprime-os" na localização apropriada do documento de saída quando ele estiver pronto para a composição tipográfica. Aprenderemos mais tarde a adicionar outros campos mais poderosos ao bloco "YAML". Por enquanto, vamos fingir que estamos escrevendo um artigo que contém três seções, cada uma subdividida em duas subseções. Deixe uma linha em branco após os três últimos traços no bloco "YAML" e cole o seguinte:
+
+```
+
+# Seção 1
+
+## Subseção 1.1
+Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+
+O parágrafo seguinte deve começar sem recuo:
+
+## Subseção 1.2
+Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo.
+
+# Seção 2
+
+## Subseção 2.1
+```
+
+Vá em frente e escreva um texto simulado também. Espaços em branco são significativos em Markdown: não recue os seus parágrafos. Ao invés disso, separe parágrafos usando uma linha vazia. Linhas vazias também devem preceder os cabeçalhos das seções.
+
+Use asteriscos para adicionar ênfases em negrito ou em itálico, assim: `*itálico*` e `**negrito**`. Vamos também adicionar um link e uma nota de rodapé ao nosso texto, para cobrir os componentes básicos de um artigo típico. Digite:
+
+```
+Uma frase que precisa de uma nota.[^1]
+
+[^1]: Essa é a minha primeira nota de rodapé! E um [link](https://www.eff.org/).
+```
+
+Quando o texto do link e o endereço são iguais, é mais rápido escrever `<www.eff.org>` ao invés de `[www.eff.org](www.eff.org)`.
+
+Vamos salvar nosso ficheiro antes de avançar. Crie a nova pasta que irá armazenar esse projeto.
É provável que já tenha algum sistema de organização para os seus documentos, projetos, ilustrações e bibliografias; mas, geralmente, o documento, as ilustrações e a bibliografia ficam em pastas diferentes, o que os torna mais difíceis de achar. Nosso objetivo é criar uma única pasta para cada projeto, com todos os materiais relevantes incluídos. A regra geral é: um projeto, um artigo, uma pasta. Nomeie seu ficheiro como `main.md`, em que “md” significa Markdown.
+
+Depois que seu ficheiro for salvo, vamos adicionar uma ilustração. Copie uma imagem (qualquer imagem pequena) para a sua pasta e adicione o seguinte em algum lugar no corpo do texto: `![legenda da imagem](sua_imagem.jpg)`.
+
+Nesse ponto, o seu `main.md` deve se parecer com o que está abaixo. É possível baixar esse ficheiro de exemplo, teste.md, [aqui](/assets/autoria-sustentavel-texto-simples-pandoc-markdown/teste.md).
+
+```
+---
+title: Fluxo de trabalho de texto simples
+author: Gabriela Domingues
+date: 20 de Janeiro de 2014
+---
+
+# Seção 1
+
+## Subseção 1.1
+
+Lorem *ipsum* dolor sit amet, **consectetur** adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+
+## Subseção 1.2
+
+Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo.
+
+O próximo parágrafo deve começar assim. Não dê recuo.
+
+# Seção 2
+
+## Subseção 2.1
+
+![legenda da imagem](sua_imagem.jpg)
+
+## Subseção 2.2
+
+Uma frase que precisa de uma nota.[^1]
+
+[^1]: Essa é a minha primeira nota de rodapé! E um [link](https://www.eff.org/).
+```
+Como faremos em breve, esse ficheiro de texto simples pode ser renderizado em um belo PDF:
+
+{% include figure.html filename="autoria-sustentavel-texto-simples-pandoc-markdown-01.png" alt="Imagem representando o ficheiro Markdown e a respectiva versão em Word produzida com o Pandoc" caption="Exemplo de captura de tela de um documento Word renderizado pelo Pandoc" %}
+
+Se quiser ter uma ideia de como esse tipo de marcação será interpretado como formatação HTML, experimente esse [espaço de teste online](https://daringfireball.net/projects/markdown/dingus) e brinque com vários tipos de sintaxe. Lembre-se de que certos elementos do Markdown no sabor do Pandoc (como o bloco de título e as notas de rodapé) não funcionarão neste formulário da web, que aceita apenas o básico.
+
+Neste ponto, gaste algum tempo explorando alguns dos outros recursos do Markdown, como citações em bloco (marcadas pelo símbolo `>`), listas de marcadores que começam com `*` ou `-`, quebras de linha textuais que começam com `|` (útil para poesia), tabelas e algumas das outras funções listadas na página de Markdown do Pandoc; há um pequeno esboço com alguns desses elementos logo abaixo.
+
+Preste bastante atenção em espaços vazios e no fluxo dos parágrafos. A documentação define sucintamente um parágrafo como “uma ou mais linhas de texto seguidas por uma ou mais linhas vazias”. Note que "linhas novas são tratadas como espaços" e que "se precisa de uma quebra de linha forte, coloque dois ou mais espaços no final de uma linha". A melhor maneira de entender o que isso significa é experimentar livremente. Use o modo de visualização do seu editor ou apenas execute o Pandoc para ver os resultados dos seus experimentos.
+
+Acima de tudo, evite a vontade de formatar.
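+
+Eis, a título de ilustração, o esboço prometido acima (um exemplo hipotético e opcional) combinando alguns desses elementos; cole-o no seu `main.md` ou no espaço de teste e observe como cada marcação é interpretada:
+
+```
+> Esta é uma citação em bloco.
+
+* Primeiro item de uma lista
+* Segundo item de uma lista
+
+| Uma linha de verso,
+| e outra linha de verso.
+```
+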
Lembre-se de que estamos identificando unidades semânticas: seções, subseções, ênfases, notas de rodapé e figuras. Mesmo *itálico* e **negrito** em Markdown não são realmente marcas de formatação; indicam, antes, diferentes níveis de *ênfase*. A formatação acontecerá depois, quando souber o lugar e os requisitos da publicação.
+
+Existem programas que permitem ver uma pré-visualização em tempo real da saída do Markdown enquanto se edita o ficheiro de texto simples, e que nós detalhamos abaixo na seção de Recursos Úteis. Poucos deles, entretanto, suportam notas de rodapé, figuras e bibliografias. Para aproveitar o Pandoc ao máximo, nós recomendamos que use ficheiros de texto simples armazenados localmente, no seu computador.
+
+## Entrando em contato com a linha de comando do seu computador
+
+Antes de começarmos a publicar o nosso ficheiro `main.md` em outros formatos, nós precisamos nos orientar sobre como trabalhar com a linha de comando usando o programa de terminal do seu computador, que é o único (e melhor) modo de usar o Pandoc.
+
+A linha de comando é um lugar amigável, uma vez que se acostuma com ela. Se já estiver familiarizado com o uso da linha de comando, sinta-se à vontade para pular esta seção. Para outros, é importante entender que ser capaz de usar seu programa de terminal diretamente permitirá usar uma ampla gama de poderosas ferramentas de pesquisa que não poderiam ser usadas de outra forma, e que podem servir como base para um trabalho mais avançado. Para os fins deste tutorial, é preciso aprender apenas alguns comandos muito simples.
+
+Primeiro, abra uma janela de linha de comando. Se estiver usando o macOS, abra o aplicativo Terminal no diretório ‘Aplicativos / Utilitários’. No Windows, recomendamos que use o PowerShell ou, para uma solução mais robusta, instale o Subsistema do Windows para Linux e use o terminal que vem com sua distribuição favorita do Linux. Para obter uma excelente introdução à linha de comando, consulte [“Introdução à linha de comando Bash” (em inglês)](/en/lessons/intro-to-bash), de Ian Milligan e James Baker.
+
+No terminal, deve-se ver uma janela de texto e um prompt que se parece com isto: `nome-do-computador:~nome-do-usuário$`. O til (`~`) indica o diretório do usuário, e é possível escrever `$ cd ~` em qualquer ponto para retornar para o seu diretório de usuário. Não escreva o cifrão: ele apenas simboliza o prompt de comando no seu terminal, indicando que se digite algo no terminal (em oposição a digitar algo no seu documento); lembre-se de apertar "Enter" após todo comando.
+
+É bem provável que a sua pasta "Documentos" esteja localizada aqui. Digite `$ pwd` (= _print working directory_, exibe o diretório de trabalho) e aperte "Enter" para exibir o nome do diretório atual. Use `$ pwd` sempre que se sentir perdido.
+
+O comando `$ ls` (= _list_, listar) simplesmente lista os ficheiros no diretório atual. Enfim, pode usar `$ cd` (= _change directory_, mudar diretório) assim: `$ cd NOME_DIRETÓRIO` (em que `NOME_DIRETÓRIO` é o nome do diretório que se quer acessar). Use `$ cd ..` para mover automaticamente um nível para cima na estrutura de diretórios (o diretório-pai do diretório em que se está). Uma vez que começar a digitar o nome do diretório, use a tecla Tab para completar automaticamente o texto - particularmente útil para nomes de diretório longos ou nomes de diretórios que contenham espaços.[^5]
+
+Esses três comandos de terminal, `pwd`, `ls` e `cd`, são tudo o que é preciso para esse tutorial; uma sessão de exemplo aparece logo abaixo.
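+
+A título de ilustração, eis uma sessão hipotética de terminal em um Mac (o nome de usuário e os nomes de pastas abaixo são apenas exemplos; os seus serão diferentes):
+
+```
+$ pwd
+/Users/gabriela
+$ ls
+Desktop    Documents    Downloads
+$ cd Documents
+$ pwd
+/Users/gabriela/Documents
+```
+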
Pratique-os por alguns minutos para navegar pela sua pasta de documentos e pense na forma em que os seus ficheiros estão organizados. Se quiser, acompanhe pelo seu gerenciador de ficheiros gráfico habitual para se manter orientado.
+
+## Usando Pandoc para converter Markdown em um documento do MS Word
+
+Nós estamos agora prontos para formatar! Abra a sua janela de terminal e use `$ pwd` e `$ cd NOME_DIRETÓRIO` para navegar até a pasta correta do seu projeto. Chegando lá, digite `$ ls` no terminal para listar os ficheiros. Se encontrar o seu ficheiro .md e suas imagens, está no lugar certo. Para converter o .md em um .docx, escreva:
+
+```
+ $ pandoc main.md -o main.docx
+```
+Abra o ficheiro no MS Word e confira os resultados. Alternativamente, se usa o Open Office ou o LibreOffice, escreva:
+```
+ $ pandoc main.md -o projeto.odt
+```
+Se não estiver acostumado com a linha de comando, imagine ler o comando acima como se fosse algo como: "Pandoc, crie um ficheiro MS Word a partir do meu ficheiro Markdown". A parte `-o` é uma "bandeira" (flag) que, nesse caso, diz algo como "ao invés de eu lhe dizer explicitamente os formatos de ficheiro de origem e destino, apenas tente adivinhar olhando para a extensão do ficheiro", ou simplesmente "output" (saída). Muitas opções estão disponíveis através de bandeiras como essa no Pandoc. É possível ver a lista completa no [site do Pandoc](https://pandoc.org/) ou digitando `$ man pandoc` no terminal.
+
+Tente rodar o comando:
+```
+ pandoc main.md -o projeto.html
+```
+Agora navegue de volta para o diretório do seu projeto. O que aconteceu?
+
+Usuários mais avançados que têm o LaTeX instalado podem querer experimentar converter o Markdown em .tex ou em ficheiros .pdf especialmente formatados. Uma vez que o LaTeX estiver instalado, um ficheiro PDF belamente formatado pode ser criado usando a mesma estrutura de comando:
+```
+ pandoc main.md -o main.pdf
+```
+
+Se este comando falhar, pode ser preciso fornecer ao Pandoc o caminho completo para o motor LaTeX que deseja usar, especificando onde ele está armazenado. A localização variará conforme esteja trabalhando em Mac, Windows ou Linux. Recomendamos verificar o caminho correto para o motor LaTeX no seu sistema e seguir as instruções de instalação atuais (em inglês).
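+
+Por exemplo, em um Mac com a distribuição MacTeX, o comando poderia ficar como abaixo (o caminho é apenas ilustrativo; confirme a localização do motor no seu sistema antes de usá-lo):
+
+```
+ pandoc main.md --pdf-engine=/Library/TeX/texbin/pdflatex -o main.pdf
+```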
+
+Se o seu documento estiver escrito em outros idiomas que não o inglês, provavelmente precisará usar o motor XeLaTeX, em vez do LaTeX simples, para a conversão em .pdf:
+```
+ pandoc main.md --pdf-engine=xelatex -o main.pdf
+```
+Tenha certeza de que o seu editor de texto suporta a codificação UTF-8. Quando usar o XeLaTeX para a conversão em .pdf, ao invés do atributo `fontfamily` no "YAML", especifique o atributo `mainfont` para produzir algo como isto:
+```
+ ---
+ title: Fluxo de Trabalho de Texto Simples
+ author: Gabriela Domingues
+ date: 20 de janeiro de 2014
+ mainfont: times
+ ---
+```
+
+Estilos de fonte também podem ser passados diretamente ao Pandoc, na forma de `pandoc main.md -V mainfont=times -o destino.pdf`. Nós preferimos, entretanto, usar as opções de cabeçalho do "YAML" sempre que possível, uma vez que são mais fáceis de lembrar. Além disso, uma ferramenta de controle de versão como o Git preservará as mudanças no "YAML", enquanto o que é digitado no terminal é efêmero. Consulte a seção de Templates (Modelos) no manual do Pandoc (`man pandoc`) para a lista de variáveis de "YAML" disponíveis.
+
+## Trabalhando com Bibliografias
+
+Nesta seção, adicionaremos uma bibliografia ao nosso documento e, em seguida, converteremos os formatos de Chicago para MLA.
+
+Se ainda não usa um gerenciador de referências como Endnote ou Zotero, passe a usar. Preferimos o Zotero porque, como o Pandoc, foi criado pela comunidade acadêmica e, como outros projetos de código aberto, é lançado sob a GNU General Public License. O mais importante para nós é que o seu gerenciador de referências tenha a capacidade de gerar bibliografias em formato de texto simples, para manter o alinhamento com nosso princípio “tudo em texto simples”. Vá em frente, abra um gerenciador de referências de sua escolha e adicione algumas entradas de amostra. Quando estiver pronto, encontre a opção de exportar a sua bibliografia no formato BibTeX (.bib). Salve o ficheiro .bib no diretório do projeto e dê a ele um título razoável, como “projeto.bib”.
+
+A ideia geral é manter as suas fontes organizadas sob um banco de dados bibliográfico centralizado, enquanto geramos ficheiros .bib menores e mais específicos, que devem ficar no mesmo diretório que o seu projeto. Vá em frente e abra o seu ficheiro .bib com o editor de texto simples que escolher.[^6]
+
+O seu ficheiro .bib deve conter múltiplas entradas que se parecem com esta:
+
+    @article{fyfe_digital_2011,
+        title = {Digital Pedagogy Unplugged},
+        volume = {5},
+        url = {https://digitalhumanities.org/dhq/vol/5/3/000106/000106.html},
+        number = {3},
+        urldate = {2013-09-28},
+        author = {Fyfe, Paul},
+        year = {2011},
+        file = {fyfe_digital_pedagogy_unplugged_2011.pdf}
+    }
+
+Raramente será necessário editá-las manualmente (embora seja possível). Na maioria dos casos, o ficheiro .bib simplesmente será exportado do Zotero ou de um gerenciador de referências semelhante. Reserve um momento para se orientar aqui. Cada entrada consiste em um tipo de documento, “artigo” em nosso caso, um identificador único (fyfe_digital_2011) e os metadados relevantes sobre título, volume, autor e assim por diante. O que mais nos interessa é o ID único, que segue imediatamente a chave de abertura na primeira linha de cada entrada. É ele que nos permite conectar a bibliografia ao documento principal.
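+
+Apenas para ilustrar que outros tipos de documento seguem o mesmo padrão, eis uma entrada hipotética de livro (todos os dados abaixo são inventados e servem somente de exemplo do formato BibTeX):
+
+    @book{sobrenome_titulo_2010,
+        title = {Um Título de Exemplo},
+        author = {Sobrenome, Nome},
+        publisher = {Editora Hipotética},
+        address = {São Paulo},
+        year = {2010}
+    }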
+
+Deixe este ficheiro aberto por enquanto e volte para o seu ficheiro `main.md`.
+
+Edite a nota de rodapé na primeira linha do seu ficheiro `main.md` para que se pareça com algo como os seguintes exemplos, em que `@nome_título_data` pode ser substituído por um dos IDs únicos do seu ficheiro `projeto.bib`.
+
+* `Uma referência formatada como esta será renderizada apropriadamente como citação no estilo em linha ou em nota de rodapé [@nome_título_data, 67].`[^7]
+* `Para citações entre aspas, coloque a vírgula fora das marcas de citação [@nome_título_data, 67].`
+
+Uma vez que rodarmos o Markdown através do Pandoc, "@fyfe_digital_2011" será expandido em uma citação completa no estilo que desejar. É possível usar a sintaxe da `@citação` como preferir: em linha com o seu texto ou em notas de rodapé. Para gerar a bibliografia, simplesmente inclua uma seção chamada `# Bibliografia` no fim do documento.
+
+Agora, retorne para o seu cabeçalho de metadados no topo do seu documento .md e especifique o ficheiro de bibliografia a ser usado, assim:
+
+```
+---
+title: Fluxo de Trabalho de Texto Simples
+author: Gabriela Domingues
+date: 20 de janeiro de 2014
+bibliography: projeto.bib
+---
+```
+Isso diz ao Pandoc para procurar pela bibliografia no ficheiro `projeto.bib`, no mesmo diretório que o seu `main.md`. Vamos ver se funciona. Salve o ficheiro, mude para a janela do terminal e execute:
+
+```
+$ pandoc main.md --filter pandoc-citeproc -o main.docx
+```
+O filtro “pandoc-citeproc” analisará quaisquer tags de citação encontradas em seu documento. O resultado deve ser um ficheiro MS Word formatado. Se tiver o LaTeX instalado, converta para .pdf usando a mesma sintaxe, para resultados mais bonitos. Não se preocupe se as coisas não estiverem exatamente como prefere - lembre-se de que fará o ajuste fino da formatação de uma vez, mais tarde, o mais próximo possível da data da publicação. Por enquanto, estamos apenas criando rascunhos baseados em padrões razoáveis.
+
+## Mudando estilos de citação
+
+O estilo de citação padrão no Pandoc é o [Chicago Author-date](https://www.chicagomanualofstyle.org/tools_citationguide/citation-guide-2.html). Podemos especificar um estilo diferente usando uma folha de estilo escrita na “Linguagem de Estilo de Citação” (Citation Style Language, outra convenção de texto simples, neste caso para descrever estilos de citação) e denotada pela extensão de ficheiro .csl. Felizmente, o projeto CSL mantém um repositório de estilos de citação comuns, alguns até personalizados para periódicos específicos. Visite https://editor.citationstyles.org/about/ para localizar o ficheiro .csl da Modern Language Association, baixe `modern-language-association.csl` e salve-o no diretório do projeto como `mla.csl`. Agora precisamos dizer ao Pandoc para usar a folha de estilo MLA em vez do padrão Chicago. Fazemos isso atualizando o cabeçalho "YAML":
+
+```
+---
+title: Fluxo de trabalho de Texto Simples
+author: Gabriela Domingues
+date: 20 de janeiro de 2014
+bibliography: projeto.bib
+csl: mla.csl
+---
+```
+
+Então repita o comando do Pandoc para converter o seu ficheiro Markdown no formato de destino (.pdf ou .docx):
+
+```
+$ pandoc main.md --filter pandoc-citeproc -o main.pdf
+```
+Traduza o comando para o português enquanto digita. Na minha cabeça, eu traduzo o comando acima em algo como: "Pandoc, pegue o meu ficheiro Markdown, aplique o filtro de citação sobre ele e retorne um ficheiro PDF".
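+
+Para gerar também a versão .docx com o novo estilo, basta trocar a extensão do ficheiro de saída, exatamente como antes:
+
+```
+$ pandoc main.md --filter pandoc-citeproc -o main.docx
+```
+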
Quando ficar mais familiarizado com as folhas de estilo de citação, considere adicionar ficheiros .csl customizados para os periódicos do seu campo ao repositório, como um serviço para a comunidade.
+
+## Resumo
+
+Agora, você deve ser capaz de escrever artigos em Markdown, criar rascunhos em múltiplos formatos, adicionar bibliografias e facilmente mudar os estilos de citação. Um último olhar no diretório do projeto revelará vários ficheiros de origem: o ficheiro `main.md`, o ficheiro `projeto.bib`, o ficheiro `mla.csl` e algumas imagens. Além dos ficheiros de origem, deve haver alguns ficheiros de "destino" que criamos ao longo desse tutorial: `main.docx` ou `main.pdf`. A sua pasta deve se parecer com isto:
+
+```
+Pandoc-tutorial/
+    main.md
+    projeto.bib
+    mla.csl
+    imagem.jpg
+    main.docx
+```
+
+Trate seus ficheiros de origem como a versão de referência do seu texto, e seus ficheiros de destino como “impressões” descartáveis, que podem ser geradas facilmente com o Pandoc a qualquer momento. Todas as revisões devem ser feitas no `main.md`. O ficheiro `main.docx` está lá para a formatação e a limpeza em estágio final. Se, por exemplo, o periódico requisitar manuscritos com espaçamento duplo, é possível rapidamente aplicar o espaçamento duplo no Open Office ou no Microsoft Word. Mas não gaste muito tempo formatando: lembre-se, tudo é retirado quando o seu manuscrito vai para a impressão. O tempo gasto em formatação desnecessária pode ser mais bem usado melhorando a prosa do seu rascunho.
+
+## Recursos úteis
+
+Se tiver problemas, não há lugar melhor para começar a procurar ajuda do que o [site do Pandoc](https://pandoc.org/), de John MacFarlane, e a [lista de e-mails](https://groups.google.com/forum/#!forum/pandoc-discuss) associada. Pelo menos dois sites do tipo “Pergunta e Resposta” recebem perguntas sobre o Pandoc: o [Stack Overflow](https://stackoverflow.com/questions/tagged/pandoc) e o [Digital Humanities Q&A](https://web.archive.org/web/20190203062832/https://digitalhumanities.org/answers/). As perguntas também podem ser feitas ao vivo no IRC da Freenode, canal #Pandoc, frequentado por um amigável grupo de regulares. Conforme aprender mais sobre o Pandoc, também pode explorar um de seus recursos mais poderosos: os [filtros](https://github.com/jgm/pandoc/wiki/Pandoc-Filters).
+
+Embora a nossa sugestão seja começar com um editor simples, muitas outras alternativas ao MS Word específicas para Markdown estão disponíveis online, e muitas vezes sem custo (mais de 70, de acordo com [esta postagem de blog](https://web.archive.org/web/20140120195538/https://mashable.com/2013/06/24/markdown-tools/)). Entre as autônomas, gostamos do [Write Monkey](https://writemonkey.com/) e do [Sublime Text](https://www.sublimetext.com/). Várias plataformas baseadas na web surgiram recentemente fornecendo interfaces gráficas elegantes para escrita colaborativa e controle de versão usando Markdown. Algumas delas são: [prose.io](https://prose.io/), [Authorea](https://www.authorea.com/), [Draft](https://www.draftin.com/) e [StackEdit](https://stackedit.io/).
+
+Mas o ecossistema não é limitado a editores. [Gitit](https://gitit.net/) e [Ikiwiki](https://github.com/dubiousjim/pandoc-iki) suportam autoria em Markdown com o Pandoc como analisador.
Podemos incluir nesta lista uma série de ferramentas que geram páginas da web estáticas e rápidas: [Yst](https://github.com/jgm/yst), [Jekyll](https://github.com/fauno/jekyll-pandoc-multiple-formats), [Hakyll](https://jaspervdj.be/hakyll/) e o [script de shell em bash](https://github.com/wcaleb/website) do historiador Caleb McDaniel.
+
+Por fim, plataformas de publicação completas estão se formando ao redor do uso de Markdown. A plataforma de publicação no estilo marketplace [Leanpub](https://leanpub.com/), baseada em Markdown, pode ser uma alternativa interessante ao modelo de publicação tradicional. E nós mesmos estamos experimentando o design de periódicos acadêmicos com base no GitHub e no [readthedocs.org](https://readthedocs.org/) (ferramentas geralmente usadas para documentação técnica).
+
+
+### Notas
+[^1]: Não se preocupe se não entender essa terminologia ainda!
+[^2]: Os ficheiros fonte para essa documentação podem ser [baixados no GitHub](https://github.com/dh-notes/pandoc-workflow). Use a opção "raw" quando visualizar no GitHub para ver o Markdown fonte. Os autores gostariam de agradecer a Alex Gil e seus colegas do Digital Humanities Center de Columbia, e aos participantes do openLab no Studio, na biblioteca Butler, por testar o código deste tutorial em uma variedade de plataformas.
+[^3]: Veja a excelente discussão de Charlie Stross sobre esse tópico em [Por Que o Microsoft Word Deve Morrer (em inglês)](https://www.antipope.org/charlie/blog-static/2013/10/why-microsoft-word-must-die.html).
+[^4]: Não existem boas soluções para chegar diretamente ao MS Word a partir do LaTeX.
+[^5]: É uma boa ideia criar o hábito de não usar espaços em nomes de pastas ou de ficheiros. Traços ou sublinhados no lugar de espaços nos nomes de seus ficheiros garantem uma duradoura compatibilidade entre plataformas.
+[^6]: Note que a extensão .bib pode estar "registrada" para o Zotero no seu sistema operacional. Isso significa que, ao clicar em um ficheiro .bib, é provável que o Zotero seja chamado para abri-lo, enquanto nós queremos abri-lo com o editor de texto. Eventualmente, pode querer associar a extensão .bib ao seu editor de texto.
+[^7]: Agradecemos a [@njbart](https://github.com/njbart) pela correção. Em resposta à nossa sugestão original, `Algumas frases precisam de citação.^[@fyfe_digital_2011 argumenta isso também.]`, [ele escreve](https://github.com/programminghistorian/jekyll/issues/46#issue-45559983): “Isso não é recomendado, pois evita que se alterne facilmente entre os estilos de nota de rodapé e autor-data. É melhor usar a forma corrigida (sem circunflexo, sem ponto final entre colchetes e com a pontuação final da frase do texto após os colchetes; com estilos de nota de rodapé, o pandoc ajusta automaticamente a posição da pontuação final).
” diff --git a/pt/licoes/camadas-vetoriais-qgis.md b/pt/licoes/camadas-vetoriais-qgis.md index 449669d29b..210fd303d3 100644 --- a/pt/licoes/camadas-vetoriais-qgis.md +++ b/pt/licoes/camadas-vetoriais-qgis.md @@ -1,247 +1,247 @@ ---- -title: Criar novas camadas vetoriais com o QGIS 2.0 -layout: lesson -slug: camadas-vetoriais-qgis -date: 2013-12-13 -translation_date: 2021-03-30 -authors: -- Jim Clifford -- Josh MacFadyen -- Daniel Macfarlane -reviewers: -- Finn Arne Jørgensen -- Peter Webster -- Abby Schreiber -editors: -- Adam Crymble -translator: -- Rafael Laguardia -translation-editor: -- Joana Vieira Paulino -translation-reviewer: -- Luis Ferla -- Ana Alcântara -difficulty: 2 -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/365 -activity: presenting -topics: [mapping, data-visualization] -abstract: "Nesta lição, aprenderá como criar camadas vetoriais com base em mapas históricos digitalizados." -original: vector-layers-qgis -avatar_alt: Mapa de ruas da cidade -doi: 10.46430/phpt0009 ---- - -{% include toc.html %} - - - - - -## Objetivos da lição - -Nesta lição, aprenderá como criar camadas vetoriais com base em mapas históricos digitalizados. [Na introdução ao Google Maps e Google Earth](/en/lessons/googlemaps-googleearth) (em inglês), usou camadas vetoriais e criou atributos no Google Earth. Faremos o mesmo nesta lição, embora num nível mais avançado, usando o software QGIS. - -As camadas vetoriais (ou shapefiles) são, junto com as camadas raster, um dos dois tipos básicos de estruturas de armazenamento de dados. As camadas vetoriais usam as três feições1 básicas do SIG (Sistema de Informações Geográficas) - pontos, linhas e polígonos - para representar aspectos do mundo real em formato digital. Pontos podem ser usados para representar locais específicos, como cidades, edifícios, eventos, etc. (a escala do seu mapa determinará o que você representa como um ponto - no mapa de uma província, uma cidade seria um ponto, enquanto no mapa de uma cidade, um edifício pode ser um ponto). Linhas podem representar estradas, rios, canais, ferrovias, etc. Polígonos (formas fechadas) são usados para representar objetos mais complexos, como os limites de um lago, país, divisão administrativa ou eleitoral, etc. (novamente, a escala afetará sua escolha - grandes edifícios num mapa de pormenor de uma cidade podem ser melhor representados como polígonos do que como pontos). - -Nesta lição, criará shapefiles (que são um formato de armazenamento de dados vetoriais) para representar o desenvolvimento histórico de comunidades e estradas na Ilha Prince Edward. Cada shapefile pode ser criado como um dos três tipos de feições: ponto, linha, polígono (embora essas feições não possam ser misturadas num shapefile). Cada feição que cria num shapefile possui um conjunto correspondente de atributos, que são armazenados numa tabela de atributos. Criará feições e aprenderá como modificá-las, o que envolve não apenas a criação visual dos três tipos de feições, mas também a modificação de seus atributos. Para fazer isso, usaremos os ficheiros da lição [instalar o QGIS 2.0 e adicionaremos camadas](/en/lessons/qgis-layers) (em inglês) referentes à Ilha Prince Edward. - -## Começando - -Comece por descarregar o [mapa PEI_Holland](/assets/vector-layers-qgis/PEI_HollandMap1798_compLZW.tif) para a pasta do projeto. - -Abra o ficheiro que você salvou no final da lição [instalar o QGIS 2.0 e adicionar camadas](/en/lessons/qgis-layers) (em inglês). 
Deve ter as seguintes camadas na aba Camadas: - -- PEI\_placenames -- PEI\_highway -- PEI HYDRONETWORK -- 1935 inventory\_region -- coastline\_polygon -- PEI-CumminsMap1927 - -Desmarque todas essas camadas, exceto 'PEI_placenames', 'coastline_polygon' e 'PEI_CumminsMap1927'. - -{% include figure.html filename="pei1.png" caption="Figura 1" %} - -Agora vamos adicionar um segundo mapa histórico como uma camada raster. - -{% include figure.html filename="pei2.png" caption="Figura 2" %} - -- Em Camada na barra de ferramentas, escolha Adicionar Camada Raster (alternativamente, o mesmo ícone que vê ao lado de 'Adicionar Camada Raster' também pode ser selecionado na barra de ferramentas) -- Encontre o ficheiro que descarregou intitulado 'PEI_HollandMap1798' -- Ser-lhe-á solicitado que defina o sistema de coordenadas desta camada. Na caixa de filtro, pesquise por '2291' e, na caixa abaixo, selecione 'NAD83 (CSRS98) / Prince Edward Isl. Stereographic' -- Se não lhe for solicitado que defina o sistema de coordenadas da camada, será necessário alterar uma configuração. Clique em 'Configurações' e, em seguida, em 'Opções'. Clique em 'CRS' no menu à direita e escolha 'Solicitar CRS' a partir das opções abaixo. 'Quando uma nova camada é criada, ou quando uma camada é carregada sem CRS'. Clique 'OK'. Remova a camada 'PEI_HollandMap1798' (clique com o botão direito sobre ela e clique em Remover) e tente adicioná-la novamente. Desta vez, deve-lhe ser solicitado que forneça um 'CRS' e pode selecionar a opção 'NAD83' (veja acima). - -{% include figure.html filename="Figura3.jpg" caption="Figura 3" %} - -Nas etapas anteriores, selecionou e desmarcou camadas na janela 'Camadas' marcando e desmarcando as caixas ao lado delas. Essas camadas são organizadas em ordem decrescente de visibilidade. Ou seja, a camada superior é a camada superior da janela do visualizador (desde que esteja selecionada). Pode arrastar as camadas para cima e para baixo na janela de camadas para alterar a ordem em que ficarão visíveis na janela de visualização. A camada raster 'litoral_polygon' não está visível no momento porque está abaixo das camadas 'PEI_HollandMap1798' e 'PEI_Cummins1927'. Em geral, é melhor manter as camadas vetoriais acima das camadas raster. - -Desmarque 'PEI_Cummins1927' para que a única camada restante seja 'PEI_HollandMap1798'. Observe que o mapa aparece torto na tela; isso ocorre porque já foi georreferenciado pelos redatores da lição para coincidir com as camadas vetoriais de SIG. Saiba mais sobre georreferenciamento em [georreferenciamento no QGIS 2.0](/en/lessons/georeferencing-qgis) (em inglês). - -{% include figure.html filename="pei4.png" caption="Figura 4" %} - -Agora criaremos um shapefile de pontos, que é uma camada vetorial. Clique em 'Camada' -> 'Nova' -> 'Nova Camada Shapefile' - -- Alternativamente, pode selecionar o ícone 'Nova camada Shapefile' no topo da janela da barra de ferramentas QGIS - -{% include figure.html filename="Figura5.jpg" caption="Figura 5" %} - -Depois de selecionar 'Nova Camada Shapefile', aparece uma janela intitulada 'Nova Camada Vetorial' - -- Na categoria 'Tipo', 'ponto' já está selecionado. Clique no botão 'Especificar CRS' e selecione 'NAD83 (CSRS98) / Prince Edward Isl. 
Estereográfico (EPSG: 2291)' e, em seguida, clique em OK (para obter informações sobre como entender e selecionar a zona UTM: [https://lib.uwaterloo.ca/locations/umd/digital/clump_classes.html](https://lib.uwaterloo.ca/locations/umd/digital/clump_classes.html) - -{% include figure.html filename="Figura6.jpg" caption="Figura 6" %} - -Retornando à janela 'Nova Camada vetorial', iremos criar alguns atributos. Para criar o primeiro atributo: - -- Em 'Novo atributo', no campo ao lado de 'Nome', digite 'Nome_Assentamento' (observe que ao trabalhar em bancos de dados não pode usar espaços vazios nos nomes, por isso a convenção é usar sublinhados em seus lugares) -- Clique em 'Adicionar' à lista de atributos - -Agora vamos criar um segundo atributo: - -- Em 'Novo Atributo', no campo ao lado de 'Nome', digite 'Ano' -- Desta vez, vamos mudar o 'Tipo' para 'Número Inteiro' -- Clique em 'Adicionar à lista de atributos' - -Para o terceiro atributo: - -- Sob Novo atributo, no campo ao lado de Nome, digite 'Ano_Final' (o SIG nem sempre é ideal para lidar com mudanças ao longo do tempo, então em alguns casos é importante ter um campo para identificar aproximadamente quando algo deixou de existir) -- Mude o 'Tipo' novamente para 'Número Inteiro' -- Clique em Adicionar à lista de atributos - -{% include figure.html filename="Figura7.jpg" caption="Figura 7" %} - -- Ao concluir essas três etapas, termine de criar esse shapefile clicando em OK na parte inferior direita da janela 'Nova Camada Vetorial'. Um 'pop-up' irá surgir, nomeie-o de 'Assentamentos' e salve-o com os seus outros ficheiros SIG. - -Observe que uma camada chamada 'Assentamentos' agora aparece na janela 'Camadas'. Reposicione-a acima das camadas raster. - -{% include figure.html filename="Figura8.jpg" caption="Figura 8" %} - -Desmarque todas as camadas, exceto 'Assentamentos'. A janela de visualização agora está em branco, pois não criaámos nenhum dado. Agora criaremos novos dados do 'PEI_CumminsMap1927' e do 'PEI_HollandMap 1798' para mostrar o aumento da ocupação entre o final do século XVIII e o início do século XX. - -- Nós começaremos com o mapa mais recente e, portanto, geralmente mais preciso. Selecione novamente (ou seja, marque as caixas ao lado) 'coast_polygon' e 'PEI_CumminsMap1927'. -- Na janela de visualização, aumente o 'Zoom' em 'Charlottetown' (dica: 'Charlottetown' fica perto do meio da ilha no lado sul, na confluência de três rios). -- Selecione a camada de 'Assentamentos' na janela 'Camadas'. -- Na barra de menu, selecione 'Alternar Edição'. - -{% include figure.html filename="pei9.png" caption="Figura 9" %} - -- Depois de selecionar 'Alternar Edição', os botões de edição ficarão disponíveis à direita na barra de menus. Selecione o botão de feição com 'três pontos'. - -{% include figure.html filename="pei10.png" caption="Figura 10" %} - -- O cursor aparece agora como uma cruz - aponte a cruz para 'Charlottetown' (se por acaso não conhecer a geografia do 'PEI', pode ter ajuda adicionando a camada 'PEI_nomes de local'), mantendo-a dentro da linha costeira atual e clique (a digitalização é sempre um compromisso entre precisão e funcionalidade; dependendo da qualidade do mapa original e da digitalização, para a maioria das aplicações históricas, a precisão extrema não é necessária). -- Uma janela de atributos aparecerá. Deixe o campo 'id' em branco (no momento da escrita, o QGIS criará dois campos 'id' e este é desnecessário). No campo 'Assentamento', digite 'Charlottetown'. No campo 'Ano', digite '1764'. Clique em 'OK'. 
-Vamos agora repetir as etapas que realizámos com 'Charlottetown' para 'Montague', 'Summerside' e 'Cavendish' (novamente, pode encontrar esses locais adicionando as camadas 'PEI_nomes de local'). Encontre 'Montague' no mapa, selecione o botão de feição com 'três pontos' e clique em Montague no mapa. Quando a janela 'Atributos' aparecer, insira 'Montague' e '1732' nos campos apropriados. Repita para 'Summerside (1876)' e 'Cavendish (1790)'. - -{% include figure.html filename="Figura11.jpg" caption="Figura 11" %} - -Na janela 'Camadas', desmarque 'PEI_CumminsMap1927' e selecione 'PEI_HollandMap1798'. Agora vamos identificar dois assentamentos ('Princetown' e 'Havre-St-Pierre') que já não existem. - -- Para localizar 'Princetown', procure 'Richmond Bay' e 'Cape Aylebsury' (na costa norte a oeste de 'Cavendish'), aqui você encontrará 'Princetown' (sombreado) perto da fronteira entre o amarelo e o azul. - -- Se consultar a [entrada da Wikipedia](https://pt.wikipedia.org/wiki/Ilha_do_Pr%C3%ADncipe_Eduardo) desta cidade, notará que por causa de um porto raso, 'Princetown' não se tornou um assentamento importante. Foi renomeado em 1947 e, posteriormente, rebaixado para uma aldeia. Por esse motivo, incluiremos 1947 como a data final para este assentamento. - -- Com o cursor do mouse (em formato de cruz), clique em 'Princetown'. Na 'tabela de atributos' que aparece, coloque 'Princetown' no campo 'Assentamento', coloque '1764' no campo 'Ano' e coloque '1947' em 'Ano_Final'. Clique 'OK'. - -{% include figure.html filename="Figura12.jpg" caption="Figura 12" %} - -- Clique no ícone 'Salvar edições' na barra de menu (fica entre 'Alternar' e 'Adicione Feição'). - -- Clique duas vezes na camada de 'Assentamentos' na janela 'Camadas', escolha a guia 'Etiquetas' na parte superior da janela seguinte. Clique na caixa ao lado de 'Mostrar etiquetas'. Em Campo contendo rótulo, selecione 'Ano' (se necessário), altere o tamanho da fonte para 18,0, altere 'Posicionamento para Acima à esquerda' e clique em 'OK'. - -Na costa norte do 'lote 39', entre 'Britain's Pond' e 'St. Peters Bay', colocaremos agora um ponto para a localização de uma aldeia há muito perdida chamada 'Havre-St-Pierre'. - -- 'Havre-St-Pierre' foi o primeiro assentamento acadiano da ilha, mas está desabitado desde a deportação dos acadianos em 1758. - -- Com o cursor do mouse (em formato de cruz), clique em 'Havre-St. Pierre'. Na 'tabela de Atributos' que aparece, coloque 'Havre-St-Pierre' no campo 'Assentamento', coloque '1720' no campo 'Ano' e '1758' em 'Ano_Final'. Clique 'OK'. - -{% include figure.html filename="pei13.png" caption="Figura 13" %} - -Agora vamos criar outra camada vetorial: um vetor linha. Clique em 'Camada' -> 'Nova' -> 'Nova Camada Shapefile'. A janela 'Nova Camada Vetorial' aparecerá (na categoria 'Tipo', no topo, selecione 'Linha') - -- Clique no botão 'Especificar CRS' e selecione 'NAD83 (CSRS98) / Prince Edward Isl. Estereográfico (EPSG: 2291)' e clique em 'OK'. -- Em 'Novo atributo', no campo ao lado de 'Nome', digite 'Nome_Estrada'. -- Clique em 'Adicionar campos à lista'. - -Crie um segundo atributo: - -- Em 'Novo atributo', no campo ao lado de 'Nome', digite 'Ano'. -- Mude o 'Tipo' para 'Número Inteiro'. -- Clique em 'Adicionar à lista de Atributos'. -- Para terminar de criar este ficheiro, clique em 'OK' na parte inferior direita da janela 'Nova Camada Vetorial'. Uma tela para 'salvar' aparece - chame-a de 'estradas' e salve-a com seus outros ficheiros SIG. 
- -Vamos agora traçar as estradas do 'mapa de 1798' para que possamos compará-las com as estradas atuais. Certifique-se de ter as camadas 'PEI_Holland1798' e 'Assentamentos' marcadas na janela de 'Camadas'. Selecione a camada 'estradas' na janela de 'camadas', selecione 'Alternar Edição' na barra de ferramentas superior e selecione 'Adicionar Feição'. - -{% include figure.html filename="pei14.png" caption="Figura 14" %} - -- Primeiro trace a estrada de 'Charlottetown' a 'Princetown'. Clique em 'Charlottetown' e depois clique repetidamente em pontos ao longo da estrada para 'Princetown' e verá a linha a ser criada. Repita até chegar a 'Princetown' e clique com o botão direito. Na janela 'Atributos' - estrada que aparece, no campo 'Nome', insira 'para Princetown' e no campo 'Ano' insira '1798'. Clique em 'OK'. - -{% include figure.html filename="pei15.png" caption="Figura 15" %} - -- Repita esta etapa para mais 3 a 4 estradas encontradas no 'PEI_HollandMap1798'. - -- Clique em 'Salvar mudanças' e, em seguida, clique em 'Alternar Edição' para desligá-lo. - -Desmarque 'PEI_HollandMap1798' na janela 'Camadas' e selecione o mapa 'PEI_highway'. Compare as estradas representadas no mapa 'PEI_highway' (as linhas vermelhas pontilhadas) com as estradas que você acabou de traçar. - -{% include figure.html filename="pei16.png" caption="Figura 16" %} - -- Podemos ver que algumas dessas estradas correspondem exatamente às estradas atuais, enquanto outras não correspondem de forma alguma. Seriam necessárias mais pesquisas históricas para determinar se isso ocorre simplesmente porque o mapa da Holanda não representa suficientemente as estradas na época, ou se as estradas mudaram consideravelmente desde então. - -Agora crie um terceiro tipo de camada vetorial: um vetor poligonal. Clique em 'Camada' -> 'Nova' -> 'Nova Camada Vetorial'. A janela 'Nova Camada Vetorial' aparecerá - na categoria 'Tipo', no topo, selecione 'Polígono'. - -- Clique no botão 'Selecione o SRC' e selecione 'NAD83 (CSRS98) / Prince Edward Isl. Estereográfico (EPSG: 2291)' e clique em 'OK'. -- Em 'Novo Atributo', no campo ao lado de 'Nome', digite 'nome_lote' no campo ao lado de 'Ano'. -- Clique em 'Adicionar campos à lista'. - -Crie um segundo atributo: - -- Em 'Novo atributo', no campo ao lado de 'Nome', digite 'Ano'. -- Mude o 'Tipo' para 'Número Inteiro'. -- Clique em 'Adicionar à lista de Atributos'. - -{% include figure.html filename="Figura17.jpg" caption="Figura 17" %} - -Comece criando um polígono para o 'Lote 66', que é o único lote retangular na ilha. - -- Clique em 'Alternar Edição' na barra de ferramentas superior e, em seguida, clique em 'Adicionar Feição'. -- Clique nos quatro cantos do 'lote 66' e você verá um polígono criado. -- Clique com o botão direito no canto final e uma janela de 'Atributos' aparecerá. Adicione '66' ao campo 'nome_lote' e adicione '1764' (o ano em que esses lotes foram inventariados) ao campo 'Ano'. - -{% include figure.html filename="Figura18.jpg" caption="Figura 18" %} - -Agora vamos rastrear o 'Lote 38', que fica a oeste de 'Havre-St-Pierre'. Certifique-se de que há uma marca de seleção na caixa ao lado da camada 'PEI_HollandMap1798' na janela 'Camadas'. - -Clique em 'Alternar Edição' na barra de ferramentas superior e, em seguida, clique em 'Adicionar Feição'. - -Trace o contorno do 'Lote 38', que é mais difícil por causa da linha costeira, com a maior precisão possível. 
Para mostrar a feição 'Ajuste', queremos que trace ao longo da costa atual (o 'ajuste' é uma operação de edição automática que ajusta a feição que você desenhou para coincidir ou alinhar exatamente com as coordenadas e forma de outra feição próxima). - -- Selecione 'Configurações'-> 'Opções de Ajuste'. - -{% include figure.html filename="Figura19.jpg" caption="Figura 19" %} - -- Uma janela de 'opções de ajuste' irá abrir: clique na caixa ao lado de 'coast_polygon', para a categoria 'Modo' selecione 'vértice e segmento', para 'Tolerância' selecione '10.0', e para 'Unidades' selecione 'pixels'. Clique 'OK'. -- -{% include figure.html filename="Figura20.jpg" caption="Figura 20" %} - -Certifique-se de que a camada de 'lotes' esteja selecionada na janela 'Camadas' e selecione 'Adicionar feição' na barra de ferramentas. - -- Com o cursor, clique nos dois cantos inferiores do polígono, assim como fez com o 'lote 38'. Na linha costeira, você notará que tem uma coleção de linhas para traçar ao redor do 'Savage Harbour'. É aqui que os recursos de aderência se tornam úteis. Enquanto traçar a linha ao longo da costa atual, sua precisão aumentará significativamente, encaixando os 'cliques' diretamente no topo da linha existente. Quanto mais 'cliques' você fizer, mais preciso será, mas tenha em mente que, para muitos fins de SIGH (SIG histórico), obter extrema precisão às vezes produz retornos decrescentes. - -{% include figure.html filename="pei21.png" caption="Figura 21" %} - -Quando terminar de traçar e criar o polígono, selecione e desmarque as várias 'camadas' que criou, comparando e vendo quais relações pode deduzir. -No Google Earth, havia limitações nos tipos de 'feições', 'atributos' e dados fornecidos, e o Google Earth fez grande parte do trabalho por si. Isso é bom quando está aprendendo ou deseja criar mapas rapidamente. A vantagem de usar o software QGIS para criar novas camadas vetoriais é a liberdade e controle sobre os tipos de dados que se pode usar e as 'feições' e 'atributos' que se podem criar. Assim, é possível criar mapas personalizados e ir muito além do que pode ser alcançado no Google Earth ou no Google Maps Engine Lite. Viu isso em primeira mão com as camadas vetoriais de pontos, linhas e polígonos que aprendeu a criar nesta lição. Se tiver dados sobre, por exemplo, registros de saúde pública no século XVIII, pode criar uma nova camada mostrando a distribuição de surtos de febre tifoide e ver se há correlações com estradas e assentamentos principais. Além disso, o software SIG permite não apenas representar e apresentar dados espaciais de maneiras mais sofisticadas, mas também analisar e criar novos dados que não seriam possíveis de outra forma. - -**Aprendeu como criar camadas vetoriais. Certifique-se de salvar seu trabalho!** - -1 É possível identificar a palavra 'feição', em traduções no QGIS BR, ao referir os três tipos de 'formas' ou 'geometrias' usadas nas camadas vetoriais dos SIG. Mas, isto cria uma diferença entre as versões do QGIS BR e QGIS PT. 
- -*Esta lição é parte do [Geospatial Historian][].* - - [Intro to Google Maps and Google Earth]: /lessons/googlemaps-googleearth - [Installing QGIS 2.0 and Adding Layers]: /lessons/qgis-layers - [PEI_Holland map]: /assets/vector-layers-qgis/PEI_HollandMap1798_compLZW.tif - [Georeferencing in QGIS 2.0]: /lessons/georeferencing-qgis - [Wikipedia entry]: http://en.wikipedia.org/wiki/Prince_Royalty,_Prince_Edward_Island - [Geospatial Historian]: http://geospatialhistorian.wordpress.com/ +--- +title: Criar novas camadas vetoriais com o QGIS 2.0 +layout: lesson +slug: camadas-vetoriais-qgis +date: 2013-12-13 +translation_date: 2021-03-30 +authors: +- Jim Clifford +- Josh MacFadyen +- Daniel Macfarlane +reviewers: +- Finn Arne Jørgensen +- Peter Webster +- Abby Schreiber +editors: +- Adam Crymble +translator: +- Rafael Laguardia +translation-editor: +- Joana Vieira Paulino +translation-reviewer: +- Luis Ferla +- Ana Alcântara +difficulty: 2 +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/365 +activity: presenting +topics: [mapping, data-visualization] +abstract: "Nesta lição, aprenderá como criar camadas vetoriais com base em mapas históricos digitalizados." +original: vector-layers-qgis +avatar_alt: Mapa de ruas da cidade +doi: 10.46430/phpt0009 +--- + +{% include toc.html %} + + + + + +## Objetivos da lição + +Nesta lição, aprenderá como criar camadas vetoriais com base em mapas históricos digitalizados. [Na introdução ao Google Maps e Google Earth](/en/lessons/googlemaps-googleearth) (em inglês), usou camadas vetoriais e criou atributos no Google Earth. Faremos o mesmo nesta lição, embora num nível mais avançado, usando o software QGIS. + +As camadas vetoriais (ou shapefiles) são, junto com as camadas raster, um dos dois tipos básicos de estruturas de armazenamento de dados. As camadas vetoriais usam as três feições1 básicas do SIG (Sistema de Informações Geográficas) - pontos, linhas e polígonos - para representar aspectos do mundo real em formato digital. Pontos podem ser usados para representar locais específicos, como cidades, edifícios, eventos, etc. (a escala do seu mapa determinará o que você representa como um ponto - no mapa de uma província, uma cidade seria um ponto, enquanto no mapa de uma cidade, um edifício pode ser um ponto). Linhas podem representar estradas, rios, canais, ferrovias, etc. Polígonos (formas fechadas) são usados para representar objetos mais complexos, como os limites de um lago, país, divisão administrativa ou eleitoral, etc. (novamente, a escala afetará sua escolha - grandes edifícios num mapa de pormenor de uma cidade podem ser melhor representados como polígonos do que como pontos). + +Nesta lição, criará shapefiles (que são um formato de armazenamento de dados vetoriais) para representar o desenvolvimento histórico de comunidades e estradas na Ilha Prince Edward. Cada shapefile pode ser criado como um dos três tipos de feições: ponto, linha, polígono (embora essas feições não possam ser misturadas num shapefile). Cada feição que cria num shapefile possui um conjunto correspondente de atributos, que são armazenados numa tabela de atributos. Criará feições e aprenderá como modificá-las, o que envolve não apenas a criação visual dos três tipos de feições, mas também a modificação de seus atributos. Para fazer isso, usaremos os ficheiros da lição [instalar o QGIS 2.0 e adicionaremos camadas](/en/lessons/qgis-layers) (em inglês) referentes à Ilha Prince Edward. 
+
+## Começando
+
+Comece por descarregar o [mapa PEI_Holland](/assets/vector-layers-qgis/PEI_HollandMap1798_compLZW.tif) para a pasta do projeto.
+
+Abra o ficheiro que você salvou no final da lição [instalar o QGIS 2.0 e adicionar camadas](/en/lessons/qgis-layers) (em inglês). Deve ter as seguintes camadas na aba Camadas:
+
+- PEI\_placenames
+- PEI\_highway
+- PEI HYDRONETWORK
+- 1935 inventory\_region
+- coastline\_polygon
+- PEI-CumminsMap1927
+
+Desmarque todas essas camadas, exceto 'PEI_placenames', 'coastline_polygon' e 'PEI_CumminsMap1927'.
+
+{% include figure.html filename="pei1.png" caption="Figura 1" %}
+
+Agora vamos adicionar um segundo mapa histórico como uma camada raster.
+
+{% include figure.html filename="pei2.png" caption="Figura 2" %}
+
+- Em Camada na barra de ferramentas, escolha Adicionar Camada Raster (alternativamente, o mesmo ícone que vê ao lado de 'Adicionar Camada Raster' também pode ser selecionado na barra de ferramentas)
+- Encontre o ficheiro que descarregou intitulado 'PEI_HollandMap1798'
+- Ser-lhe-á solicitado que defina o sistema de coordenadas desta camada. Na caixa de filtro, pesquise por '2291' e, na caixa abaixo, selecione 'NAD83 (CSRS98) / Prince Edward Isl. Stereographic'
+- Se não lhe for solicitado que defina o sistema de coordenadas da camada, será necessário alterar uma configuração. Clique em 'Configurações' e, em seguida, em 'Opções'. Clique em 'CRS' no menu à direita e escolha 'Solicitar CRS' a partir das opções abaixo de 'Quando uma nova camada é criada, ou quando uma camada é carregada sem CRS'. Clique 'OK'. Remova a camada 'PEI_HollandMap1798' (clique com o botão direito sobre ela e clique em Remover) e tente adicioná-la novamente. Desta vez, deve-lhe ser solicitado que forneça um 'CRS' e pode selecionar a opção 'NAD83' (veja acima).
+
+{% include figure.html filename="Figura3.jpg" caption="Figura 3" %}
+
+Nas etapas anteriores, selecionou e desmarcou camadas na janela 'Camadas' marcando e desmarcando as caixas ao lado delas. Essas camadas são organizadas em ordem decrescente de visibilidade: a camada no topo da lista é a camada superior da janela do visualizador (desde que esteja selecionada). Pode arrastar as camadas para cima e para baixo na janela de camadas para alterar a ordem em que ficarão visíveis na janela de visualização. A camada vetorial 'coastline_polygon' não está visível no momento porque está abaixo das camadas 'PEI_HollandMap1798' e 'PEI_CumminsMap1927'. Em geral, é melhor manter as camadas vetoriais acima das camadas raster.
+
+Desmarque 'PEI_CumminsMap1927' para que a única camada restante seja 'PEI_HollandMap1798'. Observe que o mapa aparece torto na tela; isso ocorre porque já foi georreferenciado pelos redatores da lição para coincidir com as camadas vetoriais de SIG. Saiba mais sobre georreferenciamento em [georreferenciamento no QGIS 2.0](/en/lessons/georeferencing-qgis) (em inglês).
+
+{% include figure.html filename="pei4.png" caption="Figura 4" %}
+
+Agora criaremos um shapefile de pontos, que é uma camada vetorial. Clique em 'Camada' -> 'Nova' -> 'Nova Camada Shapefile'
+
+- Alternativamente, pode selecionar o ícone 'Nova camada Shapefile' no topo da janela da barra de ferramentas QGIS
+
+{% include figure.html filename="Figura5.jpg" caption="Figura 5" %}
+
+Depois de selecionar 'Nova Camada Shapefile', aparece uma janela intitulada 'Nova Camada Vetorial'
+
+- Na categoria 'Tipo', 'ponto' já está selecionado. Clique no botão 'Especificar CRS' e selecione 'NAD83 (CSRS98) / Prince Edward Isl.
Estereográfico (EPSG: 2291)' e, em seguida, clique em OK (para obter informações sobre como entender e selecionar a zona UTM: [https://lib.uwaterloo.ca/locations/umd/digital/clump_classes.html](https://lib.uwaterloo.ca/locations/umd/digital/clump_classes.html))
+
+{% include figure.html filename="Figura6.jpg" caption="Figura 6" %}
+
+Retornando à janela 'Nova Camada vetorial', iremos criar alguns atributos. Para criar o primeiro atributo:
+
+- Em 'Novo atributo', no campo ao lado de 'Nome', digite 'Nome_Assentamento' (observe que, ao trabalhar em bancos de dados, não pode usar espaços vazios nos nomes; por isso, a convenção é usar sublinhados em seus lugares)
+- Clique em 'Adicionar à lista de atributos'
+
+Agora vamos criar um segundo atributo:
+
+- Em 'Novo Atributo', no campo ao lado de 'Nome', digite 'Ano'
+- Desta vez, vamos mudar o 'Tipo' para 'Número Inteiro'
+- Clique em 'Adicionar à lista de atributos'
+
+Para o terceiro atributo:
+
+- Sob 'Novo atributo', no campo ao lado de 'Nome', digite 'Ano_Final' (o SIG nem sempre é ideal para lidar com mudanças ao longo do tempo, então em alguns casos é importante ter um campo para identificar aproximadamente quando algo deixou de existir)
+- Mude o 'Tipo' novamente para 'Número Inteiro'
+- Clique em 'Adicionar à lista de atributos'
+
+{% include figure.html filename="Figura7.jpg" caption="Figura 7" %}
+
+- Ao concluir essas três etapas, termine de criar esse shapefile clicando em OK na parte inferior direita da janela 'Nova Camada Vetorial'. Um 'pop-up' irá surgir; nomeie-o de 'Assentamentos' e salve-o com os seus outros ficheiros SIG.
+
+Observe que uma camada chamada 'Assentamentos' agora aparece na janela 'Camadas'. Reposicione-a acima das camadas raster.
+
+{% include figure.html filename="Figura8.jpg" caption="Figura 8" %}
+
+Desmarque todas as camadas, exceto 'Assentamentos'. A janela de visualização agora está em branco, pois não criámos nenhum dado. Agora criaremos novos dados a partir do 'PEI_CumminsMap1927' e do 'PEI_HollandMap 1798' para mostrar o aumento da ocupação entre o final do século XVIII e o início do século XX.
+
+- Nós começaremos com o mapa mais recente e, portanto, geralmente mais preciso. Selecione novamente (ou seja, marque as caixas ao lado de) 'coastline_polygon' e 'PEI_CumminsMap1927'.
+- Na janela de visualização, aumente o 'Zoom' em 'Charlottetown' (dica: 'Charlottetown' fica perto do meio da ilha, no lado sul, na confluência de três rios).
+- Selecione a camada de 'Assentamentos' na janela 'Camadas'.
+- Na barra de menu, selecione 'Alternar Edição'.
+
+{% include figure.html filename="pei9.png" caption="Figura 9" %}
+
+- Depois de selecionar 'Alternar Edição', os botões de edição ficarão disponíveis à direita na barra de menus. Selecione o botão de feição com 'três pontos'.
+
+{% include figure.html filename="pei10.png" caption="Figura 10" %}
+
+- O cursor aparece agora como uma cruz - aponte a cruz para 'Charlottetown' (se por acaso não conhecer a geografia do 'PEI', pode ter ajuda adicionando a camada 'PEI_placenames'), mantendo-a dentro da linha costeira atual, e clique (a digitalização é sempre um compromisso entre precisão e funcionalidade; dependendo da qualidade do mapa original e da digitalização, para a maioria das aplicações históricas a precisão extrema não é necessária).
+- Uma janela de atributos aparecerá. Deixe o campo 'id' em branco (no momento da escrita, o QGIS criará dois campos 'id' e este é desnecessário). No campo 'Assentamento', digite 'Charlottetown'. No campo 'Ano', digite '1764'. Clique em 'OK'.
+Vamos agora repetir as etapas que realizámos com 'Charlottetown' para 'Montague', 'Summerside' e 'Cavendish' (novamente, pode encontrar esses locais adicionando a camada 'PEI_placenames'). Encontre 'Montague' no mapa, selecione o botão 'Adicionar Feição' (o ícone com três pontos) e clique em Montague no mapa. Quando a janela 'Atributos' aparecer, insira 'Montague' e '1732' nos campos apropriados. Repita para 'Summerside (1876)' e 'Cavendish (1790)'.
+
+{% include figure.html filename="Figura11.jpg" caption="Figura 11" %}
+
+Na janela 'Camadas', desmarque 'PEI_CumminsMap1927' e selecione 'PEI_HollandMap1798'. Agora vamos identificar dois assentamentos ('Princetown' e 'Havre-St-Pierre') que já não existem.
+
+- Para localizar 'Princetown', procure 'Richmond Bay' e 'Cape Aylesbury' (na costa norte, a oeste de 'Cavendish'); aqui encontrará 'Princetown' (sombreado) perto da fronteira entre o amarelo e o azul.
+
+- Se consultar a [entrada da Wikipedia](https://pt.wikipedia.org/wiki/Ilha_do_Pr%C3%ADncipe_Eduardo) desta cidade, notará que, por causa de um porto raso, 'Princetown' não se tornou um assentamento importante. Foi renomeada em 1947 e, posteriormente, rebaixada para uma aldeia. Por esse motivo, incluiremos 1947 como a data final para este assentamento.
+
+- Com o cursor do mouse (em formato de cruz), clique em 'Princetown'. Na 'tabela de atributos' que aparece, coloque 'Princetown' no campo 'Nome_Assentamento', coloque '1764' no campo 'Ano' e coloque '1947' em 'Ano_Final'. Clique 'OK'.
+
+{% include figure.html filename="Figura12.jpg" caption="Figura 12" %}
+
+- Clique no ícone 'Salvar edições' na barra de menu (fica entre 'Alternar Edição' e 'Adicionar Feição').
+
+- Clique duas vezes na camada 'Assentamentos' na janela 'Camadas' e escolha a guia 'Etiquetas' na parte superior da janela seguinte. Clique na caixa ao lado de 'Mostrar etiquetas'. Em 'Campo contendo rótulo', selecione 'Ano' (se necessário), altere o tamanho da fonte para 18,0, altere 'Posicionamento' para 'Acima à esquerda' e clique em 'OK'.
+
+Na costa norte do 'lote 39', entre 'Britain's Pond' e 'St. Peters Bay', colocaremos agora um ponto para a localização de uma aldeia há muito perdida chamada 'Havre-St-Pierre'.
+
+- 'Havre-St-Pierre' foi o primeiro assentamento acadiano da ilha, mas está desabitado desde a deportação dos acadianos em 1758.
+
+- Com o cursor do mouse (em formato de cruz), clique no local de 'Havre-St-Pierre'. Na 'tabela de atributos' que aparece, coloque 'Havre-St-Pierre' no campo 'Nome_Assentamento', coloque '1720' no campo 'Ano' e '1758' em 'Ano_Final'. Clique 'OK'.
+
+{% include figure.html filename="pei13.png" caption="Figura 13" %}
+
+Agora vamos criar outra camada vetorial: um vetor de linha. Clique em 'Camada' -> 'Nova' -> 'Nova Camada Shapefile'. A janela 'Nova Camada Vetorial' aparecerá (na categoria 'Tipo', no topo, selecione 'Linha').
+
+- Clique no botão 'Especificar CRS', selecione 'NAD83 (CSRS98) / Prince Edward Isl. Estereográfico (EPSG: 2291)' e clique em 'OK'.
+- Em 'Novo atributo', no campo ao lado de 'Nome', digite 'Nome_Estrada'.
+- Clique em 'Adicionar à lista de atributos'.
+
+Crie um segundo atributo:
+
+- Em 'Novo atributo', no campo ao lado de 'Nome', digite 'Ano'.
+- Mude o 'Tipo' para 'Número Inteiro'.
+- Clique em 'Adicionar à lista de atributos'.
+- Para terminar de criar este ficheiro, clique em 'OK' na parte inferior direita da janela 'Nova Camada Vetorial'. Uma tela para salvar aparecerá: chame a camada de 'estradas' e salve-a com os seus outros ficheiros SIG (a seguir, como antes, apresentamos um esboço opcional em código).
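+
+O esboço abaixo, também hipotético e pressupondo a API do QGIS 2.x, é o análogo do anterior para uma camada de linhas em memória; a única diferença substantiva é a geometria, construída com `QgsGeometry.fromPolyline`. Os vértices são fictícios, pois na lição a estrada é traçada com cliques sobre o mapa.
+
+``` python
+# Esboço hipotético (QGIS 2.x): camada de linhas em memória análoga à
+# camada 'estradas', com uma feição de exemplo.
+from PyQt4.QtCore import QVariant
+from qgis.core import (QgsVectorLayer, QgsField, QgsFeature,
+                       QgsGeometry, QgsPoint, QgsMapLayerRegistry)
+
+estradas = QgsVectorLayer('LineString?crs=EPSG:2291', 'estradas', 'memory')
+provedor = estradas.dataProvider()
+provedor.addAttributes([QgsField('Nome_Estrada', QVariant.String),
+                        QgsField('Ano', QVariant.Int)])
+estradas.updateFields()
+
+linha = QgsFeature(estradas.pendingFields())
+# Vértices fictícios: na prática, cada clique do traçado vira um vértice.
+linha.setGeometry(QgsGeometry.fromPolyline([QgsPoint(400000, 800000),
+                                            QgsPoint(405000, 803000)]))
+linha.setAttributes(['para Princetown', 1798])
+provedor.addFeatures([linha])
+QgsMapLayerRegistry.instance().addMapLayer(estradas)
+```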
+
+Vamos agora traçar as estradas do 'mapa de 1798' para que possamos compará-las com as estradas atuais. Certifique-se de ter as camadas 'PEI_HollandMap1798' e 'Assentamentos' marcadas na janela 'Camadas'. Selecione a camada 'estradas' na janela 'Camadas', selecione 'Alternar Edição' na barra de ferramentas superior e selecione 'Adicionar Feição'.
+
+{% include figure.html filename="pei14.png" caption="Figura 14" %}
+
+- Primeiro trace a estrada de 'Charlottetown' a 'Princetown'. Clique em 'Charlottetown' e depois clique repetidamente em pontos ao longo da estrada para 'Princetown' e verá a linha a ser criada. Repita até chegar a 'Princetown' e clique com o botão direito. Na janela 'Atributos' da estrada que aparece, no campo 'Nome_Estrada', insira 'para Princetown' e, no campo 'Ano', insira '1798'. Clique em 'OK'.
+
+{% include figure.html filename="pei15.png" caption="Figura 15" %}
+
+- Repita esta etapa para mais 3 a 4 estradas encontradas no 'PEI_HollandMap1798'.
+
+- Clique em 'Salvar edições' e, em seguida, clique em 'Alternar Edição' para desativar o modo de edição.
+
+Desmarque 'PEI_HollandMap1798' na janela 'Camadas' e selecione a camada 'PEI_highway'. Compare as estradas representadas no mapa 'PEI_highway' (as linhas vermelhas pontilhadas) com as estradas que acabou de traçar.
+
+{% include figure.html filename="pei16.png" caption="Figura 16" %}
+
+- Podemos ver que algumas dessas estradas correspondem exatamente às estradas atuais, enquanto outras não correspondem de forma alguma. Seriam necessárias mais pesquisas históricas para determinar se isso ocorre simplesmente porque o mapa de Holland não representa bem as estradas da época, ou se as estradas mudaram consideravelmente desde então.
+
+Agora crie um terceiro tipo de camada vetorial: um vetor de polígono. Clique em 'Camada' -> 'Nova' -> 'Nova Camada Shapefile'. A janela 'Nova Camada Vetorial' aparecerá - na categoria 'Tipo', no topo, selecione 'Polígono'.
+
+- Clique no botão 'Especificar CRS', selecione 'NAD83 (CSRS98) / Prince Edward Isl. Estereográfico (EPSG: 2291)' e clique em 'OK'.
+- Em 'Novo atributo', no campo ao lado de 'Nome', digite 'nome_lote'.
+- Clique em 'Adicionar à lista de atributos'.
+
+Crie um segundo atributo:
+
+- Em 'Novo atributo', no campo ao lado de 'Nome', digite 'Ano'.
+- Mude o 'Tipo' para 'Número Inteiro'.
+- Clique em 'Adicionar à lista de atributos'.
+- Para terminar de criar este ficheiro, clique em 'OK' na parte inferior direita da janela. Quando a tela para salvar aparecer, chame a camada de 'lotes' e salve-a com os seus outros ficheiros SIG.
+
+{% include figure.html filename="Figura17.jpg" caption="Figura 17" %}
+
+Comece criando um polígono para o 'Lote 66', que é o único lote retangular na ilha.
+
+- Clique em 'Alternar Edição' na barra de ferramentas superior e, em seguida, clique em 'Adicionar Feição'.
+- Clique nos quatro cantos do 'lote 66' e verá um polígono criado.
+- Clique com o botão direito no canto final e uma janela de 'Atributos' aparecerá. Adicione '66' ao campo 'nome_lote' e adicione '1764' (o ano em que esses lotes foram inventariados) ao campo 'Ano'.
+
+{% include figure.html filename="Figura18.jpg" caption="Figura 18" %}
+
+Agora vamos traçar o 'Lote 38', que fica a oeste de 'Havre-St-Pierre'. Certifique-se de que há uma marca de seleção na caixa ao lado da camada 'PEI_HollandMap1798' na janela 'Camadas'.
+
+Clique em 'Alternar Edição' na barra de ferramentas superior e, em seguida, clique em 'Adicionar Feição'.
+
+Trace o contorno do 'Lote 38', que é mais difícil por causa da linha costeira, com a maior precisão possível. Para demonstrar a funcionalidade de 'Ajuste' (*snapping*), queremos que trace ao longo da costa atual (o 'ajuste' é uma operação de edição automática que ajusta a feição que desenhou para coincidir ou alinhar exatamente com as coordenadas e a forma de outra feição próxima).
+
+- Selecione 'Configurações' -> 'Opções de Ajuste'.
+
+{% include figure.html filename="Figura19.jpg" caption="Figura 19" %}
+
+- Uma janela de 'opções de ajuste' irá abrir: clique na caixa ao lado de 'coastline_polygon'; para a categoria 'Modo', selecione 'vértice e segmento'; para 'Tolerância', selecione '10.0'; e, para 'Unidades', selecione 'pixels'. Clique 'OK'.
+
+{% include figure.html filename="Figura20.jpg" caption="Figura 20" %}
+
+Certifique-se de que a camada 'lotes' esteja selecionada na janela 'Camadas' e selecione 'Adicionar Feição' na barra de ferramentas.
+
+- Com o cursor, clique nos dois cantos inferiores do polígono, assim como fez com o 'lote 66'. Na linha costeira, notará que há uma coleção de linhas para traçar ao redor do 'Savage Harbour'. É aqui que o recurso de ajuste se torna útil: ao traçar a linha ao longo da costa atual, a sua precisão aumentará significativamente, pois os cliques serão encaixados diretamente sobre a linha existente. Quanto mais cliques fizer, mais preciso será o traçado, mas tenha em mente que, para muitos fins de SIGH (SIG histórico), obter extrema precisão às vezes produz retornos decrescentes.
+
+{% include figure.html filename="pei21.png" caption="Figura 21" %}
+
+Quando terminar de traçar e criar o polígono, selecione e desmarque as várias camadas que criou, comparando-as e vendo quais relações pode deduzir.
+
+No Google Earth, havia limitações nos tipos de 'feições', 'atributos' e dados fornecidos, e o Google Earth fazia grande parte do trabalho por si. Isso é bom quando se está aprendendo ou se deseja criar mapas rapidamente. A vantagem de usar o software QGIS para criar novas camadas vetoriais é a liberdade e o controle sobre os tipos de dados que se pode usar e as 'feições' e 'atributos' que se podem criar. Assim, é possível criar mapas personalizados e ir muito além do que pode ser alcançado no Google Earth ou no Google Maps Engine Lite. Viu isso em primeira mão com as camadas vetoriais de pontos, linhas e polígonos que aprendeu a criar nesta lição. Se tiver dados sobre, por exemplo, registros de saúde pública no século XVIII, pode criar uma nova camada mostrando a distribuição de surtos de febre tifoide e ver se há correlações com estradas e assentamentos principais. Além disso, o software SIG permite não apenas representar e apresentar dados espaciais de maneiras mais sofisticadas, mas também analisar e criar novos dados que não seriam possíveis de outra forma.
+
+**Aprendeu como criar camadas vetoriais. Certifique-se de salvar seu trabalho!**
+
+[^1]: É possível encontrar a palavra 'feição', nas traduções do QGIS BR, a referir os três tipos de 'formas' ou 'geometrias' usadas nas camadas vetoriais dos SIG. Isto, porém, cria uma diferença entre as versões do QGIS BR e do QGIS PT.
+ +*Esta lição é parte do [Geospatial Historian][].* + + [Intro to Google Maps and Google Earth]: /lessons/googlemaps-googleearth + [Installing QGIS 2.0 and Adding Layers]: /lessons/qgis-layers + [PEI_Holland map]: /assets/vector-layers-qgis/PEI_HollandMap1798_compLZW.tif + [Georeferencing in QGIS 2.0]: /lessons/georeferencing-qgis + [Wikipedia entry]: https://en.wikipedia.org/wiki/Prince_Royalty,_Prince_Edward_Island + [Geospatial Historian]: https://geospatialhistorian.wordpress.com/ diff --git a/pt/licoes/contagem-mineracao-dados-investigacao-unix.md b/pt/licoes/contagem-mineracao-dados-investigacao-unix.md index f9f8c657c1..f22c27510b 100644 --- a/pt/licoes/contagem-mineracao-dados-investigacao-unix.md +++ b/pt/licoes/contagem-mineracao-dados-investigacao-unix.md @@ -1,139 +1,139 @@ ---- -title: Contagem e mineração de dados de investigação com Unix -slug: contagem-mineracao-dados-investigacao-unix -layout: lesson -date: 2014-09-20 -translation_date: 2021-12-17 -authors: -- James Baker -- Ian Milligan -reviewers: -- M. H. Beals -- Allison Hegel -editors: -- Adam Crymble -translator: -- Felipe Lamarca -translation-editor: -- Jimmy Medeiros -translation-reviewer: -- Daniel Bonatto Seco -- Ian Araujo -difficulty: 2 -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/440 -activity: transforming -topics: [data-manipulation] -abstract: "Esta lição examinará como dados de investigação, quando organizados de maneira clara e previsível, podem ser contabilizados e minerados utilizando o shell do Unix." -original: research-data-with-unix -avatar_alt: Um diagrama de um mineiro classificando minério com um aparelho -doi: 10.46430/phpt0019 ---- - -{% include toc.html %} - -# Contagem e mineração de dados de investigação com Unix - -## Introdução - -Esta lição examinará como dados de investigação, quando organizados de maneira clara e previsível, podem ser contabilizados e minerados utilizando o shell do Unix. Esta lição se baseia nas lições "[Preservar seus dados de investigação](/pt/licoes/preservar-os-seus-dados-de-investigacao)" e "[Introduction to the Bash Command Line](/en/lessons/intro-to-bash)" (em inglês). Dependendo do quão confiante estiver no uso do shell do Unix, ela também pode ser usada como uma lição independente ou uma revisão. - -Uma vez acumulados dados de investigação para um projeto, um historiador pode fazer diferentes perguntas aos mesmos dados durante um projeto subsequente. Caso estes dados estejam espalhados em vários ficheiros - uma série de dados tabulares, um conjunto de textos transcritos, uma coleção de imagens - eles podem ser contabilizados e minerados utilizando comandos Unix simples. - -O shell do Unix oferece acesso a uma ampla gama de comandos que podem transformar o modo como você contabiliza e minera dados de investigação. Essa lição irá apresentá-lo a uma série de comandos que usam contagem e mineração de dados tabulares, embora eles apenas arranhem a superfície do que o shell do Unix pode fazer. Ao aprender apenas alguns comandos simples, será capaz de realizar tarefas que são impossíveis no Libre Office Calc, Microsoft Excel ou outros programas de estatística similares. Esses comandos podem facilmente ter o seu uso estendido para dados não-estruturados. 
- -Essa lição também irá demonstrar que as opções disponíveis para manipulação, contagem e mineração de dados geralmente dependem da quantidade de metadados, ou texto descritivo, contidos nos nomes dos ficheiros dos dados que estiver utilizando, tanto quanto da gama de comandos Unix que aprendeu a utilizar. Portanto, ainda que não seja um pré-requisito do trabalho com o shell do Unix, reservar um momento para estruturar os seus dados de investigação e convenções de nomes de ficheiros de uma maneira consistente e previsível é certamente um passo significativo para aproveitar ao máximo os comandos Unix e ser capaz de contar e minerar os seus dados de investigação. Para entender a importância de dedicar um tempo a tornar os seus dados consistentes e previsíveis, além de questões de preservação, consulte: "[Preservar seus dados de investigação](/pt/licoes/preservar-os-seus-dados-de-investigacao)". - -_____ - -## Software e configuração - -Usuários de Windows precisarão instalar o Git Bash. Ele pode ser instalado fazendo o download do instalador mais recente na [página web do git for windows](https://gitforwindows.org/) (em inglês). Instruções de instalação estão disponíveis na [documentação do Git for Windows](https://github.com/git-for-windows/git/wiki/Technical-overview) (em inglês). - -Usuários de OS X e Linux deverão utilizar os próprios terminais para seguir esta lição, como foi discutido em "[Introduction to the Bash Command Line](/en/lessons/intro-to-bash)" (em inglês). - -Esta lição foi revista utilizando o Git Bash 2.34.1 e o sistema operacional Windows 10. Caminhos de ficheiro equivalentes para OS X/Linux foram incluídos sempre que possível. No entanto, como os comandos e flags podem mudar ligeiramente entre os sistemas operacionais OS X/Linux, sugere-se que os usuários verifiquem Deborah S. Ray e Eric J. Ray, "[*Unix and Linux: Visual Quickstart Guide*](https://www.worldcat.org/title/unix-and-linux/oclc/308171076&referer=brief_results)", 4ª edição, que cobre a interoperabilidade em maiores detalhes. - -Os ficheiros utilizados nesta lição estão disponíveis em "[Figshare](https://doi.org/10.6084/m9.figshare.1172094)" (em inglês). Os dados contêm os metadados para artigos de periódicos categorizados em 'History' no banco de dados ESTAR da British Library. Os dados são compartilhados sob isenção dos direitos autorais CC0. - -Faça o download dos ficheiros necessários, salve-os no seu computador e descompacte-os. Caso não tenha um software padrão para lidar com ficheiros .zip, recomendamos [7-zip](http://www.7-zip.org/) (em inglês) para este propósito. No Windows, recomendamos descompactar a pasta em sua unidade C: para que os ficheiros estejam em `c:\proghist\`. No entanto, qualquer localização servirá, mas precisará ajustar os seus comandos à medida que for avançando na lição caso use uma localização diferente. No caso de OS X ou Linux, recomendamos de modo similar que descompacte os ficheiros no seu diretório de usuário, de modo que eles apareçam em `/usuario/NOME-DE-USUARIO/proghist/`. Em ambos os casos, isso significa que, ao abrir uma nova janela de terminal, pode simplesmente digitar `cd proghist` para mover para o diretório correto (no Windows, se o comando referido não resultar, poderá ter de digitar `cd C:\proghist` para acessar o diretório). - -_____ - -## Contabilizando ficheiros - -Você começará esta lição contabilizando os conteúdos dos ficheiros utilizando o shell do Unix. 
O shell do Unix pode ser usado para rapidamente gerar contagens de ficheiros, algo difícil de se conseguir usando interfaces gráficas de usuário (do inglês, *Graphical User Interfaces* - GUI) de suítes padrão de escritório, como o pacote Office, por exemplo. - -Abra o shell do Unix e navegue até o diretório que contém os nossos dados, o subdiretório `data` do diretório `proghist`. Lembre-se: caso não tenha certeza de onde está na sua estrutura de diretórios, digite `pwd` e use o comando `cd` para mover para onde precisa estar. A estrutura de diretórios é um pouco diferente entre OS X/Linux e Windows: no primeiro caso, o diretório está em um formato como `~/usuario/NOME-DE-USUARIO/proghist/data`, e no Windows o formato é do tipo `c:\proghist\data`. - -Digite `ls` e pressione a tecla Enter. Isso exibe uma lista que inclui dois ficheiros e um subdiretório. - -Os ficheiros nesse diretório são a base de dados `2014-01_JA.csv`, que contém os metadados dos artigos de periódico, e um ficheiro contendo a documentação a respeito do `2014-01_JA.csv` chamado `2014-01_JA.txt`. - -O subdiretório é nomeado como `derived_data`. Ele contém quatro ficheiros [.tsv](http://en.wikipedia.org/wiki/Tab-separated_values) derivados do `2014-01_JA.csv`. Cada um deles inclui todos os dados em que uma palavra-chave como `africa` ou `america` aparece no campo `Title` do `2014-01_JA.csv`. O diretório `derived_data` também inclui um subdiretório chamado `results`. - -*Nota: Ficheiros [CSV](https://pt.wikipedia.org/wiki/Comma-separated_values) são aqueles nos quais as unidades de dados (ou células) são separadas por vírgula (comma-separated-values) e ficheiros TSV são aqueles nos quais as unidades são separadas por tabulação. Ambos podem ser lidos em editores de texto simples ou em programas de estatística como Libre Office Calc ou Microsoft Excel.* - -Antes de começar a trabalhar com esses ficheiros, deve mover-se para dentro do diretório no qual eles estão armazenados. Navegue até `c:\proghist\data\derived_data` no Windows ou `~/usuario/NOME-DE-USUARIO/proghist/data/derived_data` no OS X/Linux. - -Agora que já está aqui, pode contabilizar o conteúdo dos ficheiros. - -No Unix, o comando `wc` é usado para contar os conteúdos de um ficheiro ou de uma série de ficheiros. Digite `wc -w 2014-01-31_JA_africa.tsv` e pressione a tecla Enter. A flag `-w` combinado com `wc` instrui o computador a exibir no shell uma contagem de palavras e o nome do ficheiro que foi contabilizado. - -Como foi visto no "[Introduction to the Bash Command Line](/en/lessons/intro-to-bash)", flags como `-w` são parte essencial para aproveitar ao máximo o shell do Unix, uma vez que eles oferecem melhor controle sobre os comandos. - -Se a sua investigação está mais interessada no número de entradas (ou linhas) do que no número de palavras, pode utilizar a flag de contagem de linhas. Digite `wc -l 2014-01-31_JA_africa.tsv` e pressione Enter. Combinado com o `wc`, a flag `-l` exibe uma contagem de linhas e o nome do ficheiro que foi contabilizado. - -Finalmente, digite `wc -c 2014-01-31_JA_africa.tsv` e pressione Enter. Isso usa a flag `-c` combinado com o comando `wc` para exibir uma contagem de caracteres do `2014-01-31_JA_africa.tsv`. 
- -*Nota: Usuários de OS X e Linux devem substituir a flag `-c` por `-m`.* - -Com essas três flags, o uso mais simples que um historiador pode fazer do comando `wc` é comparar o formato das fontes no formato digital - por exemplo, a contagem do número de palavras por página de um livro, a distribuição de caracteres por página ao longo de uma coleção de jornais, o comprimento médio das linhas usadas pelos poetas. Também pode utilizar `wc` com uma combinação de curingas / caracteres variáveis (*wildcards*) e flags para construir *queries* mais complexas. Digite `wc -l 2014-01-31_JA_a*.tsv` e pressione Enter. Isso exibe a contagem de linhas para `2014-01-31_JA_africa.tsv` e `2014-01-31_JA_america.tsv`, além da soma das linhas destes ficheiros, oferecendo uma maneira simples de comparar esses dois conjuntos de dados de investigação. Claro, pode ser mais rápido comparar a contagem de linhas desses dois documentos no Libre Office Calc, Microsoft Excel ou outro programa similar. Mas quando desejar comparar a contagem de linhas de dezenas, centenas ou milhares de documentos, o shell do Unix tem uma clara vantagem em velocidade. - -Além disso, à medida que os nossos conjuntos de dados aumentam de tamanho, pode utilizar o shell do Unix para fazer mais do que copiar essas contagens de linha manualmente, com capturas de tela ou com métodos de copiar e colar. Ao utilizar o operador de redirecionamento `>` pode exportar os resultados da sua *query* em um novo ficheiro. Digite `wc -l 2014-01-31_JA_a*.tsv > results/2014-01-31_JA_a_wc.txt` e pressione Enter. Isso executa a mesma *query* anterior, mas, ao invés de exibir os resultados no shell do Unix, ele salva os resultados como `2014-01-31_JA_a_wc.txt`. Ao preceder com `results/`, ele move o ficheiro .txt para o subdiretório `results`. Para verificar isso, navegue até ao subdiretório `results`, pressione Enter, digite `ls` e pressione Enter mais uma vez para ver este ficheiro listado em `c:\proghist\data\derived_data\results` no Windows ou `/usuario/NOME-DE-USUARIO/proghist/data/derived_data/results` no OS X/Linux. - -## Minerando ficheiros - -O shell do Unix pode fazer muito mais do que contar palavras, caracteres e linhas de um ficheiro. O comando `grep` (que significa '*global regular expression print*') é usado para buscar *strings* (cadeias de caracteres) específicas ao longo de múltiplos ficheiros. Ele é capaz de fazer isso muito mais rapidamente do que interfaces gráficas de busca oferecidas pela maioria dos sistemas operacionais ou suítes de escritório. Combinado com o operador `>`, o comando `grep` se torna uma ferramenta de investigação poderosa, que pode ser usada para minerar os seus dados em busca de características ou grupos de palavras que aparecem ao longo de múltiplos ficheiros e então exportar esses dados para um novo ficheiro. As únicas limitações aqui são a sua imaginação, o formato dos seus dados e - quando trabalhando com milhares ou milhões de ficheiros - o poder de processamento ao seu dispor. - -Para começar a utilizar o `grep`, primeiro navegue até o diretório `derived_data` (`cd ..`). Aqui digite `grep 1999 *.tsv` e pressione Enter. Essa *query* busca em todos os ficheiros no diretório que se enquadram nos critérios fornecidos (os ficheiros .tsv) por instâncias da *string*, ou cluster de caracteres, '1999'. Em seguida, exibe no shell. - -
    -Há uma grande quantidade de dados a serem exibidos. Então, caso fique entediado, pressione `ctrl+c` para cancelar a ação. Ctrl+c é utilizado para cancelar qualquer processo no shell do Unix. -
    - -Pressione a seta para cima uma vez para voltar à ação mais recente. Altere `grep 1999 *.tsv` para `grep -c 1999 *.tsv` e pressione Enter. O shell irá agora exibir o número de vezes que a *string* '1999' apareceu em cada um dos ficheiros .tsv. Volte à linha anterior novamente, altere para `grep -c 1999 2014-01-31_JA_*.tsv > results/2014-01-31_JA_1999.txt` e pressione Enter. Essa *query* procura instâncias da *string* '1999' em todos os documentos que se adequam aos critérios e as salva em `2014-01-31_JA_1999.txt` no subdiretório `results`. - -*Strings* não precisam ser números. `grep -c revolution 2014-01-31_JA_america.tsv 2014-02-02_JA_britain.tsv`, por exemplo, conta todas as instâncias da *string* `revolution` dentro dos ficheiros definidos e exibe essas contagens no shell. Execute esse comando e o altere para `grep -ci revolution 2014-01-31_JA_america.tsv 2014-02-02_JA_britain.tsv`. Isso repete a *query*, mas imprime um resultado que não diferencia maiúsculas de minúsculas, combinando a flag -i com -c, (incluindo instâncias `revolution` e `Revolution`). Note que a contagem aumentou quase 30 vezes para os títulos de artigos de períodicos que contêm a palavra-chave `revolution`. Como antes, voltar ao comando anterior e adicionar `> results/`, seguido do nome do ficheiro (idealmente no formato .txt), armazenará os resultados em um ficheiro. - -Também pode utilizar o `grep` para criar subconjuntos de dados tabulares. Digite `grep -i revolution 2014-01-31_JA_america.tsv 2014-02-02_JA_britain.tsv > ANO-MES-DIA_JA_america_britain_i_revolution.tsv` (onde `ANO-MES-DIA` é a data em que você está completando esta lição) e pressione Enter. Este comando verifica ambos os ficheiros definidos e exporta todas as linhas contendo `revolution` (sem diferenciar maiúsculas de minúsculas) para o ficheiro .tsv especificado. - -O dado não foi salvo ao diretório `results` porque ele não é estritamente um resultado; é um dado derivado. Dependendo do seu projeto de investigação, pode ser mais fácil armazenar isso em outro subdiretório. Por enquanto, dê uma olhada neste ficheiro para verificar o seu conteúdo e, quando estiver satisfeito, delete-o usando o comando `rm`. - -*Nota: O comando `rm` é muito poderoso e deve ser usado com cautela. Por favor, verifique "[Introduction to the Bash Command Line](/en/lessons/intro-to-bash)" (em inglês) para instruções de como utilizar esse comando corretamente.* - -Finalmente, pode usar outra flag, `-v`, para excluir elementos ao usar o comando `grep`. Digite `grep -iv revolution 2014*_JA_a*.tsv > 2014_JA_iv_revolution.csv` e pressione Enter. Essa *query* busca nos ficheiros definidos (três no total) e exporta todas as linhas que não contêm `revolution` ou `Revolution` ao `c:\proghist\data\derived_data\2014_JA_iv_revolution.csv`. - -Note que transformou os dados de um formato para outro - de .tsv para .csv. Frequentemente há uma perda de estrutura dos dados ao realizar essas transformações. Para observar isso, execute `grep -iv revolution 2014*_JA_a*.tsv > 2014_JA_iv_revolution.tsv` e abra os ficheiros .csv e .tsv no Libre Office Calc, Microsoft Excel, ou outro programa similar. Observe as diferenças no delineamento da coluna entre os dois ficheiros. - -*Resumo* - -Agora no shell do Unix você pode: - -- usar o comando `wc` com as flags `-w` e `-l` para contar as palavras e linhas de um ficheiro ou uma série de ficheiros. -- usar o redirecionador ou estrutura `subdiretório/nome-do-ficheiro` para armazenar os resultados em um subdiretório. 
-- usar o comando `grep` para buscar por instâncias de uma *string*. -- usar `grep` com a flag `-c` para contar instâncias de uma *string*, a flag `-i` para retornar buscas por *strings* ignorando diferenças entre maiúsculas e minúsculas, e a flag `-v` para excluir uma *string* dos resultados. -- combinar esses comandos e flags para construir *queries* complexas de uma forma que sugere o potencial de uso do shell do Unix para contabilizar e minerar os seus dados de investigação e projetos de investigação. - -_____ - -#### Conclusão - -Nessa lição aprendeu a executar contagens básicas em ficheiros, realizar *queries* em dados de investigação em busca de *strings* comuns e armazenar resultados e dados derivados. Ainda que essa lição seja restrita ao uso do shell do Unix para contabilizar e minerar dados tabulares, os processos podem facilmente ser estendidos a textos livres. Para isso, recomendamos dois guias escritos por William Turkel: - -- William Turkel, '[Basic Text Analysis with Command Line Tools in Linux](http://williamjturkel.net/2013/06/15/basic-text-analysis-with-command-line-tools-in-linux/)' (15 de junho de 2013) -- William Turkel, '[Pattern Matching and Permuted Term Indexing with Command Line Tools in Linux](http://williamjturkel.net/2013/06/20/pattern-matching-and-permuted-term-indexing-with-command-line-tools-in-linux/)' (20 de junho de 2013) - -Como essas recomendações sugerem, a presente lição apenas aborda superficialmente o que o ambiente do shell do Unix é capaz de fazer. Espera-se, no entanto, que tenha oferecido uma prova suficiente para estimular uma investigação mais aprofundada e uma prática produtiva. - -Para muitos historiadores, o potencial total dessas ferramentas deve surgir somente ao incorporar essas habilidades em um projeto de investigação real. Uma vez que a sua investigação cresce e, com isso, os seus dados de investigação, ser capaz de manipular, contabilizar e minerar milhares de ficheiros será extremamente útil. Caso opte por trabalhar nesta lição e investigar o shell do Unix mais a fundo, descobrirá que mesmo uma grande coleção de ficheiros que não contêm quaisquer elementos de dados alfanuméricos, como ficheiros de imagem, podem ser facilmente classificados, selecionados e consultados em um shell do Unix. +--- +title: Contagem e mineração de dados de investigação com Unix +slug: contagem-mineracao-dados-investigacao-unix +layout: lesson +date: 2014-09-20 +translation_date: 2021-12-17 +authors: +- James Baker +- Ian Milligan +reviewers: +- M. H. Beals +- Allison Hegel +editors: +- Adam Crymble +translator: +- Felipe Lamarca +translation-editor: +- Jimmy Medeiros +translation-reviewer: +- Daniel Bonatto Seco +- Ian Araujo +difficulty: 2 +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/440 +activity: transforming +topics: [data-manipulation] +abstract: "Esta lição examinará como dados de investigação, quando organizados de maneira clara e previsível, podem ser contabilizados e minerados utilizando o shell do Unix." +original: research-data-with-unix +avatar_alt: Um diagrama de um mineiro classificando minério com um aparelho +doi: 10.46430/phpt0019 +--- + +{% include toc.html %} + +# Contagem e mineração de dados de investigação com Unix + +## Introdução + +Esta lição examinará como dados de investigação, quando organizados de maneira clara e previsível, podem ser contabilizados e minerados utilizando o shell do Unix. 
Esta lição se baseia nas lições "[Preservar seus dados de investigação](/pt/licoes/preservar-os-seus-dados-de-investigacao)" e "[Introduction to the Bash Command Line](/en/lessons/intro-to-bash)" (em inglês). Dependendo do quão confiante estiver no uso do shell do Unix, ela também pode ser usada como uma lição independente ou uma revisão.
+
+Uma vez acumulados dados de investigação para um projeto, um historiador pode fazer diferentes perguntas aos mesmos dados durante um projeto subsequente. Caso estes dados estejam espalhados em vários ficheiros - uma série de dados tabulares, um conjunto de textos transcritos, uma coleção de imagens - eles podem ser contabilizados e minerados utilizando comandos Unix simples.
+
+O shell do Unix oferece acesso a uma ampla gama de comandos que podem transformar o modo como você contabiliza e minera dados de investigação. Essa lição irá apresentá-lo a uma série de comandos para contar e minerar dados tabulares, embora esses comandos apenas arranhem a superfície do que o shell do Unix pode fazer. Ao aprender apenas alguns comandos simples, será capaz de realizar tarefas que são impossíveis no Libre Office Calc, no Microsoft Excel ou em outros programas de estatística similares. O uso desses comandos também pode ser facilmente estendido a dados não estruturados.
+
+Essa lição também irá demonstrar que as opções disponíveis para manipulação, contagem e mineração de dados geralmente dependem tanto da quantidade de metadados, ou texto descritivo, contidos nos nomes dos ficheiros dos dados que estiver utilizando quanto da gama de comandos Unix que aprendeu a utilizar. Portanto, ainda que não seja um pré-requisito do trabalho com o shell do Unix, reservar um momento para estruturar os seus dados de investigação e as convenções de nomes de ficheiros de uma maneira consistente e previsível é certamente um passo significativo para aproveitar ao máximo os comandos Unix e ser capaz de contar e minerar os seus dados de investigação. Para entender a importância de dedicar um tempo a tornar os seus dados consistentes e previsíveis, além de questões de preservação, consulte: "[Preservar seus dados de investigação](/pt/licoes/preservar-os-seus-dados-de-investigacao)".
+
+_____
+
+## Software e configuração
+
+Usuários de Windows precisarão instalar o Git Bash. Ele pode ser instalado fazendo o download do instalador mais recente na [página web do git for windows](https://gitforwindows.org/) (em inglês). Instruções de instalação estão disponíveis na [documentação do Git for Windows](https://github.com/git-for-windows/git/wiki/Technical-overview) (em inglês).
+
+Usuários de OS X e Linux deverão utilizar os próprios terminais para seguir esta lição, como foi discutido em "[Introduction to the Bash Command Line](/en/lessons/intro-to-bash)" (em inglês).
+
+Esta lição foi revista utilizando o Git Bash 2.34.1 e o sistema operacional Windows 10. Caminhos de ficheiro equivalentes para OS X/Linux foram incluídos sempre que possível. No entanto, como os comandos e flags podem mudar ligeiramente entre os sistemas operacionais OS X/Linux, sugere-se que os usuários verifiquem Deborah S. Ray e Eric J. Ray, "[*Unix and Linux: Visual Quickstart Guide*](https://www.worldcat.org/title/unix-and-linux/oclc/308171076&referer=brief_results)", 4ª edição, que cobre a interoperabilidade em maiores detalhes.
+
+Os ficheiros utilizados nesta lição estão disponíveis em "[Figshare](https://doi.org/10.6084/m9.figshare.1172094)" (em inglês). Os dados contêm os metadados para artigos de periódicos categorizados em 'History' no banco de dados ESTAR da British Library. Os dados são compartilhados sob isenção dos direitos autorais CC0.
+
+Faça o download dos ficheiros necessários, salve-os no seu computador e descompacte-os. Caso não tenha um software padrão para lidar com ficheiros .zip, recomendamos [7-zip](https://www.7-zip.org/) (em inglês) para este propósito. No Windows, recomendamos descompactar a pasta em sua unidade C: para que os ficheiros estejam em `c:\proghist\`. No entanto, qualquer localização servirá, mas precisará ajustar os seus comandos à medida que for avançando na lição caso use uma localização diferente. No caso de OS X ou Linux, recomendamos de modo similar que descompacte os ficheiros no seu diretório de usuário, de modo que eles apareçam em `/usuario/NOME-DE-USUARIO/proghist/`. Em ambos os casos, isso significa que, ao abrir uma nova janela de terminal, pode simplesmente digitar `cd proghist` para mover para o diretório correto (no Windows, se o comando referido não resultar, poderá ter de digitar `cd C:\proghist` para acessar o diretório).
+
+_____
+
+## Contabilizando ficheiros
+
+Você começará esta lição contabilizando os conteúdos dos ficheiros utilizando o shell do Unix. O shell do Unix pode ser usado para rapidamente gerar contagens de ficheiros, algo difícil de se conseguir usando as interfaces gráficas de usuário (do inglês, *Graphical User Interfaces* - GUI) de suítes padrão de escritório, como o pacote Office, por exemplo.
+
+Abra o shell do Unix e navegue até o diretório que contém os nossos dados, o subdiretório `data` do diretório `proghist`. Lembre-se: caso não tenha certeza de onde está na sua estrutura de diretórios, digite `pwd` e use o comando `cd` para mover para onde precisa estar. A estrutura de diretórios é um pouco diferente entre OS X/Linux e Windows: no primeiro caso, o diretório está em um formato como `~/usuario/NOME-DE-USUARIO/proghist/data`, e no Windows o formato é do tipo `c:\proghist\data`.
+
+Digite `ls` e pressione a tecla Enter. Isso exibe uma lista que inclui dois ficheiros e um subdiretório.
+
+Os ficheiros nesse diretório são a base de dados `2014-01_JA.csv`, que contém os metadados dos artigos de periódico, e um ficheiro contendo a documentação a respeito do `2014-01_JA.csv`, chamado `2014-01_JA.txt`.
+
+O subdiretório é nomeado como `derived_data`. Ele contém quatro ficheiros [.tsv](https://en.wikipedia.org/wiki/Tab-separated_values) derivados do `2014-01_JA.csv`. Cada um deles inclui todos os dados em que uma palavra-chave como `africa` ou `america` aparece no campo `Title` do `2014-01_JA.csv`. O diretório `derived_data` também inclui um subdiretório chamado `results`.
+
+*Nota: Ficheiros [CSV](https://pt.wikipedia.org/wiki/Comma-separated_values) são aqueles nos quais as unidades de dados (ou células) são separadas por vírgula (comma-separated-values) e ficheiros TSV são aqueles nos quais as unidades são separadas por tabulação. Ambos podem ser lidos em editores de texto simples ou em programas de estatística como Libre Office Calc ou Microsoft Excel.*
+
+Antes de começar a trabalhar com esses ficheiros, deve mover-se para dentro do diretório no qual eles estão armazenados. Navegue até `c:\proghist\data\derived_data` no Windows ou `~/usuario/NOME-DE-USUARIO/proghist/data/derived_data` no OS X/Linux.
+
+Agora que já está aqui, pode contabilizar o conteúdo dos ficheiros.
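+
+A propósito, se mais adiante quiser conferir, fora do shell, os números que os comandos desta seção produzirem, um pequeno esboço em Python pode servir de contraprova. O esboço é hipotético: pressupõe que é executado a partir do diretório `derived_data` e as suas contagens são apenas aproximadamente equivalentes às de `wc`.
+
+``` python
+# Esboço hipotético: contagens de linhas e palavras aproximadamente
+# equivalentes às de `wc -l` e `wc -w` para um dos ficheiros da lição.
+with open('2014-01-31_JA_africa.tsv', encoding='utf-8') as f:
+    conteudo = f.read()
+
+print(len(conteudo.splitlines()), 'linhas')
+print(len(conteudo.split()), 'palavras')
+```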
+
+No Unix, o comando `wc` é usado para contar os conteúdos de um ficheiro ou de uma série de ficheiros. Digite `wc -w 2014-01-31_JA_africa.tsv` e pressione a tecla Enter. A flag `-w`, combinada com `wc`, instrui o computador a exibir no shell uma contagem de palavras e o nome do ficheiro que foi contabilizado.
+
+Como foi visto no "[Introduction to the Bash Command Line](/en/lessons/intro-to-bash)", flags como `-w` são parte essencial do aproveitamento máximo do shell do Unix, uma vez que elas oferecem melhor controle sobre os comandos.
+
+Se a sua investigação está mais interessada no número de entradas (ou linhas) do que no número de palavras, pode utilizar a flag de contagem de linhas. Digite `wc -l 2014-01-31_JA_africa.tsv` e pressione Enter. Combinada com o `wc`, a flag `-l` exibe uma contagem de linhas e o nome do ficheiro que foi contabilizado.
+
+Finalmente, digite `wc -c 2014-01-31_JA_africa.tsv` e pressione Enter. Isso usa a flag `-c`, combinada com o comando `wc`, para exibir uma contagem de caracteres do `2014-01-31_JA_africa.tsv`.
+
+*Nota: Usuários de OS X e Linux devem substituir a flag `-c` por `-m`.*
+
+Com essas três flags, o uso mais simples que um historiador pode fazer do comando `wc` é comparar a estrutura das fontes em formato digital - por exemplo, a contagem do número de palavras por página de um livro, a distribuição de caracteres por página ao longo de uma coleção de jornais, o comprimento médio das linhas usadas pelos poetas. Também pode utilizar `wc` com uma combinação de curingas / caracteres variáveis (*wildcards*) e flags para construir *queries* mais complexas. Digite `wc -l 2014-01-31_JA_a*.tsv` e pressione Enter. Isso exibe a contagem de linhas para `2014-01-31_JA_africa.tsv` e `2014-01-31_JA_america.tsv`, além da soma das linhas destes ficheiros, oferecendo uma maneira simples de comparar esses dois conjuntos de dados de investigação. Claro, pode ser mais rápido comparar a contagem de linhas desses dois documentos no Libre Office Calc, Microsoft Excel ou outro programa similar. Mas quando desejar comparar a contagem de linhas de dezenas, centenas ou milhares de documentos, o shell do Unix tem uma clara vantagem em velocidade.
+
+Além disso, à medida que os nossos conjuntos de dados aumentam de tamanho, pode utilizar o shell do Unix para fazer mais do que copiar essas contagens de linha manualmente, com capturas de tela ou com métodos de copiar e colar. Ao utilizar o operador de redirecionamento `>`, pode exportar os resultados da sua *query* para um novo ficheiro. Digite `wc -l 2014-01-31_JA_a*.tsv > results/2014-01-31_JA_a_wc.txt` e pressione Enter. Isso executa a mesma *query* anterior, mas, ao invés de exibir os resultados no shell do Unix, ele salva os resultados como `2014-01-31_JA_a_wc.txt`. Ao preceder com `results/`, ele move o ficheiro .txt para o subdiretório `results`. Para verificar isso, navegue até ao subdiretório `results` (digite `cd results` e pressione Enter), digite `ls` e pressione Enter mais uma vez para ver este ficheiro listado em `c:\proghist\data\derived_data\results` no Windows ou `/usuario/NOME-DE-USUARIO/proghist/data/derived_data/results` no OS X/Linux.
+
+## Minerando ficheiros
+
+O shell do Unix pode fazer muito mais do que contar palavras, caracteres e linhas de um ficheiro. O comando `grep` (que significa '*global regular expression print*') é usado para buscar *strings* (cadeias de caracteres) específicas ao longo de múltiplos ficheiros. Ele é capaz de fazer isso muito mais rapidamente do que as interfaces gráficas de busca oferecidas pela maioria dos sistemas operacionais ou suítes de escritório. Combinado com o operador `>`, o comando `grep` se torna uma ferramenta de investigação poderosa, que pode ser usada para minerar os seus dados em busca de características ou grupos de palavras que aparecem ao longo de múltiplos ficheiros e então exportar esses dados para um novo ficheiro. As únicas limitações aqui são a sua imaginação, o formato dos seus dados e - quando trabalhando com milhares ou milhões de ficheiros - o poder de processamento ao seu dispor.
+
+Para começar a utilizar o `grep`, primeiro navegue até o diretório `derived_data` (`cd ..`). Aqui, digite `grep 1999 *.tsv` e pressione Enter. Essa *query* busca, em todos os ficheiros no diretório que se enquadram nos critérios fornecidos (os ficheiros .tsv), por instâncias da *string*, ou cluster de caracteres, '1999'. Em seguida, exibe os resultados no shell.
+
+*Nota: Há uma grande quantidade de dados a serem exibidos. Então, caso fique entediado, pressione `ctrl+c` para cancelar a ação. `Ctrl+c` é utilizado para cancelar qualquer processo no shell do Unix.*
+
+Pressione a seta para cima uma vez para voltar à ação mais recente. Altere `grep 1999 *.tsv` para `grep -c 1999 *.tsv` e pressione Enter. O shell irá agora exibir o número de vezes que a *string* '1999' apareceu em cada um dos ficheiros .tsv. Volte à linha anterior novamente, altere para `grep -c 1999 2014-01-31_JA_*.tsv > results/2014-01-31_JA_1999.txt` e pressione Enter. Essa *query* procura instâncias da *string* '1999' em todos os documentos que se adequam aos critérios e salva as contagens em `2014-01-31_JA_1999.txt`, no subdiretório `results`.
+
+*Strings* não precisam ser números. `grep -c revolution 2014-01-31_JA_america.tsv 2014-02-02_JA_britain.tsv`, por exemplo, conta todas as instâncias da *string* `revolution` dentro dos ficheiros definidos e exibe essas contagens no shell. Execute esse comando e altere-o para `grep -ci revolution 2014-01-31_JA_america.tsv 2014-02-02_JA_britain.tsv`. Isso repete a *query*, mas imprime um resultado que não diferencia maiúsculas de minúsculas, combinando a flag `-i` com `-c` (incluindo instâncias de `revolution` e `Revolution`). Note que a contagem aumentou quase 30 vezes para os títulos de artigos de periódicos que contêm a palavra-chave `revolution`. Como antes, voltar ao comando anterior e adicionar `> results/`, seguido do nome do ficheiro (idealmente no formato .txt), armazenará os resultados em um ficheiro.
+
+Também pode utilizar o `grep` para criar subconjuntos de dados tabulares. Digite `grep -i revolution 2014-01-31_JA_america.tsv 2014-02-02_JA_britain.tsv > ANO-MES-DIA_JA_america_britain_i_revolution.tsv` (onde `ANO-MES-DIA` é a data em que você está completando esta lição) e pressione Enter. Este comando verifica ambos os ficheiros definidos e exporta todas as linhas contendo `revolution` (sem diferenciar maiúsculas de minúsculas) para o ficheiro .tsv especificado.
+
+O dado não foi salvo no diretório `results` porque ele não é estritamente um resultado; é um dado derivado. Dependendo do seu projeto de investigação, pode ser mais fácil armazenar isso em outro subdiretório. Por enquanto, dê uma olhada neste ficheiro para verificar o seu conteúdo e, quando estiver satisfeito, delete-o usando o comando `rm`.
+
+*Nota: O comando `rm` é muito poderoso e deve ser usado com cautela. Por favor, verifique "[Introduction to the Bash Command Line](/en/lessons/intro-to-bash)" (em inglês) para instruções de como utilizar esse comando corretamente.*
+
+Finalmente, pode usar outra flag, `-v`, para excluir elementos ao usar o comando `grep`. Digite `grep -iv revolution 2014*_JA_a*.tsv > 2014_JA_iv_revolution.csv` e pressione Enter. Essa *query* busca nos ficheiros definidos (três no total) e exporta todas as linhas que não contêm `revolution` ou `Revolution` para `c:\proghist\data\derived_data\2014_JA_iv_revolution.csv`.
+
+Note que transformou os dados de um formato para outro - de .tsv para .csv. Frequentemente há uma perda de estrutura dos dados ao realizar essas transformações. Para observar isso, execute `grep -iv revolution 2014*_JA_a*.tsv > 2014_JA_iv_revolution.tsv` e abra os ficheiros .csv e .tsv no Libre Office Calc, no Microsoft Excel ou em outro programa similar. Observe as diferenças no delineamento das colunas entre os dois ficheiros.
+
+*Resumo*
+
+Agora, no shell do Unix, você pode:
+
+- usar o comando `wc` com as flags `-w` e `-l` para contar as palavras e linhas de um ficheiro ou de uma série de ficheiros.
+- usar o redirecionador ou a estrutura `subdiretório/nome-do-ficheiro` para armazenar os resultados em um subdiretório.
+- usar o comando `grep` para buscar por instâncias de uma *string*.
+- usar `grep` com a flag `-c` para contar instâncias de uma *string*, a flag `-i` para retornar buscas por *strings* ignorando diferenças entre maiúsculas e minúsculas, e a flag `-v` para excluir uma *string* dos resultados.
+- combinar esses comandos e flags para construir *queries* complexas de uma forma que sugere o potencial de uso do shell do Unix para contabilizar e minerar os seus dados e projetos de investigação.
+
+_____
+
+## Conclusão
+
+Nesta lição, aprendeu a executar contagens básicas em ficheiros, realizar *queries* em dados de investigação em busca de *strings* comuns e armazenar resultados e dados derivados. Ainda que esta lição seja restrita ao uso do shell do Unix para contabilizar e minerar dados tabulares, os processos podem facilmente ser estendidos a textos livres. Para isso, recomendamos dois guias escritos por William Turkel:
+
+- William Turkel, '[Basic Text Analysis with Command Line Tools in Linux](https://williamjturkel.net/2013/06/15/basic-text-analysis-with-command-line-tools-in-linux/)' (15 de junho de 2013)
+- William Turkel, '[Pattern Matching and Permuted Term Indexing with Command Line Tools in Linux](https://williamjturkel.net/2013/06/20/pattern-matching-and-permuted-term-indexing-with-command-line-tools-in-linux/)' (20 de junho de 2013)
+
+Como essas recomendações sugerem, a presente lição apenas aborda superficialmente o que o ambiente do shell do Unix é capaz de fazer. Espera-se, no entanto, que tenha oferecido uma amostra suficiente para estimular uma investigação mais aprofundada e uma prática produtiva.
+
+Para muitos historiadores, o potencial total dessas ferramentas deve surgir somente ao incorporar essas habilidades em um projeto de investigação real. À medida que a sua investigação cresce e, com ela, os seus dados de investigação, ser capaz de manipular, contabilizar e minerar milhares de ficheiros será extremamente útil. Caso opte por trabalhar nesta lição e investigar o shell do Unix mais a fundo, descobrirá que mesmo uma grande coleção de ficheiros que não contêm quaisquer elementos de dados alfanuméricos, como ficheiros de imagem, pode ser facilmente classificada, selecionada e consultada em um shell do Unix.
diff --git a/pt/licoes/contar-frequencias-palavras-python.md b/pt/licoes/contar-frequencias-palavras-python.md
index 1c9f544cbc..70907d71bc 100644
--- a/pt/licoes/contar-frequencias-palavras-python.md
+++ b/pt/licoes/contar-frequencias-palavras-python.md
@@ -1,360 +1,360 @@
----
-title: Contagem de Frequências de Palavras com Python
-layout: lesson
-slug: contar-frequencias-palavras-python
-date: 2012-07-17
-translation_date: 2022-01-13
-authors:
-- William J. Turkel
-- Adam Crymble
-reviewers:
-- Jim Clifford
-- Frederik Elwert
-editors:
-- Miriam Posner
-translator:
-- Felipe Lamarca
-translation-editor:
-- Jimmy Medeiros
-translation-reviewer:
-- Ana Carolina Erthal
-- Joana Vieira Paulino
-difficulty: 2
-review-ticket: https://github.com/programminghistorian/ph-submissions/issues/461
-activity: analyzing
-topics: [python]
-abstract: "Contar a frequência de palavras específicas de uma lista pode fornecer dados esclarecedores. Esta lição ensinará uma maneira fácil de contar essas frequências com Python."
-original: counting-frequencies -avatar_alt: Homem descontente sentado em um tronco cercado por pássaros -doi: 10.46430/phpt0023 ---- - -{% include toc.html %} - -## Objetivos da Lição - -Sua lista agora está limpa o suficiente para que possa começar a analisar seu conteúdo de maneiras significativas. Contar a frequência de palavras específicas de uma lista pode fornecer dados esclarecedores. Python possui uma maneira fácil de contar frequências, mas requer o uso de um novo tipo de variável: o *dicionário*. Antes de começar a trabalhar com um dicionário, considere os processos utilizados para calcular frequências em uma lista. - -### Ficheiros Necessários para esta Lição - -- `obo.py` - -Caso não possua esse ficheiro, pode fazer o *download* do ficheiro ([zip][]) que contém todo o código das lições anteriores desta série. - -## Frequências - -Agora desejamos contar a frequência de cada palavra em nossa lista. Já viu que é fácil de processar uma lista utilizando um `for` *loop*. Tente salvar e executar o exemplo a seguir. Lembre-se de que `+=` informa ao programa para acrescentar algo ao final de uma variável existente. - -``` python -# count-list-items-1.py - -wordstring = 'foi o melhor dos tempos foi o pior dos tempos ' -wordstring += 'foi a idade da sabedoria foi a idade da ignorância' -wordlist = wordstring.split() - -wordfreq = [] -for w in wordlist: - wordfreq.append(wordlist.count(w)) - -print("String\n" + wordstring +"\n") -print("Lista\n" + str(wordlist) + "\n") -print("Frequências\n" + str(wordfreq) + "\n") -print("Pares\n" + str(list(zip(wordlist, wordfreq)))) -``` - -Aqui, começamos com uma string e separamo-la em uma lista, como fizemos anteriormente. Depois disso criamos uma lista (inicialmente vazia) chamada `wordfreq`, percorremos cada palavra na `wordlist` e contamos o número de vezes que aquela palavra aparece em toda a lista. Então, adicionamos a contagem de cada palavra à nossa lista `wordfreq`. Utilizando a operação `zip`, somos capazes de combinar a primeira palavra da lista de palavras com o primeiro número na lista de frequências, a segunda palavra e a segunda frequência e assim por diante. Terminamos com uma lista de pares de palavras e frequências. A função `str` converte qualquer objeto numa string para que ele possa ser exibido. - -Deve obter algo assim: - -``` python -String -foi o melhor dos tempos foi o pior dos tempos foi a idade da sabedoria foi a idade da ignorância - -Lista -['foi', 'o', 'melhor', 'dos', 'tempos', 'foi', 'o', 'pior', 'dos', 'tempos', 'foi', 'a', 'idade', 'da', 'sabedoria', 'foi', 'a', 'idade', 'da', 'ignorância'] - -Frequências -[4, 2, 1, 2, 2, 4, 2, 1, 2, 2, 4, 2, 2, 2, 1, 4, 2, 2, 2, 1] - -Pares -[('foi', 4), ('o', 2), ('melhor', 1), ('dos', 2), ('tempos', 2), ('foi', 4), ('o', 2), ('pior', 1), ('dos', 2), ('tempos', 2), ('foi', 4), ('a', 2), ('idade', 2), ('da', 2), ('sabedoria', 1), ('foi', 4), ('a', 2), ('idade', 2), ('da', 2), ('ignorância', 1)] -``` - -Valerá a pena estudar o código acima até entendê-lo antes de continuar. - -O Python também inclui uma ferramenta muito conveniente chamada *[list comprehension][]* (ver uma explicação do método de [compreensão de lista](https://pt.wikipedia.org/wiki/Compreens%C3%A3o_de_lista) em português), que pode ser utilizada para fazer o mesmo que um `for` *loop* de maneira mais económica. 
- -``` python -# count-list-items-1.py - -wordstring = 'foi o melhor dos tempos foi o pior dos tempos ' -wordstring += 'foi a idade da sabedoria foi a idade da ignorância' -wordlist = wordstring.split() - -wordfreq = [wordlist.count(w) for w in wordlist] # uma list comprehension - -print("String\n" + wordstring +"\n") -print("Lista\n" + str(wordlist) + "\n") -print("Frequências\n" + str(wordfreq) + "\n") -print("Pares\n" + str(list(zip(wordlist, wordfreq)))) -``` - -Se estudar esse método de compreensão de lista cuidadosamente, descobrirá que ele faz exatamente o mesmo que o `for` *loop* no exemplo anterior, mas de maneira condensada. Qualquer um dos métodos funcionará bem, então use a versão com a qual se sente mais confortável. - -Em geral é prudente utilizar um código que entenda ao invés de um código que seja executado mais rapidamente. - -Neste ponto, temos uma lista de pares, onde cada par contém uma palavra e sua frequência. Essa lista é um pouco redundante. Se 'the' ocorre 500 vezes, então essa lista contém quinhentas cópias do par ('the', 500). Essa lista também está ordenada pelas palavras no texto original, ao invés de listar as palavras na ordem da mais frequente para a menos frequente. Podemos resolver esses problemas convertendo-a em um dicionário, e depois exibindo o dicionário na ordem do item mais comum para o menos comum. - -## Dicionários de Python - -Tanto strings quanto listas são ordenadas sequencialmente, o que significa que pode acessar seus conteúdos utilizando um índice (*index*), um número que começa no 0. Caso tenha uma lista contendo strings, pode utilizar um par de índices para acessar uma string particular na lista, e depois um caractere particular naquela string. Estude os exemplos abaixo: - - -``` python - -s = 'olá mundo' -print(s[0]) --> o - -print(s[1]) --> l - -m = ['olá', 'mundo'] -print(m[0]) --> olá - -print(m[1]) --> mundo - -print(m[0][1]) --> l - -print(m[1][0]) --> m -``` - -Para manter controle sobre as frequências, utilizaremos outro tipo de objeto Python: um dicionário. O dicionário é uma coleção não ordenada de objetos. Isso significa que não pode utilizar índices para recuperar seus elementos. Pode, por outro lado, buscá-los utilizando uma chave, ou *key* no inglês (daí o nome "dicionário"). Estude o exemplo a seguir: - - -``` python - -d = {'mundo': 1, 'olá': 0} -print(d['olá']) --> 0 - -print(d['mundo']) --> 1 - -print(d.keys()) --> dict_keys(['mundo', 'olá']) -``` - -Dicionários podem ser um pouco confusos para um novo programador. Tente pensar neles como um dicionário de idiomas. Caso não saiba (ou não se lembre) como exatamente "*bijection*" difere de "*surjection*", pode buscar pelos dois termos no *Oxford English Dictionary*. O mesmo princípio se aplica quando realiza um `print(d['olá'])` exceto pelo fato de que, ao invés de exibir uma definição literária, ele exibe o valor associado à palavra-chave 'olá', conforme definido por você quando criou o dicionário chamado `d`. Nesse caso, esse valor é "0". - -Observe que usa chaves para definir um dicionário, mas colchetes para acessar coisas dentro dele. A operação `keys` retorna uma lista de chaves que estão definidas no dicionário. - -## Pares Palavra-Frequência - -Com base no que temos até agora, queremos uma função que seja capaz de converter uma lista de palavras em um dicionário de pares palavra-frequência. O único comando novo que vamos precisar é `dict`, que faz um dicionário a partir de uma lista de pares. 
Copie o código a seguir e adicione-o ao módulo `obo.py`: - -``` python -# Dada uma lista de palavras, retorna um dicionário de pares palavra-frequência. - -def wordListToFreqDict(wordlist): - wordfreq = [wordlist.count(p) for p in wordlist] - return dict(list(zip(wordlist,wordfreq))) -``` - -Também vamos querer uma função que seja capaz de ordenar o dicionário de pares palavra-frequência por frequência decrescente. Copie o código a seguir e adicione-o também ao módulo `obo.py`: - - -``` python -# Ordena um dicionário de pares palavra-frequência em ordem decrescente de frequência. - -def sortFreqDict(freqdict): - aux = [(freqdict[key], key) for key in freqdict] - aux.sort() - aux.reverse() - return aux -``` - -Agora podemos escrever um programa que recebe uma URL e retorna pares palavra-frequência para a página web, de acordo com a ordem decrescente de frequência. Copie o programa a seguir no Komodo Edit, armazene-o como `html-to-freq.py` e execute-o. Estude o programa e seu resultado cuidadosamente antes de continuar. - - -``` python -#html-to-freq.py - -import urllib.request, urllib.error, urllib.parse, obo - -url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33' - -response = urllib.request.urlopen(url) -html = response.read().decode('UTF-8') -text = obo.stripTags(html).lower() -wordlist = obo.stripNonAlphaNum(text) -dictionary = obo.wordListToFreqDict(wordlist) -sorteddict = obo.sortFreqDict(dictionary) - -for s in sorteddict: print(str(s)) -``` - -## Removendo *Stop Words* - -Quando vemos o resultado do nosso programa `html-to-freq.py`, verificamos que muitas das palavras mais frequentes no texto são palavras funcionais como *the*, *of*, *to* e *and*. - -``` python -(192, 'the') -(105, 'i') -(74, 'to') -(71, 'was') -(67, 'of') -(62, 'in') -(53, 'a') -(52, 'and') -(50, 'you') -(50, 'he') -(40, 'that') -(39, 'his') -(36, 'it') -``` - -Essas palavras são geralmente as mais comuns em qualquer texto de língua inglesa, então elas não nos dizem muito a respeito do julgamento de Bowsey. Em geral, estamos mais interessados em encontrar as palavras que nos auxiliarão a diferenciar esse texto de outros textos sobre assuntos distintos. Desse modo, vamos remover as palavras funcionais comuns. Palavras que são ignoradas dessa forma são conhecidas como _stopwords_[^1]. Utilizaremos a lista a seguir, adaptada de uma publicação *online* por [cientistas da computação em Glasgow][]. Copie-a e adicione-a no início da biblioteca `obo.py` que está construindo. 
- -``` python -stopwords = ['a', 'about', 'above', 'across', 'after', 'afterwards'] -stopwords += ['again', 'against', 'all', 'almost', 'alone', 'along'] -stopwords += ['already', 'also', 'although', 'always', 'am', 'among'] -stopwords += ['amongst', 'amoungst', 'amount', 'an', 'and', 'another'] -stopwords += ['any', 'anyhow', 'anyone', 'anything', 'anyway', 'anywhere'] -stopwords += ['are', 'around', 'as', 'at', 'back', 'be', 'became'] -stopwords += ['because', 'become', 'becomes', 'becoming', 'been'] -stopwords += ['before', 'beforehand', 'behind', 'being', 'below'] -stopwords += ['beside', 'besides', 'between', 'beyond', 'bill', 'both'] -stopwords += ['bottom', 'but', 'by', 'call', 'can', 'cannot', 'cant'] -stopwords += ['co', 'computer', 'con', 'could', 'couldnt', 'cry', 'de'] -stopwords += ['describe', 'detail', 'did', 'do', 'done', 'down', 'due'] -stopwords += ['during', 'each', 'eg', 'eight', 'either', 'eleven', 'else'] -stopwords += ['elsewhere', 'empty', 'enough', 'etc', 'even', 'ever'] -stopwords += ['every', 'everyone', 'everything', 'everywhere', 'except'] -stopwords += ['few', 'fifteen', 'fifty', 'fill', 'find', 'fire', 'first'] -stopwords += ['five', 'for', 'former', 'formerly', 'forty', 'found'] -stopwords += ['four', 'from', 'front', 'full', 'further', 'get', 'give'] -stopwords += ['go', 'had', 'has', 'hasnt', 'have', 'he', 'hence', 'her'] -stopwords += ['here', 'hereafter', 'hereby', 'herein', 'hereupon', 'hers'] -stopwords += ['herself', 'him', 'himself', 'his', 'how', 'however'] -stopwords += ['hundred', 'i', 'ie', 'if', 'in', 'inc', 'indeed'] -stopwords += ['interest', 'into', 'is', 'it', 'its', 'itself', 'keep'] -stopwords += ['last', 'latter', 'latterly', 'least', 'less', 'ltd', 'made'] -stopwords += ['many', 'may', 'me', 'meanwhile', 'might', 'mill', 'mine'] -stopwords += ['more', 'moreover', 'most', 'mostly', 'move', 'much'] -stopwords += ['must', 'my', 'myself', 'name', 'namely', 'neither', 'never'] -stopwords += ['nevertheless', 'next', 'nine', 'no', 'nobody', 'none'] -stopwords += ['noone', 'nor', 'not', 'nothing', 'now', 'nowhere', 'of'] -stopwords += ['off', 'often', 'on','once', 'one', 'only', 'onto', 'or'] -stopwords += ['other', 'others', 'otherwise', 'our', 'ours', 'ourselves'] -stopwords += ['out', 'over', 'own', 'part', 'per', 'perhaps', 'please'] -stopwords += ['put', 'rather', 're', 's', 'same', 'see', 'seem', 'seemed'] -stopwords += ['seeming', 'seems', 'serious', 'several', 'she', 'should'] -stopwords += ['show', 'side', 'since', 'sincere', 'six', 'sixty', 'so'] -stopwords += ['some', 'somehow', 'someone', 'something', 'sometime'] -stopwords += ['sometimes', 'somewhere', 'still', 'such', 'system', 'take'] -stopwords += ['ten', 'than', 'that', 'the', 'their', 'them', 'themselves'] -stopwords += ['then', 'thence', 'there', 'thereafter', 'thereby'] -stopwords += ['therefore', 'therein', 'thereupon', 'these', 'they'] -stopwords += ['thick', 'thin', 'third', 'this', 'those', 'though', 'three'] -stopwords += ['three', 'through', 'throughout', 'thru', 'thus', 'to'] -stopwords += ['together', 'too', 'top', 'toward', 'towards', 'twelve'] -stopwords += ['twenty', 'two', 'un', 'under', 'until', 'up', 'upon'] -stopwords += ['us', 'very', 'via', 'was', 'we', 'well', 'were', 'what'] -stopwords += ['whatever', 'when', 'whence', 'whenever', 'where'] -stopwords += ['whereafter', 'whereas', 'whereby', 'wherein', 'whereupon'] -stopwords += ['wherever', 'whether', 'which', 'while', 'whither', 'who'] -stopwords += ['whoever', 'whole', 'whom', 'whose', 'why', 'will', 
'with'] -stopwords += ['within', 'without', 'would', 'yet', 'you', 'your'] -stopwords += ['yours', 'yourself', 'yourselves'] -``` - -Agora, livrar-se das *stop words* em uma lista é fácil: basta usar outra *list comprehension*. Adicione também essa função ao módulo `obo.py`: - -``` python -# Dada uma lista de palavras, remove qualquer uma que esteja em uma lista de stop words - -def removeStopwords(wordlist, stopwords): - return [w for w in wordlist if w not in stopwords] -``` - -## Juntando Tudo - -Agora temos tudo o que precisamos para determinar frequências de palavras para páginas web. Copie o código a seguir no Komodo Edit, armazene-o como `html-to-freq-2.py` e execute-o: - - -``` python -# html-to-freq-2.py - -import urllib.request, urllib.error, urllib.parse -import obo - -url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33' - -response = urllib.request.urlopen(url) -html = response.read().decode('UTF-8') -text = obo.stripTags(html).lower() -fullwordlist = obo.stripNonAlphaNum(text) -wordlist = obo.removeStopwords(fullwordlist, obo.stopwords) -dictionary = obo.wordListToFreqDict(wordlist) -sorteddict = obo.sortFreqDict(dictionary) - -for s in sorteddict: print(str(s)) -``` - -Se tudo correu bem, sua saída deve-se parecer com isto: - -``` python -(25, 'house') -(20, 'yes') -(20, 'prisoner') -(19, 'mr') -(17, 'man') -(15, 'akerman') -(14, 'mob') -(13, 'black') -(12, 'night') -(11, 'saw') -(9, 'went') -(9, 'sworn') -(9, 'room') -(9, 'pair') -(9, 'know') -(9, 'face') -(8, 'time') -(8, 'thing') -(8, 'june') -(8, 'believe') -... -``` - -## Leituras Sugeridas - -Lutz, Learning Python - -- Ch. 9: Tuples, Files, and Everything Else -- Ch. 11: Assignment, Expressions, and print -- Ch. 12: if Tests -- Ch. 13: while and for Loops - -Pilgrim, Diving into Python - -- Ch. 7: [Regular Expressions][] - -## Sincronização de Código - -Para acompanhar lições futuras, é importante ter os ficheiros e programas corretos no seu diretório “programming-historian”. No final de cada lição, é possível fazer o *download* do ficheiro zip “programming-historian” para garantir que possui o código correto. - -- programming-historian-5 ([zip sync][]) - - [list comprehension]: http://docs.python.org/tutorial/datastructures.html#list-comprehensions - [cientistas da computação em Glasgow]: http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words - [Regular Expressions]: https://web.archive.org/web/20180416143856/http://www.diveintopython.net/regular_expressions/index.html - [zip]: https://programminghistorian.org/assets/python-lessons4.zip - [zip sync]: https://programminghistorian.org/assets/python-lessons5.zip - [^1]: Na língua portuguesa, palavras similares seriam "e", "de", "da", "do", "um", "uma", dentre outras, a depender de cada caso. +--- +title: Contagem de Frequências de Palavras com Python +layout: lesson +slug: contar-frequencias-palavras-python +date: 2012-07-17 +translation_date: 2022-01-13 +authors: +- William J. Turkel +- Adam Crymble +reviewers: +- Jim Clifford +- Frederik Elwert +editors: +- Miriam Posner +translator: +- Felipe Lamarca +translation-editor: +- Jimmy Medeiros +translation-reviewer: +- Ana Carolina Erthal +- Joana Vieira Paulino +difficulty: 2 +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/461 +activity: analyzing +topics: [python] +abstract: "Contar a frequência de palavras específicas de uma lista pode fornecer dados esclarecedores. Esta lição ensinará uma maneira fácil de contar essas frequências com Python." 
+original: counting-frequencies +avatar_alt: Homem descontente sentado em um tronco cercado por pássaros +doi: 10.46430/phpt0023 +--- + +{% include toc.html %} + +## Objetivos da Lição + +Sua lista agora está limpa o suficiente para que possa começar a analisar seu conteúdo de maneiras significativas. Contar a frequência de palavras específicas de uma lista pode fornecer dados esclarecedores. Python possui uma maneira fácil de contar frequências, mas requer o uso de um novo tipo de variável: o *dicionário*. Antes de começar a trabalhar com um dicionário, considere os processos utilizados para calcular frequências em uma lista. + +### Ficheiros Necessários para esta Lição + +- `obo.py` + +Caso não possua esse ficheiro, pode fazer o *download* do ficheiro ([zip][]) que contém todo o código das lições anteriores desta série. + +## Frequências + +Agora desejamos contar a frequência de cada palavra em nossa lista. Já viu que é fácil de processar uma lista utilizando um `for` *loop*. Tente salvar e executar o exemplo a seguir. Lembre-se de que `+=` informa ao programa para acrescentar algo ao final de uma variável existente. + +``` python +# count-list-items-1.py + +wordstring = 'foi o melhor dos tempos foi o pior dos tempos ' +wordstring += 'foi a idade da sabedoria foi a idade da ignorância' +wordlist = wordstring.split() + +wordfreq = [] +for w in wordlist: + wordfreq.append(wordlist.count(w)) + +print("String\n" + wordstring +"\n") +print("Lista\n" + str(wordlist) + "\n") +print("Frequências\n" + str(wordfreq) + "\n") +print("Pares\n" + str(list(zip(wordlist, wordfreq)))) +``` + +Aqui, começamos com uma string e separamo-la em uma lista, como fizemos anteriormente. Depois disso criamos uma lista (inicialmente vazia) chamada `wordfreq`, percorremos cada palavra na `wordlist` e contamos o número de vezes que aquela palavra aparece em toda a lista. Então, adicionamos a contagem de cada palavra à nossa lista `wordfreq`. Utilizando a operação `zip`, somos capazes de combinar a primeira palavra da lista de palavras com o primeiro número na lista de frequências, a segunda palavra e a segunda frequência e assim por diante. Terminamos com uma lista de pares de palavras e frequências. A função `str` converte qualquer objeto numa string para que ele possa ser exibido. + +Deve obter algo assim: + +``` python +String +foi o melhor dos tempos foi o pior dos tempos foi a idade da sabedoria foi a idade da ignorância + +Lista +['foi', 'o', 'melhor', 'dos', 'tempos', 'foi', 'o', 'pior', 'dos', 'tempos', 'foi', 'a', 'idade', 'da', 'sabedoria', 'foi', 'a', 'idade', 'da', 'ignorância'] + +Frequências +[4, 2, 1, 2, 2, 4, 2, 1, 2, 2, 4, 2, 2, 2, 1, 4, 2, 2, 2, 1] + +Pares +[('foi', 4), ('o', 2), ('melhor', 1), ('dos', 2), ('tempos', 2), ('foi', 4), ('o', 2), ('pior', 1), ('dos', 2), ('tempos', 2), ('foi', 4), ('a', 2), ('idade', 2), ('da', 2), ('sabedoria', 1), ('foi', 4), ('a', 2), ('idade', 2), ('da', 2), ('ignorância', 1)] +``` + +Valerá a pena estudar o código acima até entendê-lo antes de continuar. + +O Python também inclui uma ferramenta muito conveniente chamada *[list comprehension][]* (ver uma explicação do método de [compreensão de lista](https://pt.wikipedia.org/wiki/Compreens%C3%A3o_de_lista) em português), que pode ser utilizada para fazer o mesmo que um `for` *loop* de maneira mais económica. 
+ +``` python +# count-list-items-1.py + +wordstring = 'foi o melhor dos tempos foi o pior dos tempos ' +wordstring += 'foi a idade da sabedoria foi a idade da ignorância' +wordlist = wordstring.split() + +wordfreq = [wordlist.count(w) for w in wordlist] # uma list comprehension + +print("String\n" + wordstring +"\n") +print("Lista\n" + str(wordlist) + "\n") +print("Frequências\n" + str(wordfreq) + "\n") +print("Pares\n" + str(list(zip(wordlist, wordfreq)))) +``` + +Se estudar esse método de compreensão de lista cuidadosamente, descobrirá que ele faz exatamente o mesmo que o `for` *loop* no exemplo anterior, mas de maneira condensada. Qualquer um dos métodos funcionará bem, então use a versão com a qual se sente mais confortável. + +Em geral é prudente utilizar um código que entenda ao invés de um código que seja executado mais rapidamente. + +Neste ponto, temos uma lista de pares, onde cada par contém uma palavra e sua frequência. Essa lista é um pouco redundante. Se 'the' ocorre 500 vezes, então essa lista contém quinhentas cópias do par ('the', 500). Essa lista também está ordenada pelas palavras no texto original, ao invés de listar as palavras na ordem da mais frequente para a menos frequente. Podemos resolver esses problemas convertendo-a em um dicionário, e depois exibindo o dicionário na ordem do item mais comum para o menos comum. + +## Dicionários de Python + +Tanto strings quanto listas são ordenadas sequencialmente, o que significa que pode acessar seus conteúdos utilizando um índice (*index*), um número que começa no 0. Caso tenha uma lista contendo strings, pode utilizar um par de índices para acessar uma string particular na lista, e depois um caractere particular naquela string. Estude os exemplos abaixo: + + +``` python + +s = 'olá mundo' +print(s[0]) +-> o + +print(s[1]) +-> l + +m = ['olá', 'mundo'] +print(m[0]) +-> olá + +print(m[1]) +-> mundo + +print(m[0][1]) +-> l + +print(m[1][0]) +-> m +``` + +Para manter controle sobre as frequências, utilizaremos outro tipo de objeto Python: um dicionário. O dicionário é uma coleção não ordenada de objetos. Isso significa que não pode utilizar índices para recuperar seus elementos. Pode, por outro lado, buscá-los utilizando uma chave, ou *key* no inglês (daí o nome "dicionário"). Estude o exemplo a seguir: + + +``` python + +d = {'mundo': 1, 'olá': 0} +print(d['olá']) +-> 0 + +print(d['mundo']) +-> 1 + +print(d.keys()) +-> dict_keys(['mundo', 'olá']) +``` + +Dicionários podem ser um pouco confusos para um novo programador. Tente pensar neles como um dicionário de idiomas. Caso não saiba (ou não se lembre) como exatamente "*bijection*" difere de "*surjection*", pode buscar pelos dois termos no *Oxford English Dictionary*. O mesmo princípio se aplica quando realiza um `print(d['olá'])` exceto pelo fato de que, ao invés de exibir uma definição literária, ele exibe o valor associado à palavra-chave 'olá', conforme definido por você quando criou o dicionário chamado `d`. Nesse caso, esse valor é "0". + +Observe que usa chaves para definir um dicionário, mas colchetes para acessar coisas dentro dele. A operação `keys` retorna uma lista de chaves que estão definidas no dicionário. + +## Pares Palavra-Frequência + +Com base no que temos até agora, queremos uma função que seja capaz de converter uma lista de palavras em um dicionário de pares palavra-frequência. O único comando novo que vamos precisar é `dict`, que faz um dicionário a partir de uma lista de pares. 
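+
+A título de ilustração, eis um esboço mínimo (com uma pequena lista de pares hipotética, que não faz parte do módulo `obo.py`) do que `dict` faz com uma lista de pares:
+
+``` python
+# exemplo ilustrativo: criando um dicionário a partir de uma lista de pares
+pares = [('foi', 4), ('o', 2), ('melhor', 1)]
+print(dict(pares))
+-> {'foi': 4, 'o': 2, 'melhor': 1}
+```
+
+Se uma mesma chave aparecer mais de uma vez na lista, `dict` mantém apenas o último valor associado a ela; no nosso caso isso não é um problema, pois cada ocorrência de uma palavra repetida carrega sempre a mesma frequência.
+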
Copie o código a seguir e adicione-o ao módulo `obo.py`: + +``` python +# Dada uma lista de palavras, retorna um dicionário de pares palavra-frequência. + +def wordListToFreqDict(wordlist): + wordfreq = [wordlist.count(p) for p in wordlist] + return dict(list(zip(wordlist,wordfreq))) +``` + +Também vamos querer uma função que seja capaz de ordenar o dicionário de pares palavra-frequência por frequência decrescente. Copie o código a seguir e adicione-o também ao módulo `obo.py`: + + +``` python +# Ordena um dicionário de pares palavra-frequência em ordem decrescente de frequência. + +def sortFreqDict(freqdict): + aux = [(freqdict[key], key) for key in freqdict] + aux.sort() + aux.reverse() + return aux +``` + +Agora podemos escrever um programa que recebe uma URL e retorna pares palavra-frequência para a página web, de acordo com a ordem decrescente de frequência. Copie o programa a seguir no Komodo Edit, armazene-o como `html-to-freq.py` e execute-o. Estude o programa e seu resultado cuidadosamente antes de continuar. + + +``` python +#html-to-freq.py + +import urllib.request, urllib.error, urllib.parse, obo + +url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33' + +response = urllib.request.urlopen(url) +html = response.read().decode('UTF-8') +text = obo.stripTags(html).lower() +wordlist = obo.stripNonAlphaNum(text) +dictionary = obo.wordListToFreqDict(wordlist) +sorteddict = obo.sortFreqDict(dictionary) + +for s in sorteddict: print(str(s)) +``` + +## Removendo *Stop Words* + +Quando vemos o resultado do nosso programa `html-to-freq.py`, verificamos que muitas das palavras mais frequentes no texto são palavras funcionais como *the*, *of*, *to* e *and*. + +``` python +(192, 'the') +(105, 'i') +(74, 'to') +(71, 'was') +(67, 'of') +(62, 'in') +(53, 'a') +(52, 'and') +(50, 'you') +(50, 'he') +(40, 'that') +(39, 'his') +(36, 'it') +``` + +Essas palavras são geralmente as mais comuns em qualquer texto de língua inglesa, então elas não nos dizem muito a respeito do julgamento de Bowsey. Em geral, estamos mais interessados em encontrar as palavras que nos auxiliarão a diferenciar esse texto de outros textos sobre assuntos distintos. Desse modo, vamos remover as palavras funcionais comuns. Palavras que são ignoradas dessa forma são conhecidas como _stopwords_[^1]. Utilizaremos a lista a seguir, adaptada de uma publicação *online* por [cientistas da computação em Glasgow][]. Copie-a e adicione-a no início da biblioteca `obo.py` que está construindo. 
+ +``` python +stopwords = ['a', 'about', 'above', 'across', 'after', 'afterwards'] +stopwords += ['again', 'against', 'all', 'almost', 'alone', 'along'] +stopwords += ['already', 'also', 'although', 'always', 'am', 'among'] +stopwords += ['amongst', 'amoungst', 'amount', 'an', 'and', 'another'] +stopwords += ['any', 'anyhow', 'anyone', 'anything', 'anyway', 'anywhere'] +stopwords += ['are', 'around', 'as', 'at', 'back', 'be', 'became'] +stopwords += ['because', 'become', 'becomes', 'becoming', 'been'] +stopwords += ['before', 'beforehand', 'behind', 'being', 'below'] +stopwords += ['beside', 'besides', 'between', 'beyond', 'bill', 'both'] +stopwords += ['bottom', 'but', 'by', 'call', 'can', 'cannot', 'cant'] +stopwords += ['co', 'computer', 'con', 'could', 'couldnt', 'cry', 'de'] +stopwords += ['describe', 'detail', 'did', 'do', 'done', 'down', 'due'] +stopwords += ['during', 'each', 'eg', 'eight', 'either', 'eleven', 'else'] +stopwords += ['elsewhere', 'empty', 'enough', 'etc', 'even', 'ever'] +stopwords += ['every', 'everyone', 'everything', 'everywhere', 'except'] +stopwords += ['few', 'fifteen', 'fifty', 'fill', 'find', 'fire', 'first'] +stopwords += ['five', 'for', 'former', 'formerly', 'forty', 'found'] +stopwords += ['four', 'from', 'front', 'full', 'further', 'get', 'give'] +stopwords += ['go', 'had', 'has', 'hasnt', 'have', 'he', 'hence', 'her'] +stopwords += ['here', 'hereafter', 'hereby', 'herein', 'hereupon', 'hers'] +stopwords += ['herself', 'him', 'himself', 'his', 'how', 'however'] +stopwords += ['hundred', 'i', 'ie', 'if', 'in', 'inc', 'indeed'] +stopwords += ['interest', 'into', 'is', 'it', 'its', 'itself', 'keep'] +stopwords += ['last', 'latter', 'latterly', 'least', 'less', 'ltd', 'made'] +stopwords += ['many', 'may', 'me', 'meanwhile', 'might', 'mill', 'mine'] +stopwords += ['more', 'moreover', 'most', 'mostly', 'move', 'much'] +stopwords += ['must', 'my', 'myself', 'name', 'namely', 'neither', 'never'] +stopwords += ['nevertheless', 'next', 'nine', 'no', 'nobody', 'none'] +stopwords += ['noone', 'nor', 'not', 'nothing', 'now', 'nowhere', 'of'] +stopwords += ['off', 'often', 'on','once', 'one', 'only', 'onto', 'or'] +stopwords += ['other', 'others', 'otherwise', 'our', 'ours', 'ourselves'] +stopwords += ['out', 'over', 'own', 'part', 'per', 'perhaps', 'please'] +stopwords += ['put', 'rather', 're', 's', 'same', 'see', 'seem', 'seemed'] +stopwords += ['seeming', 'seems', 'serious', 'several', 'she', 'should'] +stopwords += ['show', 'side', 'since', 'sincere', 'six', 'sixty', 'so'] +stopwords += ['some', 'somehow', 'someone', 'something', 'sometime'] +stopwords += ['sometimes', 'somewhere', 'still', 'such', 'system', 'take'] +stopwords += ['ten', 'than', 'that', 'the', 'their', 'them', 'themselves'] +stopwords += ['then', 'thence', 'there', 'thereafter', 'thereby'] +stopwords += ['therefore', 'therein', 'thereupon', 'these', 'they'] +stopwords += ['thick', 'thin', 'third', 'this', 'those', 'though', 'three'] +stopwords += ['three', 'through', 'throughout', 'thru', 'thus', 'to'] +stopwords += ['together', 'too', 'top', 'toward', 'towards', 'twelve'] +stopwords += ['twenty', 'two', 'un', 'under', 'until', 'up', 'upon'] +stopwords += ['us', 'very', 'via', 'was', 'we', 'well', 'were', 'what'] +stopwords += ['whatever', 'when', 'whence', 'whenever', 'where'] +stopwords += ['whereafter', 'whereas', 'whereby', 'wherein', 'whereupon'] +stopwords += ['wherever', 'whether', 'which', 'while', 'whither', 'who'] +stopwords += ['whoever', 'whole', 'whom', 'whose', 'why', 'will', 
'with']
+stopwords += ['within', 'without', 'would', 'yet', 'you', 'your']
+stopwords += ['yours', 'yourself', 'yourselves']
+```
+
+Agora, livrar-se das *stop words* em uma lista é fácil: basta usar outra *list comprehension*. Adicione também essa função ao módulo `obo.py`:
+
+``` python
+# Dada uma lista de palavras, remove qualquer uma que esteja em uma lista de stop words
+
+def removeStopwords(wordlist, stopwords):
+    return [w for w in wordlist if w not in stopwords]
+```
+
+## Juntando Tudo
+
+Agora temos tudo o que precisamos para determinar frequências de palavras para páginas web. Copie o código a seguir no Komodo Edit, armazene-o como `html-to-freq-2.py` e execute-o:
+
+
+``` python
+# html-to-freq-2.py
+
+import urllib.request, urllib.error, urllib.parse
+import obo
+
+url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33'
+
+response = urllib.request.urlopen(url)
+html = response.read().decode('UTF-8')
+text = obo.stripTags(html).lower()
+fullwordlist = obo.stripNonAlphaNum(text)
+wordlist = obo.removeStopwords(fullwordlist, obo.stopwords)
+dictionary = obo.wordListToFreqDict(wordlist)
+sorteddict = obo.sortFreqDict(dictionary)
+
+for s in sorteddict: print(str(s))
+```
+
+Se tudo correu bem, sua saída deve se parecer com isto:
+
+``` python
+(25, 'house')
+(20, 'yes')
+(20, 'prisoner')
+(19, 'mr')
+(17, 'man')
+(15, 'akerman')
+(14, 'mob')
+(13, 'black')
+(12, 'night')
+(11, 'saw')
+(9, 'went')
+(9, 'sworn')
+(9, 'room')
+(9, 'pair')
+(9, 'know')
+(9, 'face')
+(8, 'time')
+(8, 'thing')
+(8, 'june')
+(8, 'believe')
+...
+```
+
+## Leituras Sugeridas
+
+Lutz, Learning Python
+
+- Ch. 9: Tuples, Files, and Everything Else
+- Ch. 11: Assignment, Expressions, and print
+- Ch. 12: if Tests
+- Ch. 13: while and for Loops
+
+Pilgrim, Diving into Python
+
+- Ch. 7: [Regular Expressions][]
+
+## Sincronização de Código
+
+Para acompanhar lições futuras, é importante ter os ficheiros e programas corretos no seu diretório “programming-historian”. No final de cada lição, é possível fazer o *download* do ficheiro zip “programming-historian” para garantir que possui o código correto.
+
+- programming-historian-5 ([zip sync][])
+
+  [list comprehension]: https://docs.python.org/tutorial/datastructures.html#list-comprehensions
+  [cientistas da computação em Glasgow]: https://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words
+  [Regular Expressions]: https://web.archive.org/web/20180416143856/https://www.diveintopython.net/regular_expressions/index.html
+  [zip]: https://programminghistorian.org/assets/python-lessons4.zip
+  [zip sync]: https://programminghistorian.org/assets/python-lessons5.zip
+  [^1]: Na língua portuguesa, palavras similares seriam "e", "de", "da", "do", "um", "uma", dentre outras, a depender de cada caso.
diff --git a/pt/licoes/criacao-visualizacao-ficheiros-html-python.md b/pt/licoes/criacao-visualizacao-ficheiros-html-python.md
index 37e40a4839..533a14c56b 100644
--- a/pt/licoes/criacao-visualizacao-ficheiros-html-python.md
+++ b/pt/licoes/criacao-visualizacao-ficheiros-html-python.md
@@ -1,146 +1,146 @@
----
-title: Criação e Visualização de Ficheiros HTML com Python
-layout: lesson
-slug: criacao-visualizacao-ficheiros-html-python
-date: 2012-07-17
-translation_date: 2022-10-31
-authors:
-- William J.
Turkel -- Adam Crymble -reviewers: -- Jim Clifford -editors: -- Miriam Posner -translator: -- Felipe Lamarca -translation-editor: -- Jimmy Medeiros -translation-reviewer: -- Gabriela Kucuruza -- Ana Carolina Erthal -difficulty: 2 -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/462 -activity: presenting -topics: [python, website] -abstract: "Com esta lição aprenderá a criar ficheiros HTML com scripts Python e a usar o Python para abrir automaticamente um ficheiro HTML no Firefox." -original: creating-and-viewing-html-files-with-python -avatar_alt: Criança desenhando numa tábua -doi: 10.46430/phpt0030 ---- - -{% include toc.html %} - -## Objetivos da Lição - -Esta lição usa o Python para criar e visualizar um ficheiro HTML. Se escrever programas que produzem HTML, pode utilizar qualquer navegador para ver os seus resultados. Isso é especialmente conveniente se o seu programa cria automaticamente hiperlinks ou entidades gráficas, como gráficos e diagramas. - -Aqui irá aprender como criar ficheiros HTML com scripts Python e como utilizar o Python para abrir um ficheiro HTML automaticamente no Firefox. - -## Ficheiros Necessários para esta Lição - -- `obo.py` - -Caso não possua esses ficheiros da lição anterior, pode fazer o *download* do programming-historian-5, um [ficheiro zip da lição anterior](/assets/python-lessons5.zip). - -## Criando HTML com Python - -Até aqui, aprendemos como usar o Python para fazer o *download* de fontes *online* e extrair informação delas de forma automática. Lembre-se de que o nosso objetivo final é incorporar perfeitamente a programação em nossa prática de investigação. Em linha com este objetivo, nesta lição e na próxima aprenderemos como apresentar dados de volta à forma de HTML. Isso possui algumas vantagens. Primeiro, ao armazenar a informação no nosso disco rígido como um ficheiro HTML, podemos abri-lo com o Firefox e usar o [Zotero](https://www.zotero.org/), por exemplo, para indexar e fazer anotações posteriormente. Segundo, há uma ampla gama de opções de visualização para HTML que podemos usar mais tarde. - -Caso ainda não tenha feito o [tutorial de HTML do W3 Schools](http://www.w3schools.com/html/default.asp), reserve alguns minutos para fazê-lo antes de continuar. Criaremos um documento HTML usando Python, então será saber o que é um documento HTML! - -## "Olá mundo" em HTML usando Python - -Uma das ideias mais poderosas na ciência da computação é que um ficheiro que parece conter código sob uma perspectiva pode ser visto como dados sob outra. É possível, em outras palavras, escrever programas que manipulam outros programas. O que faremos a seguir é criar um ficheiro HTML que diz "Olá mundo!" usando Python. Faremos isso armazenando *tags* HTML em uma string multilinha de Python e guardando os conteúdos em um novo ficheiro. Esse ficheiro será armazenado com uma extensão `.html` ao invés de uma extensão `.txt`. - -Tipicamente um ficheiro HTML começa com uma [declaração do tipo de documento](http://www.w3schools.com/tags/tag_doctype.asp). Vimos isso ao escrever um programa HTML "Olá mundo!" em uma lição anterior. Para facilitar a leitura do nosso código, omitiremos o `doctype` neste exemplo. Lembre-se de que uma string multilinha é criada colocando o texto entre três aspas (veja abaixo): - -``` python -# write-html.py - -f = open('helloworld.html','w') - -message = """ - -

    Olá mundo!

    -""" - -f.write(message) -f.close() -``` - -Salve o programa acima como `write-html.py` e execute-o. Use `Ficheiro -> Abrir` (ou `Arquivo -> Abrir`, na versão brasileira) no editor de texto de sua escolha para abrir `helloworld.html` para verificar que seu programa de fato criou o ficheiro. O conteúdo deve se parecer com isto: - -{% include figure.html filename="hello-world-html.png" caption="Fonte HTML gerada pelo programa Python" %} - -Agora vá para o seu navegador Firefox e escolha `Ficheiro -> Nova Guia` (ou `Arquivo -> Nova aba`, na versão brasileira), vá para a guia e escolha `Ficheiro -> Abrir Ficheiro` (ou `Arquivo -> Abrir arquivo`, na versão brasileira). Selecione `helloworld.html`. Deve agora ser capaz de ver a sua mensagem no navegador. Reserve um momento para pensar sobre isso: agora tem a habilidade de escrever um programa que pode criar uma página web automaticamente. Não há razão pela qual não possa escrever um programa para criar automaticamente um *site* inteiro, caso deseje. - -
    - Por questões de versionamento, é possível que o seu navegador Firefox não possua a opção de abrir um ficheiro manualmente na guia. Nesse caso, procure pelo ficheiro HTML no seu diretório, clique nele com o botão direito e selecione a opção de abri-lo com o navegador Firefox. -
    - -## Usando o Python para Controlar o Firefox - -Nós criamos um ficheiro HTML automaticamente, mas depois precisamos deixar o nosso editor, ir para o Firefox e abrir o ficheiro em uma nova guia. Não seria melhor incluir essa etapa final no nosso programa Python? Digite ou copie o código abaixo e armazene-o como `write-html-2.py`. Quando executá-lo, ele deve criar o seu ficheiro HTML e depois abri-lo automaticamente numa nova guia do Firefox. Maravilha! - -### Instruções para Mac - -Usuários de Mac precisarão especificar a localização precisa do ficheiro `.html` nos seus computadores. Para fazer isso, localize a pasta `programming-historian` que criou para fazer esses tutoriais, clique com o botão direito nela e selecione "Obter Informações" (ou "*Get Info*"). - -Pode então recortar e colar a localização do ficheiro listado depois de "Onde:" (ou "*Where:*") e se certificar de incluir uma barra final (/) para que o computador saiba que deseja algo dentro desse diretório (e não o diretório em si). - - -``` python -# write-html-2-mac.py -import webbrowser - -f = open('helloworld.html','w') - -message = """ - -

    Olá mundo!

    -""" - -f.write(message) -f.close() - -#Altere o caminho para refletir a localização do ficheiro -filename = 'file:///Users/username/Desktop/programming-historian/' + 'helloworld.html' -webbrowser.open_new_tab(filename) -``` - -Caso receba um erro "Ficheiro não encontrado" (ou "*File not found*"), significa que não mudou o caminho para o ficheiro corretamente. - -### Instruções para Windows - -``` python -# write-html-2-windows.py - -import webbrowser - -f = open('helloworld.html','w') - -message = """ - -

    Olá mundo!

-"""
-
-f.write(message)
-f.close()
-
-webbrowser.open_new_tab('helloworld.html')
-```
-
-\*\*\*
-
-No final, não só escreveu um programa Python que pode criar um HTML simples, mas também controlou o seu navegador Firefox utilizando Python. Na próxima lição, focaremos em apresentar os dados que coletamos na forma de um ficheiro HTML.
-
-## Leituras Sugeridas
-
-- Lutz, Learning Python
-  - Re-read and review Chs. 1-17
-
-## Sincronização de Código
-
-Para acompanhar lições futuras, é importante ter os ficheiros e programas corretos no seu diretório “programming-historian”. No final de cada lição, é possível fazer o *download* do ficheiro zip “programming-historian” para garantir que possui o código correto. Caso esteja acompanhando com a versão para Mac / Linux, deve ter que abrir o ficheiro `obo.py` e mudar "file:///Users/username/Desktop/programming-historian/" para o caminho até o diretório no seu próprio computador.
-
-- [python-lessons6.zip](/assets/python-lessons6.zip)
+---
+title: Criação e Visualização de Ficheiros HTML com Python
+layout: lesson
+slug: criacao-visualizacao-ficheiros-html-python
+date: 2012-07-17
+translation_date: 2022-10-31
+authors:
+- William J. Turkel
+- Adam Crymble
+reviewers:
+- Jim Clifford
+editors:
+- Miriam Posner
+translator:
+- Felipe Lamarca
+translation-editor:
+- Jimmy Medeiros
+translation-reviewer:
+- Gabriela Kucuruza
+- Ana Carolina Erthal
+difficulty: 2
+review-ticket: https://github.com/programminghistorian/ph-submissions/issues/462
+activity: presenting
+topics: [python, website]
+abstract: "Com esta lição aprenderá a criar ficheiros HTML com scripts Python e a usar o Python para abrir automaticamente um ficheiro HTML no Firefox."
+original: creating-and-viewing-html-files-with-python
+avatar_alt: Criança desenhando numa tábua
+doi: 10.46430/phpt0030
+---
+
+{% include toc.html %}
+
+## Objetivos da Lição
+
+Esta lição usa o Python para criar e visualizar um ficheiro HTML. Se escrever programas que produzem HTML, pode utilizar qualquer navegador para ver os seus resultados. Isso é especialmente conveniente se o seu programa cria automaticamente hiperlinks ou entidades gráficas, como gráficos e diagramas.
+
+Aqui irá aprender como criar ficheiros HTML com scripts Python e como utilizar o Python para abrir um ficheiro HTML automaticamente no Firefox.
+
+## Ficheiros Necessários para esta Lição
+
+- `obo.py`
+
+Caso não possua esses ficheiros da lição anterior, pode fazer o *download* do programming-historian-5, um [ficheiro zip da lição anterior](/assets/python-lessons5.zip).
+
+## Criando HTML com Python
+
+Até aqui, aprendemos como usar o Python para fazer o *download* de fontes *online* e extrair informação delas de forma automática. Lembre-se de que o nosso objetivo final é incorporar perfeitamente a programação em nossa prática de investigação. Em linha com este objetivo, nesta lição e na próxima aprenderemos como apresentar dados de volta à forma de HTML. Isso possui algumas vantagens. Primeiro, ao armazenar a informação no nosso disco rígido como um ficheiro HTML, podemos abri-lo com o Firefox e usar o [Zotero](https://www.zotero.org/), por exemplo, para indexar e fazer anotações posteriormente. Segundo, há uma ampla gama de opções de visualização para HTML que podemos usar mais tarde.
+
+Caso ainda não tenha feito o [tutorial de HTML do W3 Schools](https://www.w3schools.com/html/default.asp), reserve alguns minutos para fazê-lo antes de continuar. Criaremos um documento HTML usando Python, então será útil saber o que é um documento HTML!
+ +## "Olá mundo" em HTML usando Python + +Uma das ideias mais poderosas na ciência da computação é que um ficheiro que parece conter código sob uma perspectiva pode ser visto como dados sob outra. É possível, em outras palavras, escrever programas que manipulam outros programas. O que faremos a seguir é criar um ficheiro HTML que diz "Olá mundo!" usando Python. Faremos isso armazenando *tags* HTML em uma string multilinha de Python e guardando os conteúdos em um novo ficheiro. Esse ficheiro será armazenado com uma extensão `.html` ao invés de uma extensão `.txt`. + +Tipicamente um ficheiro HTML começa com uma [declaração do tipo de documento](https://www.w3schools.com/tags/tag_doctype.asp). Vimos isso ao escrever um programa HTML "Olá mundo!" em uma lição anterior. Para facilitar a leitura do nosso código, omitiremos o `doctype` neste exemplo. Lembre-se de que uma string multilinha é criada colocando o texto entre três aspas (veja abaixo): + +``` python +# write-html.py + +f = open('helloworld.html','w') + +message = """ + +

    Olá mundo!

    +""" + +f.write(message) +f.close() +``` + +Salve o programa acima como `write-html.py` e execute-o. Use `Ficheiro -> Abrir` (ou `Arquivo -> Abrir`, na versão brasileira) no editor de texto de sua escolha para abrir `helloworld.html` para verificar que seu programa de fato criou o ficheiro. O conteúdo deve se parecer com isto: + +{% include figure.html filename="hello-world-html.png" caption="Fonte HTML gerada pelo programa Python" %} + +Agora vá para o seu navegador Firefox e escolha `Ficheiro -> Nova Guia` (ou `Arquivo -> Nova aba`, na versão brasileira), vá para a guia e escolha `Ficheiro -> Abrir Ficheiro` (ou `Arquivo -> Abrir arquivo`, na versão brasileira). Selecione `helloworld.html`. Deve agora ser capaz de ver a sua mensagem no navegador. Reserve um momento para pensar sobre isso: agora tem a habilidade de escrever um programa que pode criar uma página web automaticamente. Não há razão pela qual não possa escrever um programa para criar automaticamente um *site* inteiro, caso deseje. + +
    + Por questões de versionamento, é possível que o seu navegador Firefox não possua a opção de abrir um ficheiro manualmente na guia. Nesse caso, procure pelo ficheiro HTML no seu diretório, clique nele com o botão direito e selecione a opção de abri-lo com o navegador Firefox. +
    + +## Usando o Python para Controlar o Firefox + +Nós criamos um ficheiro HTML automaticamente, mas depois precisamos deixar o nosso editor, ir para o Firefox e abrir o ficheiro em uma nova guia. Não seria melhor incluir essa etapa final no nosso programa Python? Digite ou copie o código abaixo e armazene-o como `write-html-2.py`. Quando executá-lo, ele deve criar o seu ficheiro HTML e depois abri-lo automaticamente numa nova guia do Firefox. Maravilha! + +### Instruções para Mac + +Usuários de Mac precisarão especificar a localização precisa do ficheiro `.html` nos seus computadores. Para fazer isso, localize a pasta `programming-historian` que criou para fazer esses tutoriais, clique com o botão direito nela e selecione "Obter Informações" (ou "*Get Info*"). + +Pode então recortar e colar a localização do ficheiro listado depois de "Onde:" (ou "*Where:*") e se certificar de incluir uma barra final (/) para que o computador saiba que deseja algo dentro desse diretório (e não o diretório em si). + + +``` python +# write-html-2-mac.py +import webbrowser + +f = open('helloworld.html','w') + +message = """ + +

    Olá mundo!

    +""" + +f.write(message) +f.close() + +#Altere o caminho para refletir a localização do ficheiro +filename = 'file:///Users/username/Desktop/programming-historian/' + 'helloworld.html' +webbrowser.open_new_tab(filename) +``` + +Caso receba um erro "Ficheiro não encontrado" (ou "*File not found*"), significa que não mudou o caminho para o ficheiro corretamente. + +### Instruções para Windows + +``` python +# write-html-2-windows.py + +import webbrowser + +f = open('helloworld.html','w') + +message = """ + +

    Olá mundo!

+"""
+
+f.write(message)
+f.close()
+
+webbrowser.open_new_tab('helloworld.html')
+```
+
+\*\*\*
+
+No final, não só escreveu um programa Python que pode criar um HTML simples, mas também controlou o seu navegador Firefox utilizando Python. Na próxima lição, focaremos em apresentar os dados que coletamos na forma de um ficheiro HTML.
+
+## Leituras Sugeridas
+
+- Lutz, Learning Python
+  - Re-read and review Chs. 1-17
+
+## Sincronização de Código
+
+Para acompanhar lições futuras, é importante ter os ficheiros e programas corretos no seu diretório “programming-historian”. No final de cada lição, é possível fazer o *download* do ficheiro zip “programming-historian” para garantir que possui o código correto. Caso esteja acompanhando com a versão para Mac / Linux, deve ter que abrir o ficheiro `obo.py` e mudar "file:///Users/username/Desktop/programming-historian/" para o caminho até o diretório no seu próprio computador.
+
+- [python-lessons6.zip](/assets/python-lessons6.zip)
diff --git a/pt/licoes/download-automatico-wget.md b/pt/licoes/download-automatico-wget.md
index c6cabf717d..31a2e9d3d1 100644
--- a/pt/licoes/download-automatico-wget.md
+++ b/pt/licoes/download-automatico-wget.md
@@ -142,7 +142,7 @@ Neste ponto, a instalação do wget já deve estar concluída satisfatoriamente
 
 Se, por alguma razão, não conseguir instalar o pacote de gerenciamento, poderá simplesmente fazer o download do wget em separado. Esta opção é aplicável se utiliza um pacote de gerenciamento diferente (tal como Mac Ports) ou se deseja manter a infraestrutura num padrão mínimo. Siga as mesmas instruções novamente para instalar o xcode e o conjunto de ferramentas de linha de comando (Command Line Tools).
 
-A seguir, faça o download de uma versão não compilada do wget no [website do GNU](http://www.gnu.org/software/wget/) (Eu escolhi fazer o dowload do ficheiro `wget-1.13.tar.gz`, disponível tanto no link [HTTP](http://ftp.gnu.org/gnu/wget/) como na página de downloads do [FTP](ftp://ftp.gnu.org/gnu/wget/), descompacte-o (clicando duas vezes sobre o ficheiro) no seu diretório 'home' (em um Mac, este será o `/User` directory – por exemplo, meu nome de usuário é ianmilligan e aparece próximo ao ícone de uma casa no meu localizador), e depois abra o Terminal. Para este tutorial, a versão do download é o `wget-1.13`.
+A seguir, faça o download de uma versão não compilada do wget no [website do GNU](https://www.gnu.org/software/wget/) (Eu escolhi fazer o download do ficheiro `wget-1.13.tar.gz`, disponível tanto no link [HTTP](https://ftp.gnu.org/gnu/wget/) como na página de downloads do [FTP](ftp://ftp.gnu.org/gnu/wget/)), descompacte-o (clicando duas vezes sobre o ficheiro) no seu diretório 'home' (em um Mac, este será o `/User` directory – por exemplo, meu nome de usuário é ianmilligan e aparece próximo ao ícone de uma casa no meu localizador), e depois abra o Terminal. Para este tutorial, a versão do download é o `wget-1.13`.
 
 Primeiramente, é preciso se direcionar para o diretório onde se encontram os ficheiros wget. No terminal, digite:
 
@@ -188,7 +188,7 @@ De agora em diante, os usuários de todas as três plataformas estão em sintoni
 
 A documentação completa para wget pode ser encontrada na página [manual GNU wget](https://perma.cc/67JQ-TSB5).
 
-Tome-se um exemplo de conjunto de dados. Digamos que queira fazer o download de todos os artigos hospedados no website [ActiveHistory.ca](https://perma.cc/KK9H-4XKL).
Eles estão localizados em [http://activehistory.ca/papers/](https://perma.cc/CL79-ZN93); o que indica que eles estão todos contidos no diretório `/papers/`: por exemplo, o nono artigo publicado no website é o [http://activehistory.ca/papers/historypaper-9/](https://perma.cc/KF6E-8XZM). Pense nesta estrutura da mesma maneira que os diretórios do seu computador: se tiver uma pasta intitulada `/História/`, ela provavelmente conterá vários ficheiros. +Tome-se um exemplo de conjunto de dados. Digamos que queira fazer o download de todos os artigos hospedados no website [ActiveHistory.ca](https://perma.cc/KK9H-4XKL). Eles estão localizados em [https://activehistory.ca/papers/](https://perma.cc/CL79-ZN93); o que indica que eles estão todos contidos no diretório `/papers/`: por exemplo, o nono artigo publicado no website é o [https://activehistory.ca/papers/historypaper-9/](https://perma.cc/KF6E-8XZM). Pense nesta estrutura da mesma maneira que os diretórios do seu computador: se tiver uma pasta intitulada `/História/`, ela provavelmente conterá vários ficheiros. A mesma estrutura é válida para websites, e é utilizada esta lógica para informar ao computador quais ficheiros deseja-se fazer download. @@ -226,7 +226,7 @@ index.html [ <=> ] 65,60K --.-KB/s em 0,04s 2023-08-08 15:58:54 (1,83 MB/s) - ‘index.html’ salvo [67178] ``` -O que fez foi apenas o download da primeira página do [http://activehistory.ca/papers/](https://perma.cc/CL79-ZN93), a página de index dos artigos, para seu novo diretório. Se abri-la, verá o texto principal da página principal (homepage) do ActiveHistory.ca. Então, num piscar de olhos, já fizemos o download de algo rapidamente. +O que fez foi apenas o download da primeira página do [https://activehistory.ca/papers/](https://perma.cc/CL79-ZN93), a página de index dos artigos, para seu novo diretório. Se abri-la, verá o texto principal da página principal (homepage) do ActiveHistory.ca. Então, num piscar de olhos, já fizemos o download de algo rapidamente. No entanto, o objetivo é fazer o download de todos os artigos. Para isto é preciso incluir alguns poucos comandos no wget. @@ -240,7 +240,7 @@ No exemplo anterior, o componente [URL] informa ao programa para onde ele deve i -r -A recuperação recursiva é a parte mais importante do wget. Isto significa que o programa, ao iniciar, segue os links do website e também faz o download dos mesmos. Desta forma, por exemplo, o [http://activehistory.ca/papers/](https://perma.cc/CL79-ZN93) possui um link para o [http://activehistory.ca/papers/historypaper-9/](https://perma.cc/KF6E-8XZM), assim, ele fará o download deste também, ao utilizar a recuperação recursiva. Contudo, ele também seguirá quaisquer outros links: se houver um link para [http://uwo.ca](https://perma.cc/W7LH-SRTQ) em algum local daquela página, ele o seguirá e também fará o download. Por padrão, `-r` direciona o wget a até cinco websites após o primeiro. Isto consiste em seguir links até um limite de cinco cliques após o primeiro website. Desta maneira, funcionará de maneira bastante indiscriminada. Então precisamos de mais comandos: +A recuperação recursiva é a parte mais importante do wget. Isto significa que o programa, ao iniciar, segue os links do website e também faz o download dos mesmos. Desta forma, por exemplo, o [https://activehistory.ca/papers/](https://perma.cc/CL79-ZN93) possui um link para o [https://activehistory.ca/papers/historypaper-9/](https://perma.cc/KF6E-8XZM), assim, ele fará o download deste também, ao utilizar a recuperação recursiva. 
Contudo, ele também seguirá quaisquer outros links: se houver um link para [https://uwo.ca](https://perma.cc/W7LH-SRTQ) em algum local daquela página, ele o seguirá e também fará o download. Por padrão, `-r` direciona o wget a até cinco websites após o primeiro. Isto consiste em seguir links até um limite de cinco cliques após o primeiro website. Desta maneira, funcionará de maneira bastante indiscriminada. Então precisamos de mais comandos: ``` bash --no-parent @@ -248,7 +248,7 @@ A recuperação recursiva é a parte mais importante do wget. Isto significa que (O travessão duplo indica o texto completo de um comando. Todos os comandos também possuem uma versão abreviada que pode se iniciar com a utilização de `-np`). -Isto é importante. Significa que o wget deve seguir os links, mas não além do último diretório pai. No caso, implica dizer que ele não avançará a lugar nenhum que não seja parte da hierarquia do [http://activehistory.ca/papers/](https://perma.cc/CL79-ZN93). Se o endereço web for muito longo como `http://niche-canada.org/projects/events/new-events/not-yet-happened-events/`, ele encontra ficheiros apenas na pasta `/not-yet-happened-events/`. Este é um comando essencial para delinear sua pesquisa. +Isto é importante. Significa que o wget deve seguir os links, mas não além do último diretório pai. No caso, implica dizer que ele não avançará a lugar nenhum que não seja parte da hierarquia do [https://activehistory.ca/papers/](https://perma.cc/CL79-ZN93). Se o endereço web for muito longo como `http://niche-canada.org/projects/events/new-events/not-yet-happened-events/`, ele encontra ficheiros apenas na pasta `/not-yet-happened-events/`. Este é um comando essencial para delinear sua pesquisa. Aqui está uma representação gráfica: diff --git a/pt/licoes/download-multiplos-registros-query-strings.md b/pt/licoes/download-multiplos-registros-query-strings.md index dd4d26e209..95e6c9ee0e 100644 --- a/pt/licoes/download-multiplos-registros-query-strings.md +++ b/pt/licoes/download-multiplos-registros-query-strings.md @@ -1,778 +1,778 @@ ---- -title: Download de Múltiplos Registros usando Query Strings -layout: lesson -collection: lessons -slug: download-multiplos-registros-query-strings -date: 2012-11-11 -translation_date: 2022-11-25 -authors: -- Adam Crymble -reviewers: -- Luke Bergmann -- Sharon Howard -- Frederik Elwert -editors: -- Fred Gibbs -translator: -- Felipe Lamarca -translation-editor: -- Jimmy Medeiros -translation-reviewer: -- André Salvo -- Aracele Torres -difficulty: 2 -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/465 -activity: acquiring -topics: [web-scraping] -abstract: "Fazer o download de um único registro de um website é fácil, mas fazer o download de vários registros de uma vez - uma necessidade cada vez mais frequente para um historiador - é muito mais eficiente usando uma linguagem de programação como o Python. Nessa lição, escreveremos um programa que fará o download de uma série de registros do Old Bailey Online usando critérios de busca personalizados e irá armazená-los num diretório no nosso computador." -redirect_from: /licoes/download-de-multiplos-registros-usando-query-strings -original: downloading-multiple-records-using-query-strings -avatar_alt: Figuras trabalhando numa mina, empurrando carrinhos -doi: 10.46430/phpt0034 ---- - -{% include toc.html %} - -
    -O site do Old Bailey Online foi recentemente atualizado. Infelizmente, devido às diversas mudanças, muitos (se não todos) os elementos do site de exemplo usado nesta lição não funcionarão conforme descrito. No entanto, as metodologias ensinadas por esta lição permanecem relevantes e podem ser adaptadas pelos leitores para um site de exemplo diferente. Estamos trabalhando na adaptação da lição para o novo site do Old Bailey Online, mas ainda não temos cronograma preciso de quando a lição será atualizada. [Abril de 2024] -
    - -## Objetivos do Módulo - -Fazer o *download* de um único registro de um website é fácil, mas fazer o *download* de vários registros de uma vez - uma necessidade cada vez mais frequente para um historiador - é muito mais eficiente usando uma linguagem de programação como o Python. Nesta lição, escreveremos um programa que fará o *download* de uma série de registros do *[Old Bailey Online](http://www.oldbaileyonline.org/)* usando critérios de investigação personalizados e irá armazená-los num diretório no nosso computador. Esse processo envolve interpretar e manipular *Query Strings* de URL. Nesse caso, o tutorial buscará fazer o *download* de fontes que contenham referências a afrodescendentes que foram publicadas no *Old Bailey Proceedings* entre 1700 e 1750. - -
    -Os exemplos nessa lição incluem linguagem histórica racializada que os leitores podem achar ofensiva. O autor não tolera o uso dessa linguagem, mas tentou usá-la no seu contexto histórico, reconhecendo que, de outra forma, é impossível encontrar os materiais desejados do estudo de caso. Qualquer pessoa que ensine com este material é aconselhada a adotar uma abordagem sensível em relação à linguagem e a aplicar as boas práticas ao ensinar sobre raça. O autor recomenda os muitos recursos do Teaching Tolerance; Peggy McIntosh, ‘White Privilege: Unpacking the Invisible Knapsack’, Peace and Freedom Magazine, (1989), 10-12; Binyavanga Wainaina, ‘How to Write About Africa’, Granta (92): 2006. -
    - -## Para Quem isso é Útil? - -Automatizar o processo de *download* de registros de uma base de dados *online* será útil para qualquer um que trabalhe com fontes históricas armazenadas *online* de forma ordenada e acessível e que deseje salvar cópias dessas fontes no seu próprio computador. É particularmente útil para alguém que deseja fazer o *download* de vários registros específicos, em vez de apenas um punhado. Caso deseje fazer o *download* de *todos* ou da *maioria* dos registros de uma base de dados em particular, pode achar o tutorial de Ian Milligan sobre [Automated Downloading with WGET](/en/lessons/automated-downloading-with-wget) mais adequado. - -O presente tutorial permitirá que faça *download* de forma isolada e discriminada de registros específicos que atendam às suas necessidades. Fazer o *download* de múltiplas fontes de forma automática economiza um tempo considerável. O que faz com as fontes baixadas depende dos seus objetivos de investigação. Pode desejar criar visualizações ou realizar uma série de métodos de análise de dados, ou simplesmente reformatá-las para facilitar a navegação. Ou pode desejar apenas manter uma cópia de *backup* para poder acessá-las sem acesso à internet. - -Essa lição é voltada para usuários de Python com nível intermediário. Caso ainda não tenha tentado as lições do [Básico de Programação em Python](/pt/licoes/introducao-instalacao-python), pode achá-las um ponto de partida útil. - -## Aplicando nosso Conhecimento Histórico - -Nesta lição, estamos tentando criar o nosso próprio corpus de casos relacionados com pessoas afrodescendentes. A partir do [caso de Benjamin Bowsey](http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33) no *Old Bailey* em 1780, podemos notar que "*black*" pode ser uma palavra-chave útil para usarmos para localizar outros casos envolvendo réus de ascendência africana. No entanto, quando buscamos por *black* no *website* do *Old Bailey*, percebemos que esta palavra às vezes se refere a outros usos: *black horses* ou *black cloth*. A tarefa de desambiguar esse uso da linguagem terá que esperar por outra lição. Por enquanto, vamos nos voltar para casos mais fáceis. Como historiadores, provavelmente, podemos pensar em palavras-chave de termos historicamente racializados relacionados com afrodescendentes as quais valeria a pena buscar. A infame "*n-word*", é claro, não é útil, já que esse termo não era comumente utilizado até meados do século XIX. Outras expressões racializadas como "*negro*" e "*mulatto*" são, porém, muito mais relevantes para o início do século XVIII. Essas palavras-chave são menos ambíguas do que "*black*" e são muito mais propensas a serem referências imediatas a pessoas no nosso público-alvo. Se testarmos esses dois termos em buscas separadas simples no *Old Bailey website*, temos resultados como nessa captura de tela: - -{% include figure.html filename="SearchResultsNegro.png" caption="Resultados de investigação para 'negro' no *Old Bailey Online*" %} - -{% include figure.html filename="SearchResultsMulatto.png" caption="Resultados de investigação para 'mulatto' no *Old Bailey Online*" %} - -Depois de examinar estes resultados de busca, parece evidente que são referências a pessoas e não a cavalos, panos ou qualquer outra coisa que seja preta. Desejamos fazer o *download* de todas para usar na nossa análise. Poderíamos, é claro, fazer o *download* de uma por uma manualmente. Mas vamos encontrar uma maneira programática de automatizar essa tarefa. 
- -## A Investigação Avançada no OBO - -As ferramentas de pesquisa de cada *site* funcionam de maneira diferente. Embora as pesquisas funcionem de forma semelhante, as complexidades das pesquisas numa base de dados podem não ser totalmente óbvias. Portanto, é importante pensar criticamente sobre as opções de busca de uma base de dados e, quando disponível, ler a documentação fornecida pelo *website*. Investigadores de história prudentes sempre interrogam suas fontes; os procedimentos por trás das suas caixas de pesquisa devem receber a mesma atenção. O [formulário de busca avançada](http://www.oldbaileyonline.org/forms/formMain.jsp) do *Old Bailey Online* permite refinar as suas buscas com base em dez campos diferentes, incluindo palavras-chave simples, um intervalo de datas e um tipo de crime. Como as ferramentas de busca de cada *website* são diferentes, vale sempre a pena reservar um momento ou dois para testar e ler a respeito das opções de investigação disponíveis. Uma vez que já fizemos buscas simples por "*negro*" e "*mulatto*", sabemos que haverá resultados. No entanto, vamos usar a busca avançada para limitar os nossos resultados aos registros publicados no *Old Bailey Proceedings* que dizem respeito a julgamentos apenas de 1700 até 1750. É claro que pode alterá-lo para o que desejar, mas isso tornará o exemplo mais simples de ser acompanhado. Faça a busca mostrada na imagem abaixo. Certifique-se de que marcou o botão "*Advanced*" e incluiu as *wildcards* `*` para incluir entradas pluralizadas ou com um "e" extra no final. - -{% include figure.html filename="AdvancedSearchExample.png" caption="Exemplo de Busca Avançada no *Old Bailey*" %} - -Execute a busca e depois clique no *link* "*Calculate Total*" para ver quantas entradas existem. Agora temos 13 resultados (caso tenha um número diferente, volte e certifique-se de que copiou o exemplo acima da forma exata). O que queremos fazer neste ponto é o *download* de todos esses ficheiros de julgamento e analizá-los mais profundamente. Mais uma vez, para apenas 13 registros, também pode fazer o *download* de cada registro manualmente. Mas à medida que mais e mais dados são disponibilizados *online*, torna-se mais comum a necessidade de baixar 1.300 ou até 130.000 registros, caso no qual o *download* individual dos registros se torna impraticável e entender como automatizar o processo se torna muito valioso. Para automatizar o processo, precisamos de dar um passo atrás e lembrar como as URLs de busca são criadas no *Old Bailey website*, um método comum para muitas bases de dados *online* e *websites*. - -## Entendendo *Queries* de URL - -Observe a URL produzida com a última página de resultado de busca. Ela deve se parecer com isso: - -``` -https://www.oldbaileyonline.org/search.jsp?gen=1&form=searchHomePage&_divs_fulltext=mulatto*+negro*&kwparse=advanced&_divs_div0Type_div1Type=sessionsPaper_trialAccount&fromYear=1700&fromMonth=00&toYear=1750&toMonth=99&start=0&count=0 -``` - -Vimos sobre URLs em [Noções básicas de páginas web e HTML](/pt/licoes/nocoes-basicas-paginas-web-html), mas isso parece muito mais complexo. Ainda que mais longo, *não* é verdadeiramente muito mais complexo. Mas é mais fácil de entender observando como os nossos critérios de busca são representados na URL. 
-
-```
-https://www.oldbaileyonline.org/search.jsp
-?gen=1
-&form=searchHomePage
-&_divs_fulltext=mulatto*+negro*
-&kwparse=advanced
-&_divs_div0Type_div1Type=sessionsPaper_trialAccount
-&fromYear=1700
-&fromMonth=00
-&toYear=1750
-&toMonth=99
-&start=0
-&count=0
-```
-
-Nessa visão, vemos com mais clareza as 12 informações importantes que precisamos para realizar a nossa busca (uma por linha). Na primeira linha há a URL base do *Old Bailey website*, seguida por um "?" que dá início à *query* (não se preocupe com o *bit* `gen=1`; os desenvolvedores do *Old Bailey Online* dizem que ele não faz nada) e por uma série de 10 pares *nome/valor* unidos por caracteres `&`. Juntos, esses 10 pares de nome/valor compõem a *query string* (expressão de busca), que informa ao mecanismo de busca quais variáveis usar em etapas específicas da investigação. Observe que cada par nome/valor contém um nome de variável: `toYear` e, em seguida, atribui a essa variável um valor: `1750`. Isso funciona exatamente da mesma forma que os *Argumentos de Função*, passando certas informações para variáveis específicas. Nesse caso, a variável mais importante é `_divs_fulltext=`, para a qual foi dado o valor:
-
-```
-mulatto*+negro*
-```
-
-Esta contém o termo que digitamos na caixa de busca. O programa adicionou automaticamente um sinal de soma `+` no lugar de um espaço em branco (URLs não podem conter espaçamentos); de resto, isso é exatamente o que pedimos que o *site* do *Old Bailey* encontrasse. As outras variáveis carregam valores que nós também definimos. `fromYear` e `toYear` contêm o nosso intervalo de datas. Já que nenhum ano possui 99 meses, como sugerido na variável `toMonth`, podemos assumir que esse seja o modo através do qual o algoritmo garante que todos os registros daquele ano são incluídos. Não há regras rígidas e universais para descobrir o que cada variável faz, já que foi quem criou o *site* que escolheu os seus nomes. Muitas vezes pode fazer uma suposição razoável. Todos os campos de busca possíveis na página de busca avançada possuem os seus próprios pares nome/valor. Caso deseje descobrir o nome da variável de modo a que possa utilizá-la, faça uma nova busca e certifique-se de colocar um valor no campo no qual está interessado. Após submeter a sua busca, verá o seu valor e o nome associado a ele como parte da URL da página dos resultados de busca. Com o *Old Bailey Online*, assim como noutros *websites*, o formulário de busca (avançada ou não) ajuda, essencialmente, a construir URLs que informam à base de dados o que está buscando. Se puder entender como os campos de busca estão representados no URL - o que geralmente é algo bem direto -, então torna-se relativamente simples construir esses URLs programaticamente e automatizar o processo de *download* de registros.
-
-Agora tente alterar o `start=0` para `start=10` e pressione `enter`. Deve agora ter os resultados 11-13. A variável `start` informa ao *website* qual a entrada que deve ser mostrada no início da lista de resultados de busca. Nós devemos ser capazes de utilizar esse conhecimento para criar uma série de URLs que nos permitirão fazer o *download* de todos os 13 ficheiros. Vamos nos voltar para isso agora.
-
-## Fazendo o *Download* de Ficheiros Sistematicamente
-
-Na lição [Download de Páginas Web com Python](/pt/licoes/download-paginas-web-python), aprendemos que o Python pode fazer o *download* de uma página web desde que tenhamos a URL. Naquela lição, usamos a URL para fazer o *download* da transcrição do julgamento de Benjamin Bowsey. Nesse caso, estamos tentando fazer o *download* de múltiplas transcrições de julgamentos que atendem aos critérios de busca descritos acima sem precisar executar o programa repetidamente. Ao invés disso, queremos um programa que faça o *download* de tudo de uma vez. Neste ponto, temos a URL para a página de resultados de busca que contém as 10 primeiras entradas na nossa investigação. Também sabemos que ao mudarmos o valor de `start` na URL, podemos sequencialmente chamar cada uma das páginas de resultados de busca e finalmente recuperar todos os ficheiros de julgamento que elas possuem. É claro que os resultados de busca não nos oferecem os ficheiros do julgamento em si, mas apenas *links* para eles. Então, precisamos de extrair dos resultados de busca esses *links* para os registros subjacentes. No *Old Bailey Online website*, as URLs para os registros individuais (os ficheiros de transcrição de julgamento) podem ser encontradas como *links* na página de resultados de busca. Sabemos que todas as transcrições de julgamento possuem um ID de julgamento que assume a forma: "t" seguido por, pelo menos, 8 números (ex.: t17800628-33). Ao buscar *links* que contenham esse padrão, podemos identificar URLs de transcrição de julgamento. Como em lições anteriores, vamos desenvolver um algoritmo de modo a que possamos começar a enfrentar esse problema de uma maneira com a qual o computador possa lidar. Parece que a tarefa pode ser realizada em 4 passos. Precisaremos:
-
-- Gerar as URLs para cada página de resultados de busca incrementando a variável `start` numa quantidade fixa um número apropriado de vezes.
-- Fazer o *download* de cada página de resultados de busca como um ficheiro HTML.
-- Extrair as URLs de cada transcrição de julgamento (usando o ID do julgamento como descrito acima) de cada ficheiro HTML de resultados de busca.
-- Percorrer essas URLs extraídas para baixar cada transcrição de julgamento e salvá-la num diretório no nosso computador.
-
-Perceberá que isso é razoavelmente similar às tarefas que realizamos em [Download de Páginas Web com Python](/pt/licoes/download-paginas-web-python) e [De HTML para Lista de Palavras (parte 2)](/pt/licoes/HTML-lista-palavras-2). Primeiro, fazemos o *download* e, então, analisamos as informações que procuramos. E, nesse caso, fazemos mais alguns *downloads*.
-
-## Fazendo o *Download* das Páginas de Resultados de Busca
-
-Primeiro, precisamos de gerar as URLs para fazer o download de cada página de resultados de busca. Já temos a primeira, obtida através do formulário de busca do próprio *website*.
-
-```
-https://www.oldbaileyonline.org/search.jsp?gen=1&form=searchHomePage&_divs_fulltext=mulatto*+negro*&kwparse=advanced&_divs_div0Type_div1Type=sessionsPaper_trialAccount&fromYear=1700&fromMonth=00&toYear=1750&toMonth=99&start=0&count=0
-```
-
-Poderíamos escrever essa URL duas vezes e alterar a variável `start` para obter todas as 13 entradas, mas vamos escrever um programa que funcionaria independentemente de quantas páginas de resultados de busca ou registros precisássemos de fazer *download*, não importando o que decidíssemos investigar. Estude esse código e, depois, adicione essa função ao seu módulo chamado `obo.py` (crie um ficheiro com esse nome e armazene-o no diretório onde deseja trabalhar). Os comentários no código destinam-se a ajudá-lo a decifrar as várias partes.
-
-``` python
-# obo.py
-def getSearchResults(query, kwparse, fromYear, fromMonth, toYear, toMonth):
-
-    import urllib.request
-
-    startValue = 0
-
-    # cada parte do URL. Dividido para facilitar a leitura.
-    url = 'https://www.oldbaileyonline.org/search.jsp?gen=1&form=searchHomePage&_divs_fulltext='
-    url += query
-    url += '&kwparse=' + kwparse
-    url += '&_divs_div0Type_div1Type=sessionsPaper_trialAccount'
-    url += '&fromYear=' + fromYear
-    url += '&fromMonth=' + fromMonth
-    url += '&toYear=' + toYear
-    url += '&toMonth=' + toMonth
-    url += '&start=' + str(startValue)
-    url += '&count=0'
-
-    # faz o download da página e armazena o resultado
-    response = urllib.request.urlopen(url)
-    webContent = response.read().decode('UTF-8')
-    filename = 'search-result'
-    f = open(filename + ".html", 'w')
-    f.write(webContent)
-    f.close()
-```
-
-Nessa função, separamos os vários componentes da *Query String* e usamos Argumentos de Função para que a função possa ser reutilizada além dos nossos objetivos específicos atuais. Quando chamarmos essa função, substituiremos os argumentos pelos valores que desejamos buscar. Depois, fazemos o *download* das páginas dos resultados de busca de maneira similar à utilizada em [Download de Páginas Web com Python](/pt/licoes/download-paginas-web-python). Agora, crie um novo ficheiro: `download-searches.py` e copie o código a seguir dentro dele. Observe: os valores que passamos como argumentos são exatamente os mesmos dos utilizados no exemplo acima. Sinta-se livre para alterá-los para obter resultados diferentes ou ver como funcionam.
-
-``` python
-#download-searches.py
-import obo
-
-query = 'mulatto*+negro*'
-
-obo.getSearchResults(query, "advanced", "1700", "00", "1750", "99")
-```
-
-Quando executar esse código, deve encontrar um novo ficheiro: `search-result.html` no seu diretório `programming-historian` contendo a primeira página dos resultados de busca da sua investigação. Certifique-se de que o *download* foi realizado apropriadamente e apague o ficheiro. Vamos adaptar o nosso programa para fazer, ao mesmo tempo, o *download* da outra página, que contém as 3 entradas restantes; queremos ter a certeza de obter as duas. Vamos refinar a nossa função `getSearchResults` adicionando outro argumento de função chamado `entries`, de modo a que possamos dizer ao programa de quantas páginas de resultados de busca precisamos fazer o *download*. Usaremos o valor das entradas e matemática simples para determinar quantas páginas de resultados de busca existem. Isso é algo bastante direto uma vez que sabemos que há dez transcrições de julgamento listadas por página. Podemos calcular o número de páginas de resultados de busca dividindo o valor das entradas por 10. Armazenaremos esse resultado na variável chamada `pageCount`. Ela se parecerá com isso:
-
-``` python
-# determina quantos ficheiros precisam ser baixados
-pageCount = entries / 10
-```
-
-No entanto, em casos em que o número de entradas não é um múltiplo de 10, isso resultará num número decimal. Pode testá-lo executando esse código no seu Terminal (Mac & Linux) / Linha de Comandos Python (Windows) e exibindo o valor mantido em `pageCount`. (Observe que, daqui em diante, usaremos a palavra Terminal para nos referirmos a esse programa).
-
-``` python
-entries = 13
-pageCount = entries / 10
-print(pageCount)
--> 1.3
-```
-
-Sabemos que a contagem do número de páginas deve ser 2 (uma página contendo as entradas 1-10 e uma página contendo as entradas 11-13). Uma vez que queremos sempre o número inteiro imediatamente acima, podemos arredondar o resultado da divisão para cima.
-
-``` python
-# determina quantos ficheiros precisam ser baixados
-import math
-pageCount = entries / 10
-pageCount = math.ceil(pageCount)
-```
-
-Se adicionarmos isso à nossa função `getSearchResults` abaixo da linha `startValue = 0`, agora o código é capaz de calcular o número de páginas cujo *download* precisa de ser realizado. No entanto, nesta etapa ele irá fazer somente o *download* da primeira página, já que informamos à seção de *download* da função para executar somente uma vez. Para corrigir isso, podemos adicionar o código de *download* a um `for` *loop* que fará o *download* uma vez para cada número na variável `pageCount`. Caso ele leia 1, fará o *download* uma vez; caso ele leia 5, fará o *download* cinco vezes e assim por diante. Imediatamente após as linhas que calculam `pageCount`, que acabou de escrever, adicione a linha a seguir e indente tudo dali até `f.close()` com um espaçamento adicional, de modo que tudo fique dentro do `for` *loop*:
-
-``` python
-for pages in range(1, pageCount+1):
-    print(pages)
-```
-
-Uma vez que isso é um `for` *loop*, todo o código que desejamos executar repetidamente também precisa de ser indentado. Pode certificar-se de que fez isso corretamente verificando o código finalizado no exemplo abaixo. Esse *loop* aproveita a função [range](https://docs.python.org/3/tutorial/controlflow.html#the-range-function) do Python. Para entender esse `for` *loop*, é melhor, provavelmente, pensar em `pageCount` igual a 2, como no exemplo. Portanto, essas duas linhas de código significam: comece a executar com um valor de *loop* inicial 1 e, a cada vez que executar, adicione uma unidade a esse valor. Quando o valor do *loop* for o mesmo de `pageCount`, execute mais uma vez e pare. Isso é particularmente valioso porque significa que podemos dizer ao nosso programa para executar exatamente uma vez para cada página de resultados de busca, e oferece uma nova habilidade flexível para controlar quantas vezes um `for` *loop* é executado. Caso deseje praticar essa nova e poderosa maneira de escrever *loops*, pode abrir o seu Terminal e brincar.
-
-``` python
-pageCount = 2
-for pages in range(1, pageCount+1):
-    print(pages)
-
--> 1
--> 2
-```
-
-Antes de adicionar todo esse código à nossa função `getSearchResults`, temos que fazer dois ajustes finais. No final do `for` *loop* (mas ainda dentro do *loop*), e depois que o nosso código de *download* for executado, precisamos de mudar a nossa variável `startValue`, que é usada na construção da URL da página cujo *download* desejamos fazer. Se nos esquecermos de fazer isso, o nosso programa fará repetidamente o *download* da primeira página de resultados de busca, já que não estamos verdadeiramente mudando nada na URL inicial. A variável `startValue`, como discutido acima, é o que controla de qual página de resultados de busca desejamos fazer o *download*. Portanto, podemos solicitar a próxima página de resultados de busca incrementando o valor de `startValue` em 10 unidades depois que o *download* inicial for concluído. Caso não tenha certeza de onde adicionar essa linha, pode espiar adiante o código finalizado no exemplo abaixo.
-
-Finalmente, queremos garantir que os nomes dos ficheiros cujo *download* fizemos são diferentes entre si. De outro modo, cada *download* será armazenado em cima do *download* anterior, deixando apenas um único ficheiro de resultados de busca. Para resolver isso, podemos ajustar os conteúdos da variável `filename` para incluir o valor armazenado em `startValue`, de modo que, a cada vez que fizermos o *download* de uma nova página, ela receba um nome diferente. Já que a variável `startValue` é um inteiro, precisaremos de convertê-la para uma string antes de adicioná-la à variável `filename`. Ajuste a linha do seu programa que define a variável `filename` para ficar assim:
-
-``` python
-filename = 'search-result' + str(startValue)
-```
-
-Agora deve ser capaz de adicionar essas novas linhas de código à sua função `getSearchResults`. Lembre-se de que fizemos as adições a seguir:
-
-- Adicionar `entries` como um argumento de função adicional logo depois de `toMonth`
-- Calcular o número de páginas de resultados de pesquisa e adicionar isso imediatamente após a linha que começa com `startValue = 0` (antes de construirmos a URL e começarmos o *download*)
-- Imediatamente após isso, adicionar um `for` *loop* que informará ao programa para executar uma vez para cada página de resultados de busca, e indentar o resto do código de modo a que ele esteja dentro do novo *loop*
-- A última linha no `for` *loop* deve agora incrementar o valor da variável `startValue` a cada vez que o *loop* é executado
-- Ajustar a variável `filename` existente de modo que, a cada vez que for feito o *download* de uma página de resultados de busca, ela forneça um nome único ao ficheiro.
-
-A função finalizada no seu ficheiro `obo.py` deve se parecer com isso:
-
-``` python
-# cria URLs para páginas de resultados de busca e armazena os ficheiros.
-def getSearchResults(query, kwparse, fromYear, fromMonth, toYear, toMonth, entries):
-
-    import urllib.request, math
-
-    startValue = 0
-
-    # isso é novo! determina quantos ficheiros precisam ser baixados.
-    pageCount = entries / 10
-    pageCount = math.ceil(pageCount)
-
-    # essa linha é nova!
-    for pages in range(1, pageCount +1):
-
-        # cada parte do URL. Dividido para facilitar a leitura.
-        url = 'https://www.oldbaileyonline.org/search.jsp?gen=1&form=searchHomePage&_divs_fulltext='
-        url += query
-        url += '&kwparse=' + kwparse
-        url += '&_divs_div0Type_div1Type=sessionsPaper_trialAccount'
-        url += '&fromYear=' + fromYear
-        url += '&fromMonth=' + fromMonth
-        url += '&toYear=' + toYear
-        url += '&toMonth=' + toMonth
-        url += '&start=' + str(startValue)
-        url += '&count=0'
-
-        # faz o download da página e salva o resultado.
-        response = urllib.request.urlopen(url)
-        webContent = response.read().decode('UTF-8')
-        filename = 'search-result' + str(startValue)
-        f = open(filename + ".html", 'w')
-        f.write(webContent)
-        f.close()
-
-        # essa linha é nova!
-        startValue = startValue + 10
-```
-
-Para executar essa nova função, adicione o argumento extra ao `download-searches.py` e execute o programa novamente:
-
-``` python
-#download-searches.py
-import obo
-
-query = 'mulatto*+negro*'
-
-obo.getSearchResults(query, "advanced", "1700", "00", "1750", "99", 13)
-```
-
-Ótimo! Agora temos as duas páginas de resultados de busca, chamadas `search-result0.html` e `search-result10.html`. Mas antes de seguirmos para o próximo passo do algoritmo, vamos cuidar de algumas "tarefas de organização". O nosso diretório `programming-historian` rapidamente se tornará difícil de controlar se fizermos o *download* de múltiplas páginas de resultados de busca e transcrições de julgamento. Vamos fazer com que o Python crie um novo diretório nomeado a partir dos nossos termos de busca.
-
-Desejamos adicionar essa nova funcionalidade em `getSearchResults`, de modo que os *downloads* das nossas páginas de resultados de busca sejam direcionados a diretórios com o mesmo nome da nossa *query* de busca. Isso manterá o nosso diretório `programming-historian` mais organizado. Para fazê-lo, criaremos um novo diretório usando a biblioteca `os`, abreviação de "*operating system*" (sistema operacional). Essa biblioteca contém uma função chamada `makedirs` que, não surpreendentemente, cria um novo diretório. Pode testar usando o Terminal:
-
-``` python
-import os
-
-query = "meuNovoDiretório"
-if not os.path.exists(query):
-    os.makedirs(query)
-```
-
-Esse programa irá verificar se o seu computador já possui um diretório com esse nome. Caso não possua, um será criado: deve agora existir um diretório chamado `meuNovoDiretório` no seu computador. Num Mac, provavelmente está localizado no seu diretório `/Users/username/`; no Windows, deve ser capaz de encontrá-lo no diretório `Python` no seu computador, o mesmo no qual abriu o programa da linha de comandos. Se isso funcionou, pode deletar o diretório do seu disco rígido, já que isso foi só uma prática. Uma vez que desejamos criar um novo diretório nomeado a partir da *query* que inserimos no *Old Bailey Online website*, vamos usar diretamente o argumento de função `query` da função `getSearchResults`. Para fazer isso, importe a biblioteca `os` após as outras e, depois, adicione o código que acabou de escrever imediatamente abaixo. A sua função `getSearchResults` deve agora se parecer com isso:
-
-``` python
-# cria URLs para páginas de resultados de busca e armazena os ficheiros.
-def getSearchResults(query, kwparse, fromYear, fromMonth, toYear, toMonth, entries):
-
-    import urllib.request, math, os
-
-    # Essa linha é nova! Cria um novo diretório.
-    if not os.path.exists(query):
-        os.makedirs(query)
-
-    startValue = 0
-
-    # Determina quantos ficheiros precisam ser baixados.
-    pageCount = entries / 10
-    pageCount = math.ceil(pageCount)
-
-    for pages in range(1, pageCount +1):
-
-        # cada parte do URL. Dividido para facilitar a leitura.
-        url = 'https://www.oldbaileyonline.org/search.jsp?gen=1&form=searchHomePage&_divs_fulltext='
-        url += query
-        url += '&kwparse=' + kwparse
-        url += '&_divs_div0Type_div1Type=sessionsPaper_trialAccount'
-        url += '&fromYear=' + fromYear
-        url += '&fromMonth=' + fromMonth
-        url += '&toYear=' + toYear
-        url += '&toMonth=' + toMonth
-        url += '&start=' + str(startValue)
-        url += '&count=0'
-
-        # faz o download da página e salva o resultado.
-        response = urllib.request.urlopen(url)
-        webContent = response.read().decode('UTF-8')
-
-        # armazena o resultado num novo diretório.
-        filename = 'search-result' + str(startValue)
-
-        f = open(filename + ".html", 'w')
-        f.write(webContent)
-        f.close()
-
-        startValue = startValue + 10
-```
-
-O último passo para essa função é garantir que, quando salvarmos as nossas páginas de resultados de busca, as armazenaremos nesse novo diretório. Para fazer isso, podemos fazer um pequeno ajuste à variável `filename` de modo a que o ficheiro termine no lugar certo. Há muitas formas de o fazer; a mais fácil é simplesmente adicionar o nome do novo diretório mais uma barra ao nome do ficheiro:
-
-``` python
-filename = query + '/' + 'search-result' + str(startValue)
-```
-
-Caso o seu computador esteja executando o Windows, precisará de uma barra invertida em vez da barra do exemplo acima. Adicione a linha acima à sua função `getSearchResults` no lugar da definição atual de `filename`.
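-
-Uma alternativa, se não quiser se preocupar com a diferença entre barras e barras invertidas, é deixar essa decisão para o próprio Python. O pequeno esboço abaixo usa a função `os.path.join`, que monta o caminho com o separador correto para o sistema operacional em uso (mais adiante nesta lição importaremos essa mesma função sob o nome `pjoin`):
-
-``` python
-import os
-
-# valores de exemplo, como acima
-query = "meuNovoDiretório"
-startValue = 0
-
-# os.path.join escolhe o separador ('/' ou barra invertida) conforme o sistema operacional
-filename = os.path.join(query, 'search-result' + str(startValue))
-print(filename)   # 'meuNovoDiretório/search-result0' no Mac/Linux
-```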
-
-Se estiver executando o Windows, é provável que o seu programa `download-searches.py` falhe quando o executar, porque está tentando criar um diretório com um \* nele. O Windows não gosta disso. Para resolver esse problema, podemos usar [expressões regulares](https://docs.python.org/3/library/re.html) para remover qualquer caractere não compatível com o Windows. Usamos expressões regulares anteriormente em [Contagem de Frequências de Palavras com Python](/pt/licoes/contar-frequencias-palavras-python). Para remover caracteres não-alfanuméricos da *query*, primeiro importe a biblioteca de expressões regulares imediatamente após importar a biblioteca `os` e, depois, use a função `re.sub()` para criar uma nova string chamada `cleanQuery`, que contém apenas caracteres alfanuméricos (por exemplo, `mulatto*+negro*` torna-se `mulattonegro`). Depois, precisará de usar `cleanQuery` como a variável passada às chamadas de `os.path.exists()` e `os.makedirs()`, e na definição de `filename`.
-
-``` python
-import urllib.request, math, os, re
-cleanQuery = re.sub(r'\W+', '', query)
-if not os.path.exists(cleanQuery):
-    os.makedirs(cleanQuery)
-
-...
-
-filename = cleanQuery + '/' + 'search-result' + str(startValue)
-```
-
-A versão final da sua função deve se parecer com isso:
-
-``` python
-# cria URLs para páginas de resultados de busca e armazena os ficheiros.
-def getSearchResults(query, kwparse, fromYear, fromMonth, toYear, toMonth, entries):
-
-    import urllib.request, math, os, re
-
-    cleanQuery = re.sub(r'\W+', '', query)
-    if not os.path.exists(cleanQuery):
-        os.makedirs(cleanQuery)
-
-    startValue = 0
-
-    # Determina quantos ficheiros precisam ser baixados
-    pageCount = entries / 10
-    pageCount = math.ceil(pageCount)
-
-    for pages in range(1, pageCount +1):
-
-        # cada parte do URL. Dividido para facilitar a leitura.
-        url = 'https://www.oldbaileyonline.org/search.jsp?gen=1&form=searchHomePage&_divs_fulltext='
-        url += query
-        url += '&kwparse=' + kwparse
-        url += '&_divs_div0Type_div1Type=sessionsPaper_trialAccount'
-        url += '&fromYear=' + fromYear
-        url += '&fromMonth=' + fromMonth
-        url += '&toYear=' + toYear
-        url += '&toMonth=' + toMonth
-        url += '&start=' + str(startValue)
-        url += '&count=0'
-
-        # faz o download da página e salva o resultado.
-        response = urllib.request.urlopen(url)
-        webContent = response.read().decode('UTF-8')
-        filename = cleanQuery + '/' + 'search-result' + str(startValue)
-        f = open(filename + ".html", 'w')
-        f.write(webContent)
-        f.close()
-
-        startValue = startValue + 10
-```
-
-Dessa vez, dizemos ao programa para fazer o *download* das páginas de resultados de busca e armazená-las num novo diretório, ao invés do nosso diretório `programming-historian`. Execute o programa `download-searches.py` mais uma vez para se certificar de que ele funcionou e de que entendeu como armazenar os ficheiros num diretório específico usando Python.
-
-### Fazendo o *Download* das Entradas de Julgamento Individuais
-
-A este ponto, criamos uma função capaz de fazer o *download* de todos os ficheiros HTML de resultados de busca a partir do website *Old Bailey Online* para uma busca avançada que definimos, tudo de forma programática. Agora vem o próximo passo do algoritmo: extrair as URLs de cada transcrição de julgamento dos ficheiros HTML de resultados de busca. Nas lições que precedem esta (ex.: [Download de Páginas Web com Python](/pt/licoes/download-paginas-web-python)), trabalhamos com as versões para exibição das transcrições dos julgamentos, e continuaremos a fazer isso. Sabemos que a versão de exibição do julgamento de Benjamin Bowsey está localizada na URL:
-
-```
-http://www.oldbaileyonline.org/print.jsp?div=t17800628-33
-```
-
-Da mesma forma que alterar as *query strings* nas URLs gera resultados de busca diferentes, alterar a URL dos registros de julgamento - no caso, substituir um ID de julgamento por outro - nos fará obter a transcrição daquele novo julgamento. Isso significa que, para encontrar e fazer o *download* dos 13 ficheiros que buscamos, tudo o que precisamos são esses IDs de julgamento. Uma vez que sabemos que essas páginas de resultados de busca geralmente contêm um *link* para as páginas descritas, há uma boa chance de que consigamos encontrar esses *links* integrados ao código HTML. Se formos capazes de raspar essa informação das páginas de resultados de busca de que fizemos *download*, podemos então usar essa informação para gerar uma URL que nos permitirá fazer o *download* de cada transcrição de julgamento. Essa é uma técnica que irá utilizar na maioria das páginas de resultados de busca, não só nas do *Old Bailey Online*! Para fazer isso, primeiro precisamos encontrar onde os IDs de julgamento estão no código HTML dos ficheiros que fizemos o *download* e, depois, determinar uma maneira de isolá-los consistentemente usando código, de modo a que, independentemente de qual página de resultados de busca fizermos o *download*, sejamos capazes de encontrar as transcrições de julgamento. Primeiro, abra `search-result0.html` no Komodo Edit e dê uma olhada na lista de julgamentos. A primeira entrada começa com "Anne Smith", então pode usar o recurso `find` no Komodo Edit para pular imediatamente para o lugar certo. Observe que o nome de Anne faz parte de um *link*:
-
-```
-browse.jsp?id=t17160113-18&div=t17160113-18&terms=mulatto*_negro*#highlight
-```
-
-Perfeito, o *link* contém o ID do julgamento! Percorra as entradas restantes e verá que isso é verdade em todos os casos. Para nossa sorte, o *site* é bem formatado e parece que cada *link* começa com `browse.jsp?id=` seguido pelo ID do julgamento e termina com um `&`; no caso de Anne: `browse.jsp?id=t17160113-18&`. Podemos escrever algumas linhas de código que sejam capazes de isolar esses IDs. Veja a função a seguir. Essa função também usa a biblioteca `os`, nesse caso para listar todos os ficheiros localizados no diretório criado na seção anterior. A biblioteca `os` possui uma gama de funções úteis que imitam os tipos de tarefas que esperaria ser capaz de fazer com o seu mouse no Mac Finder ou no Windows, como abrir, fechar, criar, deletar e mover ficheiros e diretórios, e é uma boa biblioteca para dominar - ou, pelo menos, com a qual vale a pena familiarizar-se.
-
-``` python
-def getIndivTrials(query):
-    import os, re
-
-    cleanQuery = re.sub(r'\W+', '', query)
-    searchResults = os.listdir(cleanQuery)
-
-    print(searchResults)
-```
-
-Crie e execute um novo programa chamado `extract-trial-ids.py` com o código a seguir. Certifique-se de inserir no argumento de `query` o mesmo valor usado no exemplo anterior:
-
-``` python
-import obo
-
-obo.getIndivTrials("mulatto*+negro*")
-```
-
-Se tudo correu bem, deve ver uma lista contendo o nome de todos os ficheiros do seu novo diretório de resultados de busca (chamado `mulattonegro`, uma vez que removemos da *query* os caracteres não-alfanuméricos), que a essa altura devem ser as duas páginas de resultados de busca. Certifique-se de que isso funcionou antes de prosseguir.
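-
-Como alternativa aos métodos de string que usaremos a seguir, as expressões regulares que acabamos de aplicar à *query* também serviriam para isolar esses IDs. Fica aqui um esboço mínimo, apenas a título de comparação, assumindo que cada *link* segue o padrão `browse.jsp?id=...&` descrito acima:
-
-``` python
-import re
-
-# linha de exemplo, copiada de uma página de resultados de busca
-texto = 'browse.jsp?id=t17160113-18&div=t17160113-18&terms=mulatto*_negro*#highlight'
-
-# captura o que está entre 'id=' e o '&' seguinte: um "t", 8 números, um hífen e mais números
-ids = re.findall(r'id=(t\d{8}-\d+)&', texto)
-print(ids)   # ['t17160113-18']
-```
-
-Na lição, porém, seguiremos com o método `find()` e fatias de string, que são mais fáceis de acompanhar passo a passo.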
-Uma vez que armazenamos todas as páginas de resultados de busca com um nome de ficheiro que inclui `search-result`, agora desejamos abrir todos os ficheiros cujo nome contenha `search-result` e extrair todos os IDs de julgamento encontrados neles. Nesse caso sabemos que temos 2, mas desejamos que o nosso código seja o mais reutilizável possível (dentro do razoável, é claro!). Restringir essa ação a ficheiros denominados `search-result` significará que este programa funcionará como pretendido, mesmo que o diretório contenha muitos outros ficheiros não relacionados, já que o programa ignorará qualquer coisa com nome diferente.
-
-Adicione à sua função `getIndivTrials()` o código a seguir, que verificará se cada ficheiro contém `search-result` no seu nome. Em caso verdadeiro, o ficheiro será aberto e o conteúdo será salvo na variável chamada `text`. Essa variável `text` será analisada na busca por um ID de julgamento, que sabemos que sempre segue `browse.jsp?id=`. Se e quando o ID de julgamento for encontrado, ele será armazenado numa lista e exibido na Saída de Comando, o que nos deixa com todas as informações de que precisamos para então escrever o programa que fará o *download* dos julgamentos desejados.
-
-``` python
-def getIndivTrials(query):
-    import os, re
-
-    cleanQuery = re.sub(r'\W+', '', query)
-    searchResults = os.listdir(cleanQuery)
-
-    urls = []
-
-    # encontra as páginas de resultados de busca.
-    for files in searchResults:
-        if files.find("search-result") != -1:
-            f = open(cleanQuery + "/" + files, 'r')
-            text = f.read().split(" ")
-            f.close()
-
-            # busca os IDs de julgamento.
-            for words in text:
-                if words.find("browse.jsp?id=") != -1:
-                    # isola o ID
-                    urls.append(words[words.find("id=") +3: words.find("&")])
-
-    print(urls)
-```
-
-Essa última linha do `for` *loop* pode parecer confusa, mas certifique-se de que entendeu antes de seguir em frente. A variável `words` é verificada para saber se contém os caracteres `id=` (sem aspas), que obviamente se referem a um ID específico de transcrição de julgamento. Caso contenha, usamos a notação de *slice* de strings para capturar apenas o trecho entre `id=` e `&` e adicionamos o resultado à lista `urls`. Se soubéssemos as posições exatas dos índices dessa substring, poderíamos ter usado esses valores numéricos no lugar. No entanto, ao utilizar o método de string `find()`, criamos um programa muito mais flexível. O código a seguir faz exatamente a mesma coisa que essa última linha, mas de maneira menos condensada:
-
-``` python
-idStart = words.find("id=") + 3
-idEnd = words.find("&")
-trialID = words[idStart: idEnd]
-
-urls.append(trialID)
-```
-
-Ao executar novamente o programa `extract-trial-ids.py`, deve ver uma lista de todos os IDs de julgamento. Podemos adicionar algumas linhas extra para transformá-los em URLs propriamente ditas e fazer o *download* de toda a lista para o nosso novo diretório. Também vamos usar a biblioteca `time` para pausar o nosso programa por 3 segundos entre cada *download* - uma técnica chamada *throttling* (em português, estrangulamento). É considerada uma boa forma de não sobrecarregar o servidor de alguém com muitas solicitações por segundo; e o pequeno retardamento torna mais fácil que todos esses ficheiros sejam, de fato, baixados ao invés de ocorrer um [time out](https://en.wikipedia.org/wiki/Timeout_(computing)). Adicione o código a seguir ao final da sua função `getIndivTrials()`. Esse código vai gerar a URL de cada julgamento individualmente, fazer o *download* da página para o seu computador, armazenar o ficheiro no seu novo diretório e pausar por 3 segundos antes de continuar para o próximo julgamento. Todo esse trabalho está contido num `for` *loop* e será executado uma vez para cada julgamento na sua lista `urls`.
-
-``` python
-def getIndivTrials(query):
-    #...
-    import urllib.request, time
-
-    # importa funções python built-in para criar caminhos de ficheiro.
-    from os.path import join as pjoin
-
-    for items in urls:
-        # gera a URL.
-        url = "http://www.oldbaileyonline.org/print.jsp?div=" + items
-
-        # faz o download da página.
-        response = urllib.request.urlopen(url)
-        webContent = response.read().decode('UTF-8')
-
-        # cria o nome do ficheiro e coloca-o no novo diretório.
-        filename = items + '.html'
-        filePath = pjoin(cleanQuery, filename)
-
-        # armazena o ficheiro.
-        f = open(filePath, 'w')
-        f.write(webContent)
-        f.close()
-
-        # pausa por 3 segundos.
-        time.sleep(3)
-```
-
-Se unirmos tudo numa única função, ela deve se parecer com isso (note que reunimos todas as instruções de `import` no início, para manter as coisas claras):
-
-``` python
-def getIndivTrials(query):
-    import os, re, urllib.request, time
-
-    # importa funções python built-in para criar caminhos de ficheiro.
-    from os.path import join as pjoin
-
-    cleanQuery = re.sub(r'\W+', '', query)
-    searchResults = os.listdir(cleanQuery)
-
-    urls = []
-
-    # encontra páginas de resultados de busca.
-    for files in searchResults:
-        if files.find("search-result") != -1:
-            f = open(cleanQuery + "/" + files, 'r')
-            text = f.read().split(" ")
-            f.close()
-
-            # busca por IDs de julgamento.
-            for words in text:
-                if words.find("browse.jsp?id=") != -1:
-                    # isola o ID
-                    urls.append(words[words.find("id=") +3: words.find("&")])
-
-    # novo daqui em diante!
-    for items in urls:
-        # gera a URL.
-        url = "http://www.oldbaileyonline.org/print.jsp?div=" + items
-
-        # faz o download da página.
-        response = urllib.request.urlopen(url)
-        webContent = response.read().decode('UTF-8')
-
-        # cria o nome do ficheiro e coloca-o no novo diretório.
-        filename = items + '.html'
-        filePath = pjoin(cleanQuery, filename)
-
-        # armazena o ficheiro.
-        f = open(filePath, 'w')
-        f.write(webContent)
-        f.close()
-
-        # pausa por 3 segundos.
-        time.sleep(3)
-```
-
-Vamos adicionar a mesma pausa de três segundos à nossa função `getSearchResults` para ser amigável aos *servers* do *Old Bailey Online*:
-
-``` python
-# cria URLs para páginas de resultados de busca e armazena os ficheiros.
-def getSearchResults(query, kwparse, fromYear, fromMonth, toYear, toMonth, entries):
-
-    import urllib.request, math, os, re, time
-
-    cleanQuery = re.sub(r'\W+', '', query)
-    if not os.path.exists(cleanQuery):
-        os.makedirs(cleanQuery)
-
-    startValue = 0
-
-    # Determina quantos ficheiros precisam de ser baixados.
-    pageCount = entries / 10
-    pageCount = math.ceil(pageCount)
-
-    for pages in range(1, pageCount +1):
-
-        # cada parte da URL. Dividida para facilitar a leitura.
-        url = 'https://www.oldbaileyonline.org/search.jsp?gen=1&form=searchHomePage&_divs_fulltext='
-        url += query
-        url += '&kwparse=' + kwparse
-        url += '&_divs_div0Type_div1Type=sessionsPaper_trialAccount'
-        url += '&fromYear=' + fromYear
-        url += '&fromMonth=' + fromMonth
-        url += '&toYear=' + toYear
-        url += '&toMonth=' + toMonth
-        url += '&start=' + str(startValue)
-        url += '&count=0'
-
-        # faz o download da página e armazena o resultado.
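-        # urlopen devolve um objeto de resposta; read() retorna os bytes da
-        # página, que decodificamos de UTF-8 para obter o texto HTML.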
-        response = urllib.request.urlopen(url)
-        webContent = response.read().decode('UTF-8')
-        filename = cleanQuery + '/' + 'search-result' + str(startValue)
-        f = open(filename + ".html", 'w')
-        f.write(webContent)
-        f.close()
-
-        startValue = startValue + 10
-
-        # pausa por 3 segundos.
-        time.sleep(3)
-```
-
-Finalmente, chame a função no programa `download-searches.py`:
-
-``` python
-#download-searches.py
-import obo
-
-query = 'mulatto*+negro*'
-
-obo.getSearchResults(query, "advanced", "1700", "00", "1750", "99", 13)
-
-obo.getIndivTrials(query)
-```
-
-Agora criou um programa que é capaz de fazer a solicitação e o *download* de ficheiros do *Old Bailey website*, baseado nos parâmetros de busca que definiu, tudo sem visitar o *site*!
-
-### No Caso de um Ficheiro Não Ser Baixado
-
-Verifique se o *download* dos treze ficheiros foi realizado corretamente. Se esse for o caso, ótimo! No entanto, há a possibilidade de que esse programa tenha parado no meio do caminho. Isso porque o nosso programa, ao ser executado na nossa máquina, depende de dois fatores além do nosso controle imediato: a velocidade da internet e o tempo de resposta do *server* do *Old Bailey Online* naquele momento. Uma coisa é pedir que o Python faça o *download* de um único ficheiro, mas quando começamos a solicitar um ficheiro a cada três segundos, há grandes chances de ocorrer um *time out* no *server* ou de que ele falhe em nos enviar o ficheiro que estamos buscando.
-
-Se estivéssemos usando um navegador *web* para fazer essas solicitações, eventualmente receberíamos uma mensagem de que "a conexão expirou" ou algo do tipo. Todos nós vemos isso de tempos em tempos. No entanto, o nosso programa não foi desenvolvido para lidar com essas mensagens de erro nem para retransmiti-las, então só perceberá o problema quando o programa não tiver retornado o número esperado de ficheiros ou simplesmente não fizer nada. Para evitar frustrações e incertezas, queremos um sistema à prova de falhas no nosso programa, que tentará baixar cada julgamento. Se, por alguma razão, ele falhar, apontaremos o problema e passaremos para o próximo julgamento.
-
-Para fazer isso, utilizaremos os mecanismos para lidar com erros do Python, [try / except](http://docs.python.org/tutorial/errors.html), bem como uma nova biblioteca: `socket`. `try` e `except` são muito parecidos com um `if / else` *statement*. Quando solicita que o Python `try` (em português, tente) algo, ele tentará executar o código; caso o código falhe em alcançar o que definiu, ele executará o código em `except` (em português, exceção). Esse recurso é frequentemente usado para lidar com erros, o que se conhece como *error handling*. Podemos usá-lo a nosso favor dizendo ao programa para tentar fazer o *download* de uma página. Caso o programa falhe, solicitaremos que ele nos informe qual ficheiro falhou e depois prosseguiremos. Para fazer isso, precisamos de usar a biblioteca `socket`, que nos permitirá definir um limite de tempo para um *download* antes de seguir em frente. Isso envolve alterar a função `getIndivTrials`.
-
-Primeiro, precisamos de carregar a biblioteca `socket`, o que deve ser feito da mesma forma que todas as outras importações de biblioteca. Depois, precisamos de importar a biblioteca `urllib.error`, que nos permite lidar com erros de *download*. Também precisamos de definir o tamanho do *timeout* padrão do *socket* - por quanto tempo desejamos tentar fazer o *download* de uma página antes de desistirmos. Isso deve entrar imediatamente após o comentário que começa com `# faz o download da página`:
-
-``` python
-    import os, re, urllib.request, urllib.error, time, socket
-
-    #...
-    # faz o download da página.
-    socket.setdefaulttimeout(10)
-```
-
-Então, precisamos de uma nova lista de Python que armazenará todas as URLs cujo *download* falhou. Vamos chamá-la de `failedAttempts`; pode inseri-la imediatamente após as instruções de importação:
-
-``` python
-failedAttempts = []
-```
-
-Finalmente, podemos adicionar o `try / except` *statement*, de forma muito similar a como um `if / else` *statement* seria adicionado. Nesse caso, vamos colocar todo o código desenvolvido para fazer o *download* e armazenar os julgamentos no `try` *statement* e, no `except` *statement*, vamos dizer ao programa o que desejamos que ele faça caso algo falhe. Aqui, vamos adicionar a URL cujo *download* falhou à nossa nova lista, `failedAttempts`:
-
-``` python
-#...
-
-        socket.setdefaulttimeout(10)
-
-        try:
-            response = urllib.request.urlopen(url)
-            webContent = response.read().decode('UTF-8')
-
-            # cria o nome do ficheiro e coloca-o no novo diretório.
-            filename = items + '.html'
-            filePath = pjoin(cleanQuery, filename)
-
-            # armazena o ficheiro.
-            f = open(filePath, 'w')
-            f.write(webContent)
-            f.close()
-        except urllib.error.URLError:
-            failedAttempts.append(url)
-```
-
-Finalmente, diremos ao programa para exibir os conteúdos da lista na Saída de Comando, de modo que saibamos quais ficheiros falharam no *download*. Isso deve ser adicionado como as linhas finais da função:
-
-``` python
-print("failed to download: " + str(failedAttempts))
-```
-
-Agora, ao executarmos o programa, caso haja algum problema no *download* de um ficheiro específico, receberá uma mensagem na janela de Saída de Comando do Komodo Edit. Essa mensagem irá conter quaisquer URLs dos ficheiros que falharam no *download*. Caso haja apenas um ou dois, provavelmente é mais fácil simplesmente visitar as páginas manualmente e usar o recurso "Salvar Como" do seu navegador. Caso se esteja sentindo aventureiro, poderia modificar o programa para fazer automaticamente o *download* dos ficheiros faltantes. A versão final das suas funções `getSearchResults()` e `getIndivTrials()` deve se parecer com isso:
-
-``` python
-# cria URLs para páginas de resultados de busca e armazena os ficheiros.
-def getSearchResults(query, kwparse, fromYear, fromMonth, toYear, toMonth, entries):
-
-    import urllib.request, math, os, re, time
-
-    cleanQuery = re.sub(r'\W+', '', query)
-    if not os.path.exists(cleanQuery):
-        os.makedirs(cleanQuery)
-
-    startValue = 0
-
-    # determina quantos ficheiros precisam de ser baixados.
-    pageCount = entries / 10
-    pageCount = math.ceil(pageCount)
-
-    for pages in range(1, pageCount +1):
-
-        # cada parte da URL. Dividida para facilitar a leitura.
-        url = 'https://www.oldbaileyonline.org/search.jsp?gen=1&form=searchHomePage&_divs_fulltext='
-        url += query
-        url += '&kwparse=' + kwparse
-        url += '&_divs_div0Type_div1Type=sessionsPaper_trialAccount'
-        url += '&fromYear=' + fromYear
-        url += '&fromMonth=' + fromMonth
-        url += '&toYear=' + toYear
-        url += '&toMonth=' + toMonth
-        url += '&start=' + str(startValue)
-        url += '&count=0'
-
-        # faz o download da página e salva o resultado.
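-        # nota: esta função continua sem try/except; nesta versão final,
-        # apenas getIndivTrials (logo abaixo) tolera falhas de download.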
-        response = urllib.request.urlopen(url)
-        webContent = response.read().decode('UTF-8')
-        filename = cleanQuery + '/' + 'search-result' + str(startValue)
-        f = open(filename + ".html", 'w')
-        f.write(webContent)
-        f.close()
-
-        startValue = startValue + 10
-
-        # pausa por 3 segundos.
-        time.sleep(3)
-
-def getIndivTrials(query):
-    import os, re, urllib.request, urllib.error, time, socket
-
-    failedAttempts = []
-
-    # importa funções python built-in para criar caminhos de ficheiro.
-    from os.path import join as pjoin
-
-    cleanQuery = re.sub(r'\W+', '', query)
-    searchResults = os.listdir(cleanQuery)
-
-    urls = []
-
-    # encontra páginas de resultados de busca.
-    for files in searchResults:
-        if files.find("search-result") != -1:
-            f = open(cleanQuery + "/" + files, 'r')
-            text = f.read().split(" ")
-            f.close()
-
-            # busca por IDs de julgamento.
-            for words in text:
-                if words.find("browse.jsp?id=") != -1:
-                    # isola o ID
-                    urls.append(words[words.find("id=") +3: words.find("&")])
-
-    for items in urls:
-        # gera a URL.
-        url = "http://www.oldbaileyonline.org/print.jsp?div=" + items
-
-        # faz o download da página.
-        socket.setdefaulttimeout(10)
-        try:
-            response = urllib.request.urlopen(url)
-            webContent = response.read().decode('UTF-8')
-
-            # cria o nome do ficheiro e coloca-o no novo diretório.
-            filename = items + '.html'
-            filePath = pjoin(cleanQuery, filename)
-
-            # armazena o ficheiro.
-            f = open(filePath, 'w')
-            f.write(webContent)
-            f.close()
-        except urllib.error.URLError:
-            failedAttempts.append(url)
-
-        # pausa por 3 segundos.
-        time.sleep(3)
-
-    print("failed to download: " + str(failedAttempts))
-```
-
-## Leituras Adicionais
-
-Para usuários mais avançados, ou para se tornar um usuário mais avançado, pode achar que vale a pena ler sobre como alcançar esse mesmo processo usando Interfaces de Programação de Aplicações (API). Geralmente, um *website* com uma API dá instruções de como solicitar certos documentos. É um processo bastante similar ao que acabamos de fazer interpretando a *Query String* de URL, mas sem o trabalho de investigação adicional necessário para decifrar o que cada variável faz. Caso esteja interessado, o *Old Bailey Online* liberou recentemente uma API, e a documentação pode ajudar bastante:
-
-- Old Bailey Online API ()
-- Melhor maneira de criar um diretório para gravação de ficheiros, se ele não existir, usando Python? ()
+---
+title: Download de Múltiplos Registros usando Query Strings
+layout: lesson
+collection: lessons
+slug: download-multiplos-registros-query-strings
+date: 2012-11-11
+translation_date: 2022-11-25
+authors:
+- Adam Crymble
+reviewers:
+- Luke Bergmann
+- Sharon Howard
+- Frederik Elwert
+editors:
+- Fred Gibbs
+translator:
+- Felipe Lamarca
+translation-editor:
+- Jimmy Medeiros
+translation-reviewer:
+- André Salvo
+- Aracele Torres
+difficulty: 2
+review-ticket: https://github.com/programminghistorian/ph-submissions/issues/465
+activity: acquiring
+topics: [web-scraping]
+abstract: "Fazer o download de um único registro de um website é fácil, mas fazer o download de vários registros de uma vez - uma necessidade cada vez mais frequente para um historiador - é muito mais eficiente usando uma linguagem de programação como o Python. Nessa lição, escreveremos um programa que fará o download de uma série de registros do Old Bailey Online usando critérios de busca personalizados e irá armazená-los num diretório no nosso computador."
+redirect_from: /licoes/download-de-multiplos-registros-usando-query-strings +original: downloading-multiple-records-using-query-strings +avatar_alt: Figuras trabalhando numa mina, empurrando carrinhos +doi: 10.46430/phpt0034 +--- + +{% include toc.html %} + +
    +O site do Old Bailey Online foi recentemente atualizado. Infelizmente, devido às diversas mudanças, muitos (se não todos) os elementos do site de exemplo usado nesta lição não funcionarão conforme descrito. No entanto, as metodologias ensinadas por esta lição permanecem relevantes e podem ser adaptadas pelos leitores para um site de exemplo diferente. Estamos trabalhando na adaptação da lição para o novo site do Old Bailey Online, mas ainda não temos cronograma preciso de quando a lição será atualizada. [Abril de 2024] +
    + +## Objetivos do Módulo + +Fazer o *download* de um único registro de um website é fácil, mas fazer o *download* de vários registros de uma vez - uma necessidade cada vez mais frequente para um historiador - é muito mais eficiente usando uma linguagem de programação como o Python. Nesta lição, escreveremos um programa que fará o *download* de uma série de registros do *[Old Bailey Online](https://www.oldbaileyonline.org/)* usando critérios de investigação personalizados e irá armazená-los num diretório no nosso computador. Esse processo envolve interpretar e manipular *Query Strings* de URL. Nesse caso, o tutorial buscará fazer o *download* de fontes que contenham referências a afrodescendentes que foram publicadas no *Old Bailey Proceedings* entre 1700 e 1750. + +
    +Os exemplos nessa lição incluem linguagem histórica racializada que os leitores podem achar ofensiva. O autor não tolera o uso dessa linguagem, mas tentou usá-la no seu contexto histórico, reconhecendo que, de outra forma, é impossível encontrar os materiais desejados do estudo de caso. Qualquer pessoa que ensine com este material é aconselhada a adotar uma abordagem sensível em relação à linguagem e a aplicar as boas práticas ao ensinar sobre raça. O autor recomenda os muitos recursos do Teaching Tolerance; Peggy McIntosh, ‘White Privilege: Unpacking the Invisible Knapsack’, Peace and Freedom Magazine, (1989), 10-12; Binyavanga Wainaina, ‘How to Write About Africa’, Granta (92): 2006. +
+
+## Para Quem isso é Útil?
+
+Automatizar o processo de *download* de registros de uma base de dados *online* será útil para qualquer um que trabalhe com fontes históricas armazenadas *online* de forma ordenada e acessível e que deseje salvar cópias dessas fontes no seu próprio computador. É particularmente útil para alguém que deseja fazer o *download* de vários registros específicos, em vez de apenas um punhado. Caso deseje fazer o *download* de *todos* ou da *maioria* dos registros de uma base de dados em particular, pode achar o tutorial de Ian Milligan sobre [Automated Downloading with WGET](/en/lessons/automated-downloading-with-wget) mais adequado.
+
+O presente tutorial permitirá que faça *download* de forma isolada e discriminada de registros específicos que atendam às suas necessidades. Fazer o *download* de múltiplas fontes de forma automática economiza um tempo considerável. O que faz com as fontes baixadas depende dos seus objetivos de investigação. Pode desejar criar visualizações ou realizar uma série de métodos de análise de dados, ou simplesmente reformatá-las para facilitar a navegação. Ou pode desejar apenas manter uma cópia de *backup* para poder acessá-las sem acesso à internet.
+
+Essa lição é voltada para usuários de Python com nível intermediário. Caso ainda não tenha tentado as lições do [Básico de Programação em Python](/pt/licoes/introducao-instalacao-python), pode achá-las um ponto de partida útil.
+
+## Aplicando nosso Conhecimento Histórico
+
+Nesta lição, estamos tentando criar o nosso próprio corpus de casos relacionados com pessoas afrodescendentes. A partir do [caso de Benjamin Bowsey](https://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33) no *Old Bailey* em 1780, podemos notar que "*black*" pode ser uma palavra-chave útil para localizar outros casos envolvendo réus de ascendência africana. No entanto, quando buscamos por *black* no *website* do *Old Bailey*, percebemos que esta palavra às vezes se refere a outros usos: *black horses* ou *black cloth*. A tarefa de desambiguar esse uso da linguagem terá que esperar por outra lição. Por enquanto, vamos nos voltar para casos mais fáceis. Como historiadores, provavelmente, podemos pensar em termos historicamente racializados, relacionados com afrodescendentes, que valeria a pena buscar como palavras-chave. A infame "*n-word*", é claro, não é útil, já que esse termo não era comumente utilizado até meados do século XIX. Outras expressões racializadas como "*negro*" e "*mulatto*" são, porém, muito mais relevantes para o início do século XVIII. Essas palavras-chave são menos ambíguas do que "*black*" e são muito mais propensas a serem referências imediatas a pessoas no nosso público-alvo. Se testarmos esses dois termos em buscas separadas simples no *Old Bailey website*, temos resultados como nestas capturas de tela:
+
+{% include figure.html filename="SearchResultsNegro.png" caption="Resultados de investigação para 'negro' no *Old Bailey Online*" %}
+
+{% include figure.html filename="SearchResultsMulatto.png" caption="Resultados de investigação para 'mulatto' no *Old Bailey Online*" %}
+
+Depois de examinar estes resultados de busca, parece evidente que são referências a pessoas e não a cavalos, panos ou qualquer outra coisa que seja preta. Desejamos fazer o *download* de todas elas para usar na nossa análise. Poderíamos, é claro, fazer o *download* de uma por uma manualmente. Mas vamos encontrar uma maneira programática de automatizar essa tarefa.
+
+## A Investigação Avançada no OBO
+
+As ferramentas de pesquisa de cada *site* funcionam de maneira diferente. Embora as pesquisas funcionem de forma semelhante, as complexidades das pesquisas numa base de dados podem não ser totalmente óbvias. Portanto, é importante pensar criticamente sobre as opções de busca de uma base de dados e, quando disponível, ler a documentação fornecida pelo *website*. Investigadores de história prudentes sempre interrogam suas fontes; os procedimentos por trás das suas caixas de pesquisa devem receber a mesma atenção. O [formulário de busca avançada](https://www.oldbaileyonline.org/forms/formMain.jsp) do *Old Bailey Online* permite refinar as suas buscas com base em dez campos diferentes, incluindo palavras-chave simples, um intervalo de datas e um tipo de crime. Como as ferramentas de busca de cada *website* são diferentes, vale sempre a pena reservar um momento ou dois para testar e ler a respeito das opções de investigação disponíveis. Uma vez que já fizemos buscas simples por "*negro*" e "*mulatto*", sabemos que haverá resultados. No entanto, vamos usar a busca avançada para limitar os nossos resultados aos registros publicados no *Old Bailey Proceedings* que dizem respeito a julgamentos apenas de 1700 até 1750. É claro que pode alterar esses critérios para o que desejar, mas segui-los à risca tornará o exemplo mais simples de acompanhar. Faça a busca mostrada na imagem abaixo. Certifique-se de que marcou o botão "*Advanced*" e incluiu as *wildcards* `*` para incluir entradas pluralizadas ou com um "e" extra no final.
+
+{% include figure.html filename="AdvancedSearchExample.png" caption="Exemplo de Busca Avançada no *Old Bailey*" %}
+
+Execute a busca e depois clique no *link* "*Calculate Total*" para ver quantas entradas existem. Agora temos 13 resultados (caso tenha um número diferente, volte e certifique-se de que copiou o exemplo acima da forma exata). O que queremos fazer neste ponto é o *download* de todos esses ficheiros de julgamento e analisá-los mais profundamente. Mais uma vez, para apenas 13 registros, também pode fazer o *download* de cada registro manualmente. Mas à medida que mais e mais dados são disponibilizados *online*, torna-se mais comum a necessidade de baixar 1.300 ou até 130.000 registros, caso no qual o *download* individual dos registros se torna impraticável e entender como automatizar o processo se torna muito valioso. Para automatizar o processo, precisamos de dar um passo atrás e lembrar como as URLs de busca são criadas no *Old Bailey website*, um método comum para muitas bases de dados *online* e *websites*.
+
+## Entendendo *Queries* de URL
+
+Observe a URL produzida pela última página de resultados de busca. Ela deve se parecer com isso:
+
+```
+https://www.oldbaileyonline.org/search.jsp?gen=1&form=searchHomePage&_divs_fulltext=mulatto*+negro*&kwparse=advanced&_divs_div0Type_div1Type=sessionsPaper_trialAccount&fromYear=1700&fromMonth=00&toYear=1750&toMonth=99&start=0&count=0
+```
+
+Aprendemos sobre URLs em [Noções básicas de páginas web e HTML](/pt/licoes/nocoes-basicas-paginas-web-html), mas isso parece muito mais complexo. Ainda que mais longa, a URL *não* é verdadeiramente muito mais complexa, e torna-se mais fácil de entender observando como os nossos critérios de busca estão representados nela.
+ +``` +https://www.oldbaileyonline.org/search.jsp +?gen=1 +&form=searchHomePage +&_divs_fulltext=mulatto*+negro* +&kwparse=advanced +&_divs_div0Type_div1Type=sessionsPaper_trialAccount +&fromYear=1700 +&fromMonth=00 +&toYear=1750 +&toMonth=99 +&start=0 +&count=0 +``` + +Nessa visão, vemos com mais clareza as 12 informações importantes que precisamos para realizar a nossa busca (uma por linha). Na primeira há a URL base do *Old Bailey website*, seguida por uma query "?" (não se preocupe com o *bit* `gen=1`; os desenvolvedores do *Old Bailey Online* dizem que ele não faz nada) e uma série de 10 pares *nome/valor* unidos por caracteres `&`. Juntos, esses 10 pares de nome/valor compõem a *query string* (expressão de busca), que informa ao mecanismo de busca quais variáveis usar em etapas específicas da investigação. Observe que cada par nome/valor contém um nome de variável: `toYear` e, em seguida, atribui a essa variável um valor: `1750`. Isso funciona exatamente da mesma forma que os *Argumentos de Função*, passando certas informações para variáveis específicas. Nesse caso, a variável mais importante é `_divs_fulltext=`, para a qual foi dado o valor: + +``` +mulatto*+negro* +``` + +Esta contém o termo que digitamos na caixa de busca. O programa adicionou automaticamente um sinal de soma `+` no lugar de um espaço em branco (URLs não podem conter espaçamentos); dito de outro modo, isso é exatamente o que pedimos que o *site* do *Old Bailey* encontrasse. As outras variáveis carregam valores que nós também definimos. `fromYear` e `toYear` contém o nosso intervalo de datas. Já que nenhum ano possui 99 meses, como sugerido na variável `toMonth`, podemos assumir que esse seja o modo através do qual o algoritmo garante que todos os registros daquele ano são incluídos. Não há regras difíceis ou rápidas para descobrir o que cada variável faz, porque a pessoa que criou o site as nomeou. Muitas vezes pode fazer uma suposição razoável. Todos os campos de busca possíveis na página de busca avançada possuem os seus próprios pares nome/valor. Caso deseje descobrir o nome da variável de modo a que possa utilizá-la, faça uma nova busca e certifique-se de colocar um valor no campo no qual está interessado. Após submeter a sua busca, verá o seu valor e o nome associado a ele como parte da URL da página dos resultados de busca. Com o *Old Bailey Online*, assim como com noutros *websites*, o formulário de busca (avançada ou não) ajuda, essencialmente, a construir URLs que informam à base de dados o que está buscando. Se puder entender como os campos de busca estão representados no URL - o que geralmente é algo bem direto -, então torna-se relativamente simples construir esses URLs programaticamente e automatizar o processo de *download* de registros. + +Agora tente alterar o `start=0` para `start=10` e pressione `enter`. Deve agora ter os resultados 11-13. A variável `start` informa ao *website* qual a entrada que deve ser mostrada no início da lista de resultados de busca. Nós devemos ser capazes de utilizar esse conhecimento para criar uma série de URLs que nos permitirão fazer o *download* de todos os 13 ficheiros. Vamos nos voltar para isso agora. + +## Fazendo o *Download* de Ficheiros Sistematicamente + +Na lição [Download de Páginas Web com Python](/pt/licoes/download-paginas-web-python), aprendemos que o Python pode fazer o *download* de uma página web desde que tenhamos a URL. Naquela lição, usamos a URL para fazer o *download* da transcrição do julgamento de Benjamin Bowsey. 
Nesse caso, estamos tentando fazer o *download* de múltiplas transcrições de julgamentos que atendem aos critérios de busca descritos acima sem precisar executar o programa repetidamente. Ao invés disso, queremos um programa que faça o *download* de tudo de uma vez. Neste ponto, temos a URL para a página de resultados de busca que contém as 10 primeiras entradas na nossa investigação. Também sabemos que ao mudarmos o valor de `start` na URL, podemos sequencialmente chamar cada uma das páginas de resultados de busca e finalmente recuperar todos os ficheiros de julgamento que elas possuem. É claro que os resultados de busca não nos oferecem os ficheiros do julgamento em si, mas apenas *links* para eles. Então precisamos de extrair esses *links* para os registros subjacentes dos resultados de busca. No *Old Bailey Online website*, as URLs para os registros individuais (os ficheiros de transcrição de julgamento) podem ser encontrados como *links* na página de resultados de busca. Sabemos que todas as transcrições de julgamento possuem um id de julgamento que assume a forma: "t" seguido por, pelo menos, 8 números (ex.: t17800628-33). Ao buscar *links* que contenham esse padrão, podemos identificar URLs de transcrição de julgamento. Como em lições anteriores, vamos desenvolver um algoritmo de modo a que possamos começar a enfrentar esse problema de uma maneira que o computador possa lidar. Parece que a tarefa pode ser realizada em 4 passos. Precisaremos: + +- Gerar as URLs para cada página de resultados de busca incrementando a variável `start` numa quantidade fixa um número apropriado de vezes. +- Fazer o *download* de cada página de resultados de busca como um ficheiro HTML. +- Extrair os URLs de cada transcrição de julgamento (usando o ID do julgamento como descrito acima) de cada ficheiro HTML de resultados de busca. +- Percorrer essas URLs extraídas para baixar cada transcrição de avaliação e salvá-las num diretório no nosso computador. + +Perceberá que isso é razoavelmente similiar às tarefas que realizamos em [Download de Páginas Web com Python](/pt/licoes/download-paginas-web-python) e [De HTML para Lista de Palavras (parte 2)](/pt/licoes/HTML-lista-palavras-2). Primeiro, fazemos o *download* e, então, analisamos as informações que procuramos. E, nesse caso, fazemos mais alguns *downloads*. + +## Fazendo o *Download* das Páginas de Resultados de Busca + +Primeiro, precisamos de gerar as URLs para fazer o download de cada página de resultados de busca. Já temos a primeira usando a forma do próprio *website*. + +``` +https://www.oldbaileyonline.org/search.jsp?gen=1&form=searchHomePage&_divs_fulltext=mulatto*+negro*&kwparse=advanced&_divs_div0Type_div1Type=sessionsPaper_trialAccount&fromYear=1700&fromMonth=00&toYear=1750&toMonth=99&start=0&count=0 +``` + +Poderíamos escrever essa URL duas vezes e alterar a variável `start` para obter todas as 13 entradas, mas vamos escrever um programa que funcionaria independentemente de quantas páginas de resultados de busca ou registros precisássemos de fazer *download*, não importando o que decidíssemos investigar. Estude esse código e, depois, adicione essa função ao seu módulo chamado `obo.py` (crie um ficheiro com esse nome e armazene-o no diretório onde deseja trabalhar). Os comentários no código destinam-se a ajudá-lo a decifrar as várias partes. + +``` python +# obo.py +def getSearchResults(query, kwparse, fromYear, fromMonth, toYear, toMonth): + + import urllib.request + + startValue = 0 + + # cada parte do URL. 
+    url = 'https://www.oldbaileyonline.org/search.jsp?gen=1&form=searchHomePage&_divs_fulltext='
+    url += query
+    url += '&kwparse=' + kwparse
+    url += '&_divs_div0Type_div1Type=sessionsPaper_trialAccount'
+    url += '&fromYear=' + fromYear
+    url += '&fromMonth=' + fromMonth
+    url += '&toYear=' + toYear
+    url += '&toMonth=' + toMonth
+    url += '&start=' + str(startValue)
+    url += '&count=0'
+
+    # faz o download da página e armazena o resultado.
+    response = urllib.request.urlopen(url)
+    webContent = response.read().decode('UTF-8')
+    filename = 'search-result'
+    f = open(filename + ".html", 'w')
+    f.write(webContent)
+    f.close()
+```
+
+Nessa função, separamos os vários componentes da *Query String* e usamos Argumentos de Função para que a função possa ser reutilizada além dos nossos objetivos específicos atuais. Quando chamarmos essa função, substituiremos os argumentos pelos valores que desejamos buscar. Depois, fazemos o *download* das páginas dos resultados de busca de maneira similar a como foi feito em [Download de Páginas Web com Python](/pt/licoes/download-paginas-web-python). Agora, crie um novo ficheiro, `download-searches.py`, e copie o código a seguir dentro dele. Observe: os valores que passamos como argumentos são exatamente os mesmos dos utilizados no exemplo acima. Sinta-se livre para testá-los para receber resultados diferentes ou ver como funcionam.
+
+``` python
+#download-searches.py
+import obo
+
+query = 'mulatto*+negro*'
+
+obo.getSearchResults(query, "advanced", "1700", "00", "1750", "99")
+```
+
+Quando executar esse código, deve encontrar um novo ficheiro, `search-result.html`, no seu diretório `programming-historian`, contendo a primeira página dos resultados de busca da sua investigação. Certifique-se de que o *download* foi realizado apropriadamente e apague o ficheiro. Vamos adaptar o nosso programa para fazer também o *download* da outra página, que contém as outras 3 entradas, de uma só vez; assim, garantimos que obteremos as duas. Vamos refinar a nossa função `getSearchResults` adicionando outro argumento de função chamado `entries`, de modo a que possamos dizer ao programa de quantas páginas de resultados de busca precisamos fazer o *download*. Usaremos o valor das entradas e matemática simples para determinar quantas páginas de resultados de busca existem. Isso é algo bastante direto, uma vez que sabemos que há dez transcrições de julgamento listadas por página. Podemos calcular o número de páginas de resultados de busca dividindo o valor das entradas por 10. Armazenaremos esse resultado na variável chamada `pageCount`. Ela se parecerá com isso:
+
+``` python
+# determina quantos ficheiros precisam ser baixados.
+pageCount = entries / 10
+```
+
+No entanto, em casos em que o número de entradas não é um múltiplo de 10, isso resultará num número decimal. Pode testá-lo executando esse código no seu Terminal (Mac & Linux) / Linha de Comandos Python (Windows) e exibindo o valor mantido em `pageCount`. (Observe que, daqui em diante, usaremos a palavra Terminal para nos referirmos a esse programa.)
+
+``` python
+entries = 13
+pageCount = entries / 10
+print(pageCount)
+-> 1.3
+```
+
+Sabemos que o número de páginas deve ser 2 (uma página contendo as entradas 1-10 e uma página contendo as entradas 11-13). Uma vez que sempre queremos arredondar para o inteiro imediatamente superior, podemos arredondar para cima o resultado da divisão.
+
+``` python
+# determina quantos ficheiros precisam ser baixados.
+import math
+pageCount = entries / 10
+pageCount = math.ceil(pageCount)
+```
+
+Se adicionarmos isso à nossa função `getSearchResults`, abaixo da linha `startValue = 0`, o código passa a ser capaz de calcular o número de páginas cujo *download* precisa de ser realizado. No entanto, nesta etapa ele ainda fará somente o *download* da primeira página, já que a seção de *download* da função é executada somente uma vez. Para corrigir isso, podemos colocar o código de *download* dentro dum `for` *loop*, que fará o *download* uma vez para cada número na variável `pageCount`. Caso ela contenha 1, fará o *download* uma vez; caso contenha 5, fará o *download* cinco vezes, e assim por diante. Imediatamente após as linhas de `pageCount` que acabou de escrever, adicione a linha a seguir e indente tudo, até `f.close()`, com um nível adicional de indentação, de modo que tudo fique dentro do `for` *loop*:
+
+``` python
+for pages in range(1, pageCount+1):
+    print(pages)
+```
+
+Uma vez que isso é um `for` *loop*, todo o código que desejamos executar repetidamente também precisa de estar indentado dentro dele. Pode certificar-se de que fez isso corretamente verificando o código finalizado no exemplo abaixo. Esse *loop* aproveita a função [range](https://docs.python.org/3/tutorial/controlflow.html#the-range-function) do Python. Para entender esse `for` *loop*, provavelmente é melhor pensar em `pageCount` igual a 2, como no exemplo. Portanto, essas duas linhas de código significam: comece a executar com um valor de *loop* inicial 1 e, a cada vez que executar, adicione uma unidade a esse valor; quando o valor do *loop* for igual a `pageCount`, execute mais uma vez e pare. Isso é particularmente valioso porque significa que podemos dizer ao nosso programa para executar exatamente uma vez para cada página de resultados de busca, e oferece uma maneira nova e flexível de controlar quantas vezes um `for` *loop* é executado. Caso deseje praticar essa nova e poderosa maneira de escrever *loops*, pode abrir o seu Terminal e brincar.
+
+``` python
+pageCount = 2
+for pages in range(1, pageCount+1):
+    print(pages)
+
+-> 1
+-> 2
+```
+
+Antes de adicionar todo esse código à nossa função `getSearchResults`, temos que fazer dois ajustes finais. No final do `for` *loop* (mas ainda dentro do *loop*) e depois que o nosso código de *download* for executado, precisamos de mudar a nossa variável `startValue`, que é usada na construção da URL da página que desejamos fazer o *download*. Se nos esquecermos de fazer isso, o nosso programa fará repetidamente o *download* da primeira página de resultados de busca, já que não estaremos verdadeiramente mudando nada na URL inicial. A variável `startValue`, como discutido acima, é o que controla de qual página de resultados de busca desejamos fazer o *download*. Portanto, podemos solicitar a próxima página de resultados de busca incrementando o valor de `startValue` em 10 unidades depois que o *download* inicial for concluído. Caso não tenha certeza de onde adicionar essa linha, pode espiar adiante o código finalizado no exemplo abaixo.
+
+Finalmente, queremos garantir que os nomes dos ficheiros baixados sejam diferentes entre si. De outro modo, cada *download* será armazenado em cima do *download* anterior, deixando apenas um único ficheiro de resultados de busca.
+Para resolver isso, podemos ajustar os conteúdos da variável `filename` para incluir o valor armazenado em `startValue`, de modo que, a cada vez que fizermos o *download* de uma nova página, ela receba um nome diferente. Já que a variável `startValue` é um inteiro, precisaremos de convertê-la para uma string antes de adicioná-la à variável `filename`. Ajuste a linha do seu programa que define a variável `filename` para ficar assim:
+
+``` python
+filename = 'search-result' + str(startValue)
+```
+
+Agora deve ser capaz de adicionar essas novas linhas de código à sua função `getSearchResults`. Lembre-se de que fizemos as adições a seguir:
+
+- Adicionar `entries` como um argumento de função adicional logo depois de `toMonth`
+- Calcular o número de páginas de resultados de busca e adicionar isso imediatamente após a linha que começa com `startValue = 0` (antes de construirmos a URL e começarmos o *download*)
+- Imediatamente após isso, adicionar um `for` *loop* que informará ao programa para executar uma vez para cada página de resultados de busca, e indentar o resto do código de modo a que ele esteja dentro do novo *loop*
+- A última linha no `for` *loop* deve agora incrementar o valor da variável `startValue` a cada vez que o *loop* é executado
+- Ajustar a variável `filename` existente de modo que, a cada vez que for feito o *download* de uma página de resultados de busca, ela forneça um nome único ao ficheiro
+
+A função finalizada no seu ficheiro `obo.py` deve parecer-se com isto:
+
+``` python
+# cria URLs para páginas de resultados de busca e armazena os ficheiros.
+def getSearchResults(query, kwparse, fromYear, fromMonth, toYear, toMonth, entries):
+
+    import urllib.request, math
+
+    startValue = 0
+
+    # isso é novo! determina quantos ficheiros precisam ser baixados.
+    pageCount = entries / 10
+    pageCount = math.ceil(pageCount)
+
+    # essa linha é nova!
+    for pages in range(1, pageCount + 1):
+
+        # cada parte da URL. Dividida para facilitar a leitura.
+        url = 'https://www.oldbaileyonline.org/search.jsp?gen=1&form=searchHomePage&_divs_fulltext='
+        url += query
+        url += '&kwparse=' + kwparse
+        url += '&_divs_div0Type_div1Type=sessionsPaper_trialAccount'
+        url += '&fromYear=' + fromYear
+        url += '&fromMonth=' + fromMonth
+        url += '&toYear=' + toYear
+        url += '&toMonth=' + toMonth
+        url += '&start=' + str(startValue)
+        url += '&count=0'
+
+        # faz o download da página e salva o resultado.
+        response = urllib.request.urlopen(url)
+        webContent = response.read().decode('UTF-8')
+        filename = 'search-result' + str(startValue)
+        f = open(filename + ".html", 'w')
+        f.write(webContent)
+        f.close()
+
+        # essa linha é nova!
+        startValue = startValue + 10
+```
+
+Para executar essa nova função, adicione o argumento extra ao `download-searches.py` e execute o programa novamente:
+
+``` python
+#download-searches.py
+import obo
+
+query = 'mulatto*+negro*'
+
+obo.getSearchResults(query, "advanced", "1700", "00", "1750", "99", 13)
+```
+
+Ótimo! Agora temos as duas páginas de resultados de busca, chamadas `search-result0.html` e `search-result10.html`. Mas, antes de seguirmos para o próximo passo do algoritmo, vamos cuidar de algumas "tarefas de organização". O nosso diretório `programming-historian` rapidamente se tornará difícil de controlar se fizermos o *download* de múltiplas páginas de resultados de busca e transcrições de julgamento. Vamos fazer com que o Python crie um novo diretório nomeado a partir dos nossos termos de busca.
+
+Desejamos adicionar essa nova funcionalidade em `getSearchResults`, de modo que os *downloads* das nossas páginas de resultados de busca sejam direcionados a um diretório com o mesmo nome da nossa *query* de busca. Isso manterá o nosso diretório `programming-historian` mais organizado. Para fazê-lo, criaremos um novo diretório usando a biblioteca `os`, abreviação de "*operating system*" (sistema operacional). Essa biblioteca contém uma função chamada `makedirs` que, não surpreendentemente, cria um novo diretório. Pode testar usando o Terminal:
+
+``` python
+import os
+
+query = "meuNovoDiretório"
+if not os.path.exists(query):
+    os.makedirs(query)
+```
+
+Esse programa irá verificar se o seu computador já possui um diretório com esse nome e, caso não possua, irá criá-lo. Agora deve existir um diretório chamado `meuNovoDiretório` no seu computador. Num Mac, ele provavelmente está localizado no seu diretório `/Users/username/`; no Windows, deve ser capaz de encontrá-lo no diretório `Python` no seu computador, o mesmo no qual abriu o programa da linha de comandos. Se isso funcionou, pode deletar o diretório do seu disco rígido, já que isso foi só uma prática. Uma vez que desejamos criar um novo diretório nomeado a partir da *query* que inserimos no *Old Bailey Online website*, vamos usar diretamente o argumento `query` da função `getSearchResults`. Para fazer isso, importe a biblioteca `os` junto das outras e, depois, adicione o código que acabou de escrever imediatamente abaixo. A sua função `getSearchResults` deve agora parecer-se com isto:
+
+``` python
+# cria URLs para páginas de resultados de busca e armazena os ficheiros.
+def getSearchResults(query, kwparse, fromYear, fromMonth, toYear, toMonth, entries):
+
+    import urllib.request, math, os
+
+    # Essa linha é nova! Cria um novo diretório.
+    if not os.path.exists(query):
+        os.makedirs(query)
+
+    startValue = 0
+
+    # Determina quantos ficheiros precisam ser baixados.
+    pageCount = entries / 10
+    pageCount = math.ceil(pageCount)
+
+    for pages in range(1, pageCount + 1):
+
+        # cada parte da URL. Dividida para facilitar a leitura.
+        url = 'https://www.oldbaileyonline.org/search.jsp?gen=1&form=searchHomePage&_divs_fulltext='
+        url += query
+        url += '&kwparse=' + kwparse
+        url += '&_divs_div0Type_div1Type=sessionsPaper_trialAccount'
+        url += '&fromYear=' + fromYear
+        url += '&fromMonth=' + fromMonth
+        url += '&toYear=' + toYear
+        url += '&toMonth=' + toMonth
+        url += '&start=' + str(startValue)
+        url += '&count=0'
+
+        # faz o download da página e salva o resultado.
+        response = urllib.request.urlopen(url)
+        webContent = response.read().decode('UTF-8')
+
+        # armazena o resultado num novo diretório.
+        filename = 'search-result' + str(startValue)
+
+        f = open(filename + ".html", 'w')
+        f.write(webContent)
+        f.close()
+
+        startValue = startValue + 10
+```
+
+O último passo para essa função é garantir que, quando salvarmos as nossas páginas de resultados de busca, as armazenaremos nesse novo diretório. Para fazer isso, podemos fazer um pequeno ajuste à variável `filename` de modo a que o ficheiro termine no lugar certo. Há muitas formas de o fazer; a mais fácil é simplesmente adicionar o nome do novo diretório, mais uma barra, ao nome do ficheiro:
+
+``` python
+filename = query + '/' + 'search-result' + str(startValue)
+```
+
+Caso o seu computador esteja executando o Windows, precisará de uma barra invertida em vez da barra do exemplo acima. Adicione a linha acima à sua função `getSearchResults` no lugar da definição atual de `filename`.
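+
+Se quiser evitar essa diferença entre sistemas operacionais, uma alternativa é deixar a função `os.path.join()` - usada mais adiante nesta lição através do apelido `pjoin` - escolher o separador correto automaticamente. Um esboço mínimo, assumindo `query` e `startValue` já definidas:
+
+``` python
+import os
+
+# valores de exemplo, apenas para ilustrar.
+query = 'meuNovoDiretório'
+startValue = 0
+
+# os.path.join usa '/' ou '\' conforme o sistema operacional.
+filename = os.path.join(query, 'search-result' + str(startValue))
+print(filename)
+```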
+
+Se estiver executando o Windows, é provável que o seu programa `download-searches.py` falhe ao ser executado, porque estará tentando criar um diretório com um \* nele. O Windows não gosta disso. Para resolver esse problema, podemos usar [expressões regulares](https://docs.python.org/3/library/re.html) para remover qualquer caractere não compatível com o Windows. Usamos expressões regulares anteriormente em [Contagem de Frequências de Palavras com Python](/pt/licoes/contar-frequencias-palavras-python). Para remover caracteres não-alfanuméricos da *query*, primeiro importe a biblioteca de expressões regulares imediatamente após importar a biblioteca `os` e, depois, use a função `re.sub()` para criar uma nova string chamada `cleanQuery` que contém apenas caracteres alfanuméricos. Depois, precisará de usar `cleanQuery` como a variável nas chamadas a `os.path.exists()` e `os.makedirs()` e na definição de `filename`.
+
+``` python
+import urllib.request, math, os, re
+cleanQuery = re.sub(r'\W+', '', query)
+if not os.path.exists(cleanQuery):
+    os.makedirs(cleanQuery)
+
+...
+
+filename = cleanQuery + '/' + 'search-result' + str(startValue)
+```
+
+A versão final da sua função deve parecer-se com isto:
+
+``` python
+# cria URLs para páginas de resultados de busca e armazena os ficheiros.
+def getSearchResults(query, kwparse, fromYear, fromMonth, toYear, toMonth, entries):
+
+    import urllib.request, math, os, re
+
+    cleanQuery = re.sub(r'\W+', '', query)
+    if not os.path.exists(cleanQuery):
+        os.makedirs(cleanQuery)
+
+    startValue = 0
+
+    # Determina quantos ficheiros precisam ser baixados.
+    pageCount = entries / 10
+    pageCount = math.ceil(pageCount)
+
+    for pages in range(1, pageCount + 1):
+
+        # cada parte da URL. Dividida para facilitar a leitura.
+        url = 'https://www.oldbaileyonline.org/search.jsp?gen=1&form=searchHomePage&_divs_fulltext='
+        url += query
+        url += '&kwparse=' + kwparse
+        url += '&_divs_div0Type_div1Type=sessionsPaper_trialAccount'
+        url += '&fromYear=' + fromYear
+        url += '&fromMonth=' + fromMonth
+        url += '&toYear=' + toYear
+        url += '&toMonth=' + toMonth
+        url += '&start=' + str(startValue)
+        url += '&count=0'
+
+        # faz o download da página e salva o resultado.
+        response = urllib.request.urlopen(url)
+        webContent = response.read().decode('UTF-8')
+        filename = cleanQuery + '/' + 'search-result' + str(startValue)
+        f = open(filename + ".html", 'w')
+        f.write(webContent)
+        f.close()
+
+        startValue = startValue + 10
+```
+
+Dessa vez, dizemos ao programa para fazer o *download* das páginas de resultados de busca e armazená-las num novo diretório, ao invés do nosso diretório `programming-historian`. Execute o programa `download-searches.py` mais uma vez para se certificar de que ele funcionou e de que entendeu como armazenar os ficheiros num diretório específico usando Python.
+
+### Fazendo o *Download* das Entradas de Julgamento Individuais
+
+Neste ponto, criamos uma função que é capaz de fazer o *download* de todos os ficheiros HTML de resultados de busca a partir do website *Old Bailey Online* para uma busca avançada que definimos e desenvolvemos de forma programática. Agora, o próximo passo do algoritmo: extrair as URLs de cada transcrição de julgamento dos ficheiros HTML de resultados de busca. Nas lições que precedem esta (ex.: [Download de Páginas Web com Python](/pt/licoes/download-paginas-web-python)), trabalhamos com as versões para exibição das transcrições dos julgamentos, e continuaremos a fazer isso.
+Sabemos que a versão de exibição do julgamento de Benjamin Bowsey está localizada na URL:
+
+```
+http://www.oldbaileyonline.org/print.jsp?div=t17800628-33
+```
+
+Da mesma forma que alterar as *query strings* nas URLs gera resultados de busca diferentes, alterar a URL dos registros de julgamento - no caso, substituir um ID de julgamento por outro - nos fará obter a transcrição daquele novo julgamento. Isso significa que, para encontrar e fazer o *download* dos 13 ficheiros que buscamos, tudo o que precisamos são esses IDs de julgamento. Uma vez que sabemos que essas páginas de resultados de busca geralmente contêm um *link* para as páginas descritas, há uma boa chance de que consigamos encontrar esses *links* integrados ao código HTML. Se formos capazes de raspar essa informação das páginas de resultados de busca das quais fizemos *download*, podemos então usar essa informação para gerar uma URL que nos permitirá fazer o *download* de cada transcrição de julgamento. Essa é uma técnica que poderá utilizar na maioria dos sites com páginas de resultados de busca, não só no *Old Bailey Online*! Para fazer isso, primeiro precisamos de encontrar onde os IDs de julgamento estão no código HTML dos ficheiros baixados e, depois, determinar uma maneira de isolá-los consistentemente usando código, de modo a que, independentemente de qual página de resultados de busca baixarmos, sejamos capazes de encontrar as transcrições de julgamento. Primeiro, abra `search-result0.html` no Komodo Edit e dê uma olhada na lista de julgamentos. A primeira entrada começa com "Anne Smith", então pode usar o recurso `find` no Komodo Edit para pular imediatamente para o lugar certo. Observe que o nome de Anne faz parte de um *link*:
+
+```
+browse.jsp?id=t17160113-18&div=t17160113-18&terms=mulatto*_negro*#highlight
+```
+
+Perfeito, o *link* contém o ID do julgamento! Percorra as entradas restantes e verá que isso é verdade em todos os casos. Para nossa sorte, o *site* é bem formatado e parece que cada *link* começa com `browse.jsp?id=` seguido pelo ID do julgamento e termina com um `&`; no caso de Anne: `browse.jsp?id=t17160113-18&`. Podemos escrever algumas linhas de código que sejam capazes de isolar esses IDs. Veja a função a seguir. Essa função também usa a biblioteca `os`, nesse caso para listar todos os ficheiros localizados no diretório criado na seção anterior. A biblioteca `os` possui uma gama de funções úteis que imitam os tipos de tarefas que esperaria ser capaz de fazer com o seu mouse no Mac Finder ou no Windows, como abrir, fechar, criar, deletar e mover ficheiros e diretórios, e é uma biblioteca que vale a pena dominar - ou, pelo menos, com a qual vale a pena familiarizar-se.
+
+``` python
+def getIndivTrials(query):
+    import os, re
+
+    cleanQuery = re.sub(r'\W+', '', query)
+    searchResults = os.listdir(cleanQuery)
+
+    print(searchResults)
+```
+
+Crie e execute um novo programa chamado `extract-trial-ids.py` com o código a seguir. Certifique-se de inserir o mesmo valor no argumento da *query* que usou no exemplo anterior:
+
+``` python
+import obo
+
+obo.getIndivTrials("mulatto*+negro*")
+```
+
+Se tudo correu bem, deve ver uma lista contendo o nome de todos os ficheiros no seu novo diretório (`mulattonegro`, a versão da *query* sem caracteres especiais), que a essa altura devem ser as duas páginas de resultados de busca. Certifique-se de que isso funcionou antes de prosseguir.
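+
+Como os IDs seguem um padrão tão regular ("t", oito dígitos de data e o número do julgamento), uma expressão regular também seria capaz de isolá-los. A lição usa, a seguir, o método de string `find()`, mas fica aqui, como alternativa, um esboço com `re.findall()`, assumindo uma variável `text` que contenha o HTML de uma página de resultados:
+
+``` python
+import re
+
+# `text` aqui é só um exemplo; na prática, seria o HTML de uma página de resultados.
+text = 'veja <a href="browse.jsp?id=t17160113-18&div=t17160113-18">Anne Smith</a>'
+
+# captura "t" seguido de 8 dígitos, um hífen e o número do julgamento.
+trialIDs = re.findall(r'id=(t\d{8}-\d+)', text)
+print(trialIDs)   # -> ['t17160113-18']
+```
+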
+Uma vez que armazenamos todas as páginas de resultados de busca com um nome de ficheiro que inclui `search-result`, agora desejamos abrir todos os ficheiros cujo nome contenha `search-result` e extrair todos os IDs de julgamento encontrados neles. Nesse caso, sabemos que temos 2, mas desejamos que o nosso código seja o mais reutilizável possível (dentro do razoável, é claro!). Restringir essa ação a ficheiros denominados `search-result` significará que este programa funcionará como pretendido mesmo que o diretório contenha muitos outros ficheiros não relacionados, já que o programa ignorará qualquer coisa com nome diferente.
+
+Adicione o código a seguir à sua função `getIndivTrials()`, que verificará se cada ficheiro contém `search-result` no seu nome. Se contiver, o ficheiro será aberto e o conteúdo será salvo na variável chamada `text`. Essa variável `text` será analisada na busca por um ID de julgamento, que sabemos que sempre segue `browse.jsp?id=`. Se e quando um ID de julgamento for encontrado, ele será armazenado numa lista e exibido na Saída de Comando, o que nos deixa com todas as informações de que precisamos para então escrever o programa que fará o *download* dos julgamentos desejados.
+
+``` python
+def getIndivTrials(query):
+    import os, re
+
+    cleanQuery = re.sub(r'\W+', '', query)
+    searchResults = os.listdir(cleanQuery)
+
+    urls = []
+
+    # encontra as páginas de resultados de busca.
+    for files in searchResults:
+        if files.find("search-result") != -1:
+            f = open(cleanQuery + "/" + files, 'r')
+            text = f.read().split(" ")
+            f.close()
+
+            # busca os IDs de julgamento.
+            for words in text:
+                if words.find("browse.jsp?id=") != -1:
+                    # isola o ID.
+                    urls.append(words[words.find("id=") + 3: words.find("&")])
+
+    print(urls)
+```
+
+Essa última linha do `for` *loop* pode parecer confusa, mas certifique-se de que a entendeu antes de seguir em frente. A variável `words` é verificada para saber se contém os caracteres `id=` (sem aspas), que obviamente se referem a um ID específico de transcrição de julgamento. Caso contenha, usamos o fatiamento (*slicing*) de strings para capturar apenas o trecho entre `id=` e `&` e o adicionamos à lista `urls`. Se soubéssemos as posições exatas dos índices dessa substring, poderíamos ter usado esses valores numéricos no lugar. No entanto, ao utilizar o método de string `find()`, criamos um programa muito mais flexível. O código a seguir faz exatamente a mesma coisa que essa última linha, mas de maneira menos condensada:
+
+``` python
+idStart = words.find("id=") + 3
+idEnd = words.find("&")
+trialID = words[idStart: idEnd]
+
+urls.append(trialID)
+```
+
+Ao executar novamente o programa `extract-trial-ids.py`, deve ver uma lista de todos os IDs de julgamento. Podemos adicionar algumas linhas extras para transformá-los em URLs propriamente ditas e fazer o *download* de toda a lista para o nosso novo diretório. Também vamos usar a biblioteca `time` para pausar o nosso programa por 3 segundos entre cada *download* - uma técnica chamada *throttling* (em português, estrangulamento). É considerado boa etiqueta não sobrecarregar o servidor de alguém com muitas solicitações por segundo; e o pequeno retardamento torna mais fácil que todos esses ficheiros sejam, de fato, baixados, ao invés de ocorrer um [time out](https://en.wikipedia.org/wiki/Timeout_(computing)). Adicione o código a seguir ao final da sua função `getIndivTrials()`.
+Esse código vai gerar uma URL para cada página individual de julgamento, fazer o *download* da página para o seu computador, colocá-la no seu diretório, armazenar o ficheiro e pausar por 3 segundos antes de continuar para o próximo julgamento. Todo esse trabalho está contido num `for` *loop* e será executado uma vez para cada julgamento na sua lista `urls`.
+
+``` python
+def getIndivTrials(query):
+    #...
+    import urllib.request, time
+
+    # importa funções python built-in para criar caminhos de ficheiro.
+    from os.path import join as pjoin
+
+    for items in urls:
+        # gera a URL.
+        url = "http://www.oldbaileyonline.org/print.jsp?div=" + items
+
+        # faz o download da página.
+        response = urllib.request.urlopen(url)
+        webContent = response.read().decode('UTF-8')
+
+        # cria o nome do ficheiro e coloca-o no novo diretório.
+        filename = items + '.html'
+        filePath = pjoin(cleanQuery, filename)
+
+        # armazena o ficheiro.
+        f = open(filePath, 'w')
+        f.write(webContent)
+        f.close()
+
+        # pausa por 3 segundos.
+        time.sleep(3)
+```
+
+Se unirmos tudo numa única função, ela deve parecer-se com isto (note que reunimos todas as instruções de `import` no início para manter as coisas claras):
+
+``` python
+def getIndivTrials(query):
+    import os, re, urllib.request, time
+
+    # importa funções python built-in para criar caminhos de ficheiro.
+    from os.path import join as pjoin
+
+    cleanQuery = re.sub(r'\W+', '', query)
+    searchResults = os.listdir(cleanQuery)
+
+    urls = []
+
+    # encontra páginas de resultados de busca.
+    for files in searchResults:
+        if files.find("search-result") != -1:
+            f = open(cleanQuery + "/" + files, 'r')
+            text = f.read().split(" ")
+            f.close()
+
+            # busca por IDs de julgamento.
+            for words in text:
+                if words.find("browse.jsp?id=") != -1:
+                    # isola o ID.
+                    urls.append(words[words.find("id=") + 3: words.find("&")])
+
+    # novo daqui em diante!
+    for items in urls:
+        # gera a URL.
+        url = "http://www.oldbaileyonline.org/print.jsp?div=" + items
+
+        # faz o download da página.
+        response = urllib.request.urlopen(url)
+        webContent = response.read().decode('UTF-8')
+
+        # cria o nome do ficheiro e coloca-o no novo diretório.
+        filename = items + '.html'
+        filePath = pjoin(cleanQuery, filename)
+
+        # armazena o ficheiro.
+        f = open(filePath, 'w')
+        f.write(webContent)
+        f.close()
+
+        # pausa por 3 segundos.
+        time.sleep(3)
+```
+
+Vamos adicionar a mesma pausa de três segundos à nossa função `getSearchResults`, para sermos amigáveis aos *servers* do *Old Bailey Online*:
+
+``` python
+# cria URLs para páginas de resultados de busca e armazena os ficheiros.
+def getSearchResults(query, kwparse, fromYear, fromMonth, toYear, toMonth, entries):
+
+    import urllib.request, math, os, re, time
+
+    cleanQuery = re.sub(r'\W+', '', query)
+    if not os.path.exists(cleanQuery):
+        os.makedirs(cleanQuery)
+
+    startValue = 0
+
+    # Determina quantos ficheiros precisam de ser baixados.
+    pageCount = entries / 10
+    pageCount = math.ceil(pageCount)
+
+    for pages in range(1, pageCount + 1):
+
+        # cada parte da URL. Dividida para facilitar a leitura.
+        url = 'https://www.oldbaileyonline.org/search.jsp?gen=1&form=searchHomePage&_divs_fulltext='
+        url += query
+        url += '&kwparse=' + kwparse
+        url += '&_divs_div0Type_div1Type=sessionsPaper_trialAccount'
+        url += '&fromYear=' + fromYear
+        url += '&fromMonth=' + fromMonth
+        url += '&toYear=' + toYear
+        url += '&toMonth=' + toMonth
+        url += '&start=' + str(startValue)
+        url += '&count=0'
+
+        # faz o download da página e armazena o resultado.
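+        # urlopen envia a solicitação HTTP e devolve um objeto de resposta;
+        # read() devolve os bytes da página, que decodificamos como texto UTF-8.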
+        response = urllib.request.urlopen(url)
+        webContent = response.read().decode('UTF-8')
+        filename = cleanQuery + '/' + 'search-result' + str(startValue)
+        f = open(filename + ".html", 'w')
+        f.write(webContent)
+        f.close()
+
+        startValue = startValue + 10
+
+        # pausa por 3 segundos.
+        time.sleep(3)
+```
+
+Finalmente, chame a função no programa `download-searches.py`:
+
+``` python
+#download-searches.py
+import obo
+
+query = 'mulatto*+negro*'
+
+obo.getSearchResults(query, "advanced", "1700", "00", "1750", "99", 13)
+
+obo.getIndivTrials(query)
+```
+
+Agora criou um programa que é capaz de solicitar e fazer o *download* de ficheiros do *Old Bailey website* com base em parâmetros de busca que definiu, tudo sem visitar o *site*!
+
+### No Caso de um Ficheiro Não Ser Baixado
+
+Verifique se o *download* dos treze ficheiros foi realizado corretamente. Se esse for o caso, ótimo! No entanto, há a possibilidade de que esse programa tenha parado no meio do caminho. Isso porque o nosso programa, ao ser executado na nossa máquina, depende de dois fatores além do nosso controle imediato: a velocidade da internet e o tempo de resposta do *server* do *Old Bailey Online* naquele momento. Uma coisa é pedir que o Python faça o *download* de um único ficheiro; quando começamos a solicitar um ficheiro a cada três segundos, há grandes chances de ocorrer um *time out* no *server* ou de ele falhar em nos enviar o ficheiro que estamos buscando.
+
+Se estivéssemos usando um navegador *web* para fazer essas solicitações, acabaríamos recebendo uma mensagem de que "a conexão expirou" ou algo do tipo. Todos nós vemos isso de tempos em tempos. No entanto, o nosso programa não foi desenvolvido para lidar com essas mensagens de erro nem para retransmiti-las, então só perceberá o problema quando o programa não tiver retornado o número esperado de ficheiros ou simplesmente não fizer nada. Para evitar frustrações e incertezas, queremos um mecanismo à prova de falhas no nosso programa, que tentará baixar cada julgamento. Se, por alguma razão, ele falhar, registraremos o problema e passaremos para o próximo julgamento.
+
+Para fazer isso, utilizaremos os mecanismos de tratamento de erros do Python, [try / except](https://docs.python.org/3/tutorial/errors.html), bem como uma nova biblioteca: `socket`. `Try` e `Except` são muito parecidos com um `if / else` *statement*. Quando solicita que o Python `try` (em português, tente) algo, ele tentará executar o código; caso o código falhe em alcançar o que definiu, ele executará o código em `except` (em português, exceção). Isso é frequentemente usado para lidar com erros, o que é conhecido como *error handling* (tratamento de erros). Podemos usá-lo a nosso favor dizendo ao programa para tentar fazer o *download* de uma página; caso o programa falhe, solicitaremos que ele nos informe qual ficheiro falhou e depois prossiga. Para fazer isso, precisamos de usar a biblioteca `socket`, que nos permitirá definir um limite de tempo para um *download* antes de seguir em frente. Isso envolve alterar a função `getIndivTrials`.
+
+Primeiro, precisamos de carregar a biblioteca `socket`, o que deve ser feito da mesma forma que todas as outras importações de biblioteca. Depois, precisamos de importar a biblioteca `urllib.error`, que nos permite lidar com erros de *download*. Também precisamos de definir o *timeout* padrão do *socket* - por quanto tempo desejamos tentar fazer o *download* de uma página antes de desistirmos.
+Isso deve entrar imediatamente após o comentário que começa com `# faz o download da página`:
+
+``` python
+    import os, re, urllib.request, urllib.error, time, socket
+
+    #...
+        # faz o download da página.
+        socket.setdefaulttimeout(10)
+```
+
+Então, precisamos de uma nova lista de Python que armazenará todas as URLs cujo *download* falhou. Vamos chamá-la de `failedAttempts`; pode inseri-la imediatamente após as instruções de importação:
+
+``` python
+failedAttempts = []
+```
+
+Finalmente, podemos adicionar o `try / except` *statement*, de forma muito similar a como um `if / else` *statement* seria adicionado. Nesse caso, vamos colocar todo o código desenvolvido para fazer o *download* e armazenar os julgamentos no `try` *statement*, e no `except` *statement* vamos dizer ao programa o que desejamos que ele faça caso falhe. Aqui, vamos adicionar a URL cujo *download* falhou à nossa nova lista, `failedAttempts`:
+
+``` python
+#...
+
+        socket.setdefaulttimeout(10)
+
+        try:
+            response = urllib.request.urlopen(url)
+            webContent = response.read().decode('UTF-8')
+
+            # cria o nome do ficheiro e coloca-o no novo diretório.
+            filename = items + '.html'
+            filePath = pjoin(cleanQuery, filename)
+
+            # armazena o ficheiro.
+            f = open(filePath, 'w')
+            f.write(webContent)
+            f.close()
+        except urllib.error.URLError:
+            failedAttempts.append(url)
+```
+
+Finalmente, diremos ao programa para exibir os conteúdos da lista na Saída de Comando, de modo que saibamos quais ficheiros falharam no *download*. Isso deve ser adicionado como a linha final da função:
+
+``` python
+print("failed to download: " + str(failedAttempts))
+```
+
+Agora, ao executarmos o programa, caso haja algum problema no *download* de um ficheiro específico, receberá uma mensagem na janela de Saída de Comando do Komodo Edit. Essa mensagem conterá as URLs dos ficheiros cujo *download* falhou. Caso haja apenas um ou dois, provavelmente é mais fácil simplesmente visitar as páginas manualmente e usar o recurso "Salvar Como" do seu navegador. Caso esteja se sentindo aventureiro, poderia modificar o programa para automaticamente fazer o *download* dos ficheiros faltantes. A versão final das suas funções `getSearchResults()` e `getIndivTrials()` deve parecer-se com isto:
+
+``` python
+# cria URLs para páginas de resultados de busca e armazena os ficheiros.
+def getSearchResults(query, kwparse, fromYear, fromMonth, toYear, toMonth, entries):
+
+    import urllib.request, math, os, re, time
+
+    cleanQuery = re.sub(r'\W+', '', query)
+    if not os.path.exists(cleanQuery):
+        os.makedirs(cleanQuery)
+
+    startValue = 0
+
+    # determina quantos ficheiros precisam de ser baixados.
+    pageCount = entries / 10
+    pageCount = math.ceil(pageCount)
+
+    for pages in range(1, pageCount + 1):
+
+        # cada parte da URL. Dividida para facilitar a leitura.
+        url = 'https://www.oldbaileyonline.org/search.jsp?gen=1&form=searchHomePage&_divs_fulltext='
+        url += query
+        url += '&kwparse=' + kwparse
+        url += '&_divs_div0Type_div1Type=sessionsPaper_trialAccount'
+        url += '&fromYear=' + fromYear
+        url += '&fromMonth=' + fromMonth
+        url += '&toYear=' + toYear
+        url += '&toMonth=' + toMonth
+        url += '&start=' + str(startValue)
+        url += '&count=0'
+
+        # faz o download da página e salva o resultado.
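+        # urlopen envia a solicitação HTTP e devolve um objeto de resposta;
+        # read() devolve os bytes da página, que decodificamos como texto UTF-8.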
+        response = urllib.request.urlopen(url)
+        webContent = response.read().decode('UTF-8')
+        filename = cleanQuery + '/' + 'search-result' + str(startValue)
+        f = open(filename + ".html", 'w')
+        f.write(webContent)
+        f.close()
+
+        startValue = startValue + 10
+
+        # pausa por 3 segundos.
+        time.sleep(3)
+
+def getIndivTrials(query):
+    import os, re, urllib.request, urllib.error, time, socket
+
+    failedAttempts = []
+
+    # importa funções python built-in para criar caminhos de ficheiro.
+    from os.path import join as pjoin
+
+    cleanQuery = re.sub(r'\W+', '', query)
+    searchResults = os.listdir(cleanQuery)
+
+    urls = []
+
+    # encontra páginas de resultados de busca.
+    for files in searchResults:
+        if files.find("search-result") != -1:
+            f = open(cleanQuery + "/" + files, 'r')
+            text = f.read().split(" ")
+            f.close()
+
+            # busca por IDs de julgamento.
+            for words in text:
+                if words.find("browse.jsp?id=") != -1:
+                    # isola o ID.
+                    urls.append(words[words.find("id=") + 3: words.find("&")])
+
+    for items in urls:
+        # gera a URL.
+        url = "http://www.oldbaileyonline.org/print.jsp?div=" + items
+
+        # faz o download da página.
+        socket.setdefaulttimeout(10)
+        try:
+            response = urllib.request.urlopen(url)
+            webContent = response.read().decode('UTF-8')
+
+            # cria o nome do ficheiro e coloca-o no novo diretório.
+            filename = items + '.html'
+            filePath = pjoin(cleanQuery, filename)
+
+            # armazena o ficheiro.
+            f = open(filePath, 'w')
+            f.write(webContent)
+            f.close()
+        except urllib.error.URLError:
+            failedAttempts.append(url)
+
+        # pausa por 3 segundos.
+        time.sleep(3)
+
+    print("failed to download: " + str(failedAttempts))
+```
+
+## Leituras Adicionais
+
+Para usuários mais avançados, ou para se tornar um usuário mais avançado, pode valer a pena ler sobre como alcançar esse mesmo processo usando Interfaces de Programação de Aplicações (API). Geralmente, um *website* com uma API dá instruções de como solicitar certos documentos. É um processo bastante similar ao que acabamos de fazer interpretando a *Query String* da URL, mas sem o trabalho de investigação adicional necessário para decifrar o que cada variável faz. Caso esteja interessado no *Old Bailey Online*, a equipe liberou recentemente uma API, e a documentação pode ajudar bastante:
+
+- Old Bailey Online API ()
+- Melhor maneira de criar um diretório para gravação de ficheiros, se ele não existir, usando Python? ()
diff --git a/pt/licoes/download-paginas-web-python.md b/pt/licoes/download-paginas-web-python.md
index 14405f5a58..a365033b8f 100644
--- a/pt/licoes/download-paginas-web-python.md
+++ b/pt/licoes/download-paginas-web-python.md
@@ -1,168 +1,168 @@
----
-title: Download de páginas Web com Python
-layout: lesson
-slug: download-paginas-web-python
-date: 2012-07-17
-translation_date: 2021-03-26
-authors:
-- William J. Turkel
-- Adam Crymble
-reviewers:
-- Jim Clifford
-- Frederik Elwert
-editors:
-- Miriam Posner
-translator:
-- Bruno Gasparotto Ponne
-translation-editor:
-- Josir Cardoso Gomes
-translation-reviewer:
-- Felipe Lamarca
-- Daniel Alves
-difficulty: 2
-review-ticket: https://github.com/programminghistorian/ph-submissions/issues/360
-activity: acquiring
-topics: [python]
-abstract: "Esta lição apresenta o conceito de *Localizador Uniforme de Recursos* (URL em inglês) e explica como usar o Python para fazer o download de uma página *Web* no seu disco local."
-original: working-with-web-pages
-avatar_alt: Um homem alto ao lado de uma mulher baixa
-doi: 10.46430/phpt0010
----
-
-
-{% include toc.html %}
-
-<div class="alert alert-warning">
    -O site do Old Bailey Online foi recentemente atualizado. Infelizmente, devido às diversas mudanças, muitos (se não todos) os elementos do site de exemplo usado nesta lição não funcionarão conforme descrito. No entanto, as metodologias ensinadas por esta lição permanecem relevantes e podem ser adaptadas pelos leitores para um site de exemplo diferente. Estamos trabalhando na adaptação da lição para o novo site do Old Bailey Online, mas ainda não temos cronograma preciso de quando a lição será atualizada. [Abril de 2024] -
    - -### Objetivos da Lição - -Esta lição apresenta o conceito de *Localizador Uniforme de Recursos* (URL em inglês) e explica como usar o Python para fazer o download de uma página *Web* no seu disco local. - -### Sobre URLs - -Uma página *Web* é um ficheiro hospedado noutro computador, conhecido como *servidor*. Quando um site é acessado, na realidade, o seu computador (o *cliente*) envia um pedido ao *servidor de hospedagem* por meio da rede e o servidor responde enviando uma cópia da página ao seu computador. Uma forma de acessar uma página por meio do seu navegador é seguir um link. É possível também colar ou digitar uma URL (localizador uniforme de recursos) diretamente no seu navegador. A URL informa ao seu navegador onde encontrar um recurso online, especificando o servidor, o diretório e o nome do ficheiro a ser recuperado, bem como o tipo de *protocolo* que o servidor e o seu navegador utilizarão para troca de informações (como o HTTP, *protocolo de transferência de hipertexto*). A estrutura básica de uma URL é - -``` -protocol://host:port/path?query -``` - -Vejamos alguns exemplos: - -``` xml -http://oldbaileyonline.org -``` - -O tipo mais básico de URL especifica apenas o protocolo e o domínio. Quando inserido em seu navegador, essa URL retornará a página principal do site [Old Bailey Online](https://www.oldbaileyonline.org). O pressuposto convencional é que a página principal num determinado diretório se chamará *index*, geralmente `index.html`. - -A URL pode incluir também um *número de porta* opcional. Sem entrar em muitos detalhes, o protocolo de rede em que se baseia a troca de informações na Internet permite que computadores se conectem de diferentes maneiras. Números de portas são utilizados para distinguir esses diferentes tipos de conexão. Uma vez que a porta padrão para HTTP é a 80, a seguinte URL é equivalente à anterior. - -``` xml -http://oldbaileyonline.org:80 -``` - -Geralmente há diversas páginas *Web* num determinado site. Essas páginas são armazenadas em diretórios no servidor e é possível especificar o caminho para uma página em particular. A página "About" para o site *The Old Bailey Online* tem a seguinte URL: - -``` xml -http://oldbaileyonline.org/static/Project.jsp -``` - -Por fim, algumas páginas permitem inserir *queries*, termo em inglês que significa pedido, solicitação. O site *The Old Bailey Online*, por exemplo, foi desenvolvido de forma que é possível requisitar uma de suas páginas utilizando uma *query string* (conjunto de caracteres que contém uma solicitação). A seguinte URL acessará uma página de resultado de buscas por registros de julgamentos criminais contendo a palavra "arsenic". - -``` xml -https://www.oldbaileyonline.org/search.jsp?form=searchHomePage&_divs_fulltext=arsenic&kwparse=and&_persNames_surname=&_persNames_given=&_persNames_alias=&_offences_offenceCategory_offenceSubcategory=&_verdicts_verdictCategory_verdictSubcategory=&_punishments_punishmentCategory_punishmentSubcategory=&_divs_div0Type_div1Type=&fromMonth=&fromYear=&toMonth=&toYear=&ref=&submit.x=0&submit.y=0 -``` - -O fragmento a seguir ao sinal "?" representa a *query*. Aprenda mais sobre como criar *queries* na lição [Downloading Multiple Records Using Query Strings](/en/lessons/downloading-multiple-records-using-query-strings) (em inglês). - -### Acessando URLs com Python - -Como um historiador da era digital, você frenquentemente desejará utilizar dados mantidos em sites acadêmicos. 
Para acessar esses dados, seria possível abrir as URLs uma por uma e copiar e colar os conteúdos num ficheiro de texto. Alternativamente, é possível utilizar Python para, automaticamente, coletar e processar os dados. Para isso, é preciso aprender como abrir uma URL por meio do seu próprio código. A linguagem Python inclui uma série de padrões para fazer isso. - -Como exemplo, vamos trabalhar com o tipo de documento que provavelmente você vai encontrar ao realizar uma pesquisa na área de História. Suponhamos que haja interesse nas relações raciais na Inglaterra do século XVIII. O site *The Old Bailey Online* é uma fonte rica de informações históricas e disponibiliza transcrições de julgamentos que ocorreram entre 1674 e 1913. - -{% include figure.html filename="old-bailey.png" caption="A homepage do site The Old Bailey Online" %} - -Para esse exemplo, utilizaremos a transcrição do julgamento de Benjamin Bowsey, um negro condenado por perturbar a paz durante os protestos de Gordon em 1780. A URL para o registro é - -``` xml -http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 -``` - -Estudando a URL, podemos verificar algumas coisas. Primeiro, o site é programado em JSP (*JavaServer Pages*, uma linguagem de programação para a *web* cujo resultado é um ficheiro HTML). Segundo, é possível acessar registros de julgamentos individuais fazendo uso de *query strings*. Cada registro recebe um número único (*id=t* na URL), formado a partir da data da sessão de julgamento no formato (*AAAAMMDD*) e o número do julgamento naquela sessão do tribunal. Neste caso, *33*. Caso as duas ocorrências de `33` sejam trocadas por `34` no link acima, o seu navegador o encaminhará ao próximo julgamento. Infelizmente, nem todos os sites possuem URLs tão acessíveis e confiáveis quanto essa. - -{% include figure.html filename="bowsey-trial-page.png" caption="Transcrição do julgamento de Benjamin Bowsey, 1780" %} - -Observe a página do julgamento de Benjamin Bowsey. Mais importante do que o conteúdo são os elementos presentes na página. Note o link [View as XML](http://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes) na parte inferior. Esse link apresenta uma versão repleta de marcações no texto que podem ser úteis para certos tipos de pesquisa. O [documento original digitalizado](http://www.oldbaileyonline.org/images.jsp?doc=178006280084) do julgamento também pode ser acessado. - -Agora vamos tentar abrir a página utilizando Python. Copie o seguinte programa no *Komodo Edit* e salve o ficheiro como `open-webpage.py`. Quando executar o programa, a página do julgamento será acessada, seus conteúdos serão lidos e copiados numa string chamada `webContent`. Na sequência, os primeiros 300 caracteres serão exibidos no *painel de saída de comandos*. Utilize `Ferramentas -> Ferramentas do Navegador -> Fonte da página` no navegador Firefox para verificar que o código HTML da página é o mesmo que o seu programa acessou. Outros navegadores podem ter caminhos distintos para acessar o código fonte. Caso não consiga encontrar o caminho no seu navegador, tente utilizar um mecanismo de busca para encontrá-lo. (Consulte a biblioteca de referência do Python para aprender mais sobre [urllib](https://docs.python.org/3/library/urllib.html?highlight=urllib).) 
- -``` python -# open-webpage.py - -import urllib.request, urllib.error, urllib.parse - -url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33' - -response = urllib.request.urlopen(url) -webContent = response.read().decode('UTF-8') - -print(webContent[0:300]) -``` - -Utilizando apenas essas cinco linhas de código, é possível obter resultados substanciais. Agora, vamos nos assegurar de que cada linha de código está clara e que é possível distinguir os blocos que permitem ao programa realizar a tarefa que desejamos. - -*url*, *response* e *webContent* são todas variáveis nomeadas por nós. - -*url* contém a URL da página que queremos baixar. Neste exemplo, trata-se do julgamento de Benjamin Bowsey. - -Na linha seguinte, chamamos a função `urlopen`, contida no módulo do Python chamado `urllib.py`, e solicitamos que ela acesse o site especificado na variável *url*. Em seguida, salvamos o resultado desse processo numa variável chamada *response*. Essa variável contém agora uma versão aberta do site solicitado. - -No próximo passo, utilizamos o método `read`, que já utilizamos anteriormente, para copiar os conteúdos do site numa nova variável chamada *webContent*. - -Assegure-se de ser capaz de identificar as variáveis (3), o módulo (1), os métodos (2) e os parâmetros (1) antes de prosseguir. - -No resultado do código acima, alguns marcadores da linguagem HTML poderão ser identificados: - -``` xml - - - - Browse - Central Criminal Court - Open File` no Firefox, abra o ficheiro criado no seu disco local (`obo-t17800628-33.html`) para confirmar que a cópia salva é a mesma que a online. - -``` python -# save-webpage.py - -import urllib.request, urllib.error, urllib.parse - -url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33' - -response = urllib.request.urlopen(url) -webContent = response.read().decode('UTF-8') - -f = open('obo-t17800628-33.html', 'w') -f.write(webContent) -f.close -``` - -Se é possível salvar um único ficheiro dessa maneira, seria possível escrever um programa para baixar um conjunto de ficheiros? Por exemplo, seria possível percorrer os identificadores de um conjunto de páginas e copiá-las para o seu computador? Sim. Aprenda como na lição [Downloading Multiple Files using Query Strings](/en/lessons/downloading-multiple-records-using-query-strings) (em inglês), que recomendamos depois que tenha terminado as lições introdutórias dessa série. - -### Leitura Sugerida - -- Mitchell, Ryan. “Web Scraping com Python: Coletando Mais Dados da Web Moderna" (O’Reilly, 2019). - -### Sincronização do Código - -Para acompanhar futuras lições, é importante ter os ficheiros e programas corretos no seu diretório “programming-historian”. Ao final de cada lição, é possível baixar o ficheiro zip “programming-historian” para ter certeza de que o ficheiro correto está sendo utilizado. - -- programming-historian-1 ([zip](/assets/python-lessons1.zip)) - +--- +title: Download de páginas Web com Python +layout: lesson +slug: download-paginas-web-python +date: 2012-07-17 +translation_date: 2021-03-26 +authors: +- William J. 
Turkel +- Adam Crymble +reviewers: +- Jim Clifford +- Frederik Elwert +editors: +- Miriam Posner +translator: +- Bruno Gasparotto Ponne +translation-editor: +- Josir Cardoso Gomes +translation-reviewer: +- Felipe Lamarca +- Daniel Alves +difficulty: 2 +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/360 +activity: acquiring +topics: [python] +abstract: "Esta lição apresenta o conceito de *Localizador Uniforme de Recursos* (URL em inglês) e explica como usar o Python para fazer o download de uma página *Web* no seu disco local." +original: working-with-web-pages +avatar_alt: Um homem alto ao lado de uma mulher baixa +doi: 10.46430/phpt0010 +--- + + +{% include toc.html %} + +
    +O site do Old Bailey Online foi recentemente atualizado. Infelizmente, devido às diversas mudanças, muitos (se não todos) os elementos do site de exemplo usado nesta lição não funcionarão conforme descrito. No entanto, as metodologias ensinadas por esta lição permanecem relevantes e podem ser adaptadas pelos leitores para um site de exemplo diferente. Estamos trabalhando na adaptação da lição para o novo site do Old Bailey Online, mas ainda não temos cronograma preciso de quando a lição será atualizada. [Abril de 2024] +
    + +### Objetivos da Lição + +Esta lição apresenta o conceito de *Localizador Uniforme de Recursos* (URL em inglês) e explica como usar o Python para fazer o download de uma página *Web* no seu disco local. + +### Sobre URLs + +Uma página *Web* é um ficheiro hospedado noutro computador, conhecido como *servidor*. Quando um site é acessado, na realidade, o seu computador (o *cliente*) envia um pedido ao *servidor de hospedagem* por meio da rede e o servidor responde enviando uma cópia da página ao seu computador. Uma forma de acessar uma página por meio do seu navegador é seguir um link. É possível também colar ou digitar uma URL (localizador uniforme de recursos) diretamente no seu navegador. A URL informa ao seu navegador onde encontrar um recurso online, especificando o servidor, o diretório e o nome do ficheiro a ser recuperado, bem como o tipo de *protocolo* que o servidor e o seu navegador utilizarão para troca de informações (como o HTTP, *protocolo de transferência de hipertexto*). A estrutura básica de uma URL é + +``` +protocol://host:port/path?query +``` + +Vejamos alguns exemplos: + +``` xml +http://oldbaileyonline.org +``` + +O tipo mais básico de URL especifica apenas o protocolo e o domínio. Quando inserido em seu navegador, essa URL retornará a página principal do site [Old Bailey Online](https://www.oldbaileyonline.org). O pressuposto convencional é que a página principal num determinado diretório se chamará *index*, geralmente `index.html`. + +A URL pode incluir também um *número de porta* opcional. Sem entrar em muitos detalhes, o protocolo de rede em que se baseia a troca de informações na Internet permite que computadores se conectem de diferentes maneiras. Números de portas são utilizados para distinguir esses diferentes tipos de conexão. Uma vez que a porta padrão para HTTP é a 80, a seguinte URL é equivalente à anterior. + +``` xml +http://oldbaileyonline.org:80 +``` + +Geralmente há diversas páginas *Web* num determinado site. Essas páginas são armazenadas em diretórios no servidor e é possível especificar o caminho para uma página em particular. A página "About" para o site *The Old Bailey Online* tem a seguinte URL: + +``` xml +http://oldbaileyonline.org/static/Project.jsp +``` + +Por fim, algumas páginas permitem inserir *queries*, termo em inglês que significa pedido, solicitação. O site *The Old Bailey Online*, por exemplo, foi desenvolvido de forma que é possível requisitar uma de suas páginas utilizando uma *query string* (conjunto de caracteres que contém uma solicitação). A seguinte URL acessará uma página de resultado de buscas por registros de julgamentos criminais contendo a palavra "arsenic". + +``` xml +https://www.oldbaileyonline.org/search.jsp?form=searchHomePage&_divs_fulltext=arsenic&kwparse=and&_persNames_surname=&_persNames_given=&_persNames_alias=&_offences_offenceCategory_offenceSubcategory=&_verdicts_verdictCategory_verdictSubcategory=&_punishments_punishmentCategory_punishmentSubcategory=&_divs_div0Type_div1Type=&fromMonth=&fromYear=&toMonth=&toYear=&ref=&submit.x=0&submit.y=0 +``` + +O fragmento a seguir ao sinal "?" representa a *query*. Aprenda mais sobre como criar *queries* na lição [Downloading Multiple Records Using Query Strings](/en/lessons/downloading-multiple-records-using-query-strings) (em inglês). + +### Acessando URLs com Python + +Como um historiador da era digital, você frenquentemente desejará utilizar dados mantidos em sites acadêmicos. 
Para acessar esses dados, seria possível abrir as URLs uma por uma e copiar e colar os conteúdos num ficheiro de texto. Alternativamente, é possível utilizar Python para, automaticamente, coletar e processar os dados. Para isso, é preciso aprender como abrir uma URL por meio do seu próprio código. A linguagem Python inclui uma série de padrões para fazer isso. + +Como exemplo, vamos trabalhar com o tipo de documento que provavelmente você vai encontrar ao realizar uma pesquisa na área de História. Suponhamos que haja interesse nas relações raciais na Inglaterra do século XVIII. O site *The Old Bailey Online* é uma fonte rica de informações históricas e disponibiliza transcrições de julgamentos que ocorreram entre 1674 e 1913. + +{% include figure.html filename="old-bailey.png" caption="A homepage do site The Old Bailey Online" %} + +Para esse exemplo, utilizaremos a transcrição do julgamento de Benjamin Bowsey, um negro condenado por perturbar a paz durante os protestos de Gordon em 1780. A URL para o registro é + +``` xml +http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33 +``` + +Estudando a URL, podemos verificar algumas coisas. Primeiro, o site é programado em JSP (*JavaServer Pages*, uma linguagem de programação para a *web* cujo resultado é um ficheiro HTML). Segundo, é possível acessar registros de julgamentos individuais fazendo uso de *query strings*. Cada registro recebe um número único (*id=t* na URL), formado a partir da data da sessão de julgamento no formato (*AAAAMMDD*) e o número do julgamento naquela sessão do tribunal. Neste caso, *33*. Caso as duas ocorrências de `33` sejam trocadas por `34` no link acima, o seu navegador o encaminhará ao próximo julgamento. Infelizmente, nem todos os sites possuem URLs tão acessíveis e confiáveis quanto essa. + +{% include figure.html filename="bowsey-trial-page.png" caption="Transcrição do julgamento de Benjamin Bowsey, 1780" %} + +Observe a página do julgamento de Benjamin Bowsey. Mais importante do que o conteúdo são os elementos presentes na página. Note o link [View as XML](https://www.oldbaileyonline.org/browse.jsp?foo=bar&path=sessionsPapers/17800628.xml&div=t17800628-33&xml=yes) na parte inferior. Esse link apresenta uma versão repleta de marcações no texto que podem ser úteis para certos tipos de pesquisa. O [documento original digitalizado](https://www.oldbaileyonline.org/images.jsp?doc=178006280084) do julgamento também pode ser acessado. + +Agora vamos tentar abrir a página utilizando Python. Copie o seguinte programa no *Komodo Edit* e salve o ficheiro como `open-webpage.py`. Quando executar o programa, a página do julgamento será acessada, seus conteúdos serão lidos e copiados numa string chamada `webContent`. Na sequência, os primeiros 300 caracteres serão exibidos no *painel de saída de comandos*. Utilize `Ferramentas -> Ferramentas do Navegador -> Fonte da página` no navegador Firefox para verificar que o código HTML da página é o mesmo que o seu programa acessou. Outros navegadores podem ter caminhos distintos para acessar o código fonte. Caso não consiga encontrar o caminho no seu navegador, tente utilizar um mecanismo de busca para encontrá-lo. (Consulte a biblioteca de referência do Python para aprender mais sobre [urllib](https://docs.python.org/3/library/urllib.html?highlight=urllib).) 
+ +``` python +# open-webpage.py + +import urllib.request, urllib.error, urllib.parse + +url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33' + +response = urllib.request.urlopen(url) +webContent = response.read().decode('UTF-8') + +print(webContent[0:300]) +``` + +Utilizando apenas essas cinco linhas de código, é possível obter resultados substanciais. Agora, vamos nos assegurar de que cada linha de código está clara e que é possível distinguir os blocos que permitem ao programa realizar a tarefa que desejamos. + +*url*, *response* e *webContent* são todas variáveis nomeadas por nós. + +*url* contém a URL da página que queremos baixar. Neste exemplo, trata-se do julgamento de Benjamin Bowsey. + +Na linha seguinte, chamamos a função `urlopen`, contida no módulo do Python chamado `urllib.py`, e solicitamos que ela acesse o site especificado na variável *url*. Em seguida, salvamos o resultado desse processo numa variável chamada *response*. Essa variável contém agora uma versão aberta do site solicitado. + +No próximo passo, utilizamos o método `read`, que já utilizamos anteriormente, para copiar os conteúdos do site numa nova variável chamada *webContent*. + +Assegure-se de ser capaz de identificar as variáveis (3), o módulo (1), os métodos (2) e os parâmetros (1) antes de prosseguir. + +No resultado do código acima, alguns marcadores da linguagem HTML poderão ser identificados: + +``` xml + + + + Browse - Central Criminal Court + Open File` no Firefox, abra o ficheiro criado no seu disco local (`obo-t17800628-33.html`) para confirmar que a cópia salva é a mesma que a online. + +``` python +# save-webpage.py + +import urllib.request, urllib.error, urllib.parse + +url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33' + +response = urllib.request.urlopen(url) +webContent = response.read().decode('UTF-8') + +f = open('obo-t17800628-33.html', 'w') +f.write(webContent) +f.close +``` + +Se é possível salvar um único ficheiro dessa maneira, seria possível escrever um programa para baixar um conjunto de ficheiros? Por exemplo, seria possível percorrer os identificadores de um conjunto de páginas e copiá-las para o seu computador? Sim. Aprenda como na lição [Downloading Multiple Files using Query Strings](/en/lessons/downloading-multiple-records-using-query-strings) (em inglês), que recomendamos depois que tenha terminado as lições introdutórias dessa série. + +### Leitura Sugerida + +- Mitchell, Ryan. “Web Scraping com Python: Coletando Mais Dados da Web Moderna" (O’Reilly, 2019). + +### Sincronização do Código + +Para acompanhar futuras lições, é importante ter os ficheiros e programas corretos no seu diretório “programming-historian”. Ao final de cada lição, é possível baixar o ficheiro zip “programming-historian” para ter certeza de que o ficheiro correto está sendo utilizado. + +- programming-historian-1 ([zip](/assets/python-lessons1.zip)) + diff --git a/pt/licoes/explorar-analisar-dados-rede-python.md b/pt/licoes/explorar-analisar-dados-rede-python.md index 1e7bd4d1ce..3d749cde33 100644 --- a/pt/licoes/explorar-analisar-dados-rede-python.md +++ b/pt/licoes/explorar-analisar-dados-rede-python.md @@ -1,603 +1,603 @@ ---- -title: "Explorar e Analisar Dados de Rede com Python" -slug: explorar-analisar-dados-rede-python -original: exploring-and-analyzing-network-data-with-python -layout: lesson -collection: lessons -date: 2017-06-16 -translation_date: 2023-05-12 -authors: -- John R. Ladd -- Jessica Otis -- Christopher N. 
-- Scott Weingart
-reviewers:
-- Elisa Beshero-Bondar
-- Anne Chao
-- Qiwei Li
-editors:
-- Brandon Walsh
-translator:
-- João Domingues Pereira
-translation-editor:
-- Eric Brasil
-translation-reviewer:
-- Josir Cardoso Gomes
-- Daniel Alves
-review-ticket: https://github.com/programminghistorian/ph-submissions/issues/446
-difficulty: 2
-activity: analyzing
-topics: [network-analysis, data-visualization]
-abstract: Esta lição introduz métricas de rede e como tirar conclusões das mesmas quando se trabalha com dados de Humanidades. O leitor aprenderá como usar o pacote NetworkX do Python para produzir e trabalhar com estas estatísticas de rede.
-avatar_alt: Caminhos-de-ferro intrincados
-doi: 10.46430/phpt0041
-modified: 2023-08-25
-lesson-testers: John R. Ladd
-tested-date: 2023-08-21
----
-
-{% include toc.html %}
-
-# Introdução
-
-## Objetivos da Lição
-
-Neste tutorial, o leitor irá aprender:
-- A usar o pacote [**NetworkX**](https://perma.cc/F574-RREU) para trabalhar com dados de rede em [**Python**](/pt/licoes/introducao-instalacao-python); e
-- A analisar dados de rede de Humanidades para encontrar:
-  - Estruturas de rede e comprimentos de caminho,
-  - Nós importantes ou centrais, e
-  - Comunidades e subgrupos.
-
-**n.b.**: Este é um tutorial para explorar estatísticas e métricas de rede. Assim sendo, iremos focar-nos em maneiras de analisar e tirar conclusões a partir de redes sem visualizá-las. Provavelmente, o leitor quererá uma combinação de visualização e métricas de rede no seu próprio projeto, e, por isso, nós recomendamos este artigo como um complemento a [este tutorial anterior do *Programming Historian*](/en/lessons/creating-network-diagrams-from-historical-sources) (em inglês)[^1].
-
-## Pré-Requisitos
-
-Este tutorial assume que o leitor:
-
-- Tem uma familiaridade básica com redes e/ou leu [*From Hermeneutics to Data to Networks: Data Extraction and Network Visualization of Historical Sources*](/en/lessons/creating-network-diagrams-from-historical-sources) (em inglês), de Marten Düring, aqui no *Programming Historian*;
-- Instalou o Python 3, não o Python 2 que é nativamente instalado em sistemas operacionais com base no Unix, como os Macs (se precisar de assistência com a instalação do Python 3, veja [The Hitchhiker's Guide to Python](https://perma.cc/DP2N-B4EN) (em inglês)); e
-- Instalou o instalador de pacotes `pip`[^2].
-
-É possível ter duas versões do Python (2 *e* 3) instaladas no seu computador ao mesmo tempo. Por esta razão, ao aceder ao Python 3, o leitor frequentemente terá que o declarar explicitamente digitando `python3` e `pip3` em vez de simplesmente `python` e `pip`. Consulte os tutoriais do *Programming Historian* sobre a [instalação do Python](/pt/licoes/introducao-instalacao-python) e o [uso do pip](/pt/licoes/instalacao-modulos-python-pip) para mais informações[^3].
-
-## O Que o Leitor Pode Aprender a Partir dos Dados de Rede?
-
-Há muito que as redes interessam aos pesquisadores nas Humanidades, mas muitos académicos recentes progrediram dum interesse grandemente qualitativo e metafórico em links e conexões para um conjunto mais formal de ferramentas quantitativas para estudar mediadores, *hubs* (nós importantes) e estruturas interconectadas. Como o sociólogo Mark S. Granovetter apontou no seu importante artigo de maio de 1973 [*The Strength of Weak Ties*](https://perma.cc/A4PC-WPKN) (em inglês), raramente é suficiente notar que duas pessoas estavam conectadas uma à outra.
Fatores como a sua relação estrutural com outras pessoas e se essas pessoas adicionais estavam, elas próprias, conectadas umas às outras têm influência decisiva nos eventos. Na medida em que até o mais perspicaz dos académicos tem dificuldade em perceber, digamos, o contorno geral duma rede (a sua "Topologia" de rede) e em identificar os nós mais significativos para conectar grupos, a análise quantitativa de rede oferece aos académicos um modo de transitar relativamente fluidamente entre o objeto social de larga escala (o "grafo") e as particularidades minuciosas das pessoas e laços sociais. - -Este tutorial irá ajudá-lo a responder questões como: -- Qual é a estrutura geral da rede? -- Quem são as pessoas importantes, ou *hubs*, na rede? -- Quais são os subgrupos e comunidades na rede? - - -## O Nosso Exemplo: a Sociedade dos Amigos - -Antes que existissem amigos do Facebook, havia a Sociedade dos Amigos, conhecida como os *quakers*. Fundados na Inglaterra em meados do século XVII, os *quakers* eram cristãos protestantes que divergiram da oficial Igreja da Inglaterra e que promoviam uma ampla tolerância religiosa, preferindo a suposta "luz interior" (*inner light*; **nota de tradução**: este conceito tinha uma extrema importância na Teologia *quaker*) e as consciências dos cristãos à ortodoxia imposta pelo Estado. O número de *quakers* cresceu rapidamente de meados para os finais do século XVII e os seus membros espalharam-se pelas Ilhas Britânicas, pela Europa e pelas colônias do Novo Mundo---especialmente pela Pensilvânia, fundada pelo líder *quaker* William Penn e lar dos quatro autores. - -Visto que os académicos há muito que ligam o crescimento e a persistência dos *quakers* à eficácia das suas redes, os dados usados neste tutorial são uma lista de nomes e relações entre os primevos *quakers* do século XVII. Este *dataset* é derivado do [*Oxford Dictionary of National Biography*](http://www.oxforddnb.com) (em inglês) e do trabalho em progresso do projeto [*Six Degrees of Francis Bacon*](http://www.sixdegreesoffrancisbacon.com) (em inglês), o qual está a reconstruir as redes sociais da Grã-Bretanha moderna (1500-1700). - -# Preparação dos Dados e Instalação do NetworkX - -Antes de iniciar este tutorial, o leitor precisará de fazer o download de dois ficheiros que, combinados, constituem o *dataset* da nossa rede. O ficheiro [quakers_nodelist.csv](/assets/exploring-and-analyzing-network-data-with-python/quakers_nodelist.csv) é uma lista de *quakers* modernos (nós) e o ficheiro [quakers_edgelist.csv](/assets/exploring-and-analyzing-network-data-with-python/quakers_edgelist.csv) é uma lista de relações entre esses *quakers* (*edges*). Para fazer o download destes ficheiros, basta clicar com o botão direito do *mouse* nos *links* e escolher "Guardar ligação como". - -Será extremamente útil ao leitor familiarizar-se com a estrutura do *dataset* antes de continuar. Para mais informações sobre a estrutura geral dos *datasets* de rede, veja [este tutorial](/en/lessons/creating-network-diagrams-from-historical-sources#developing-a-coding-scheme) (em inglês). Quando o leitor abrir o ficheiro de nós no programa da sua escolha, verá que cada *quaker* é primeiramente identificado pelo seu *name* (nome). 
Cada nó dum *quaker* também tem um número de atributos associados, incluindo *historical significance* (em português, significado histórico), *gender* (em português, género), *birth*/*death dates* (em português, datas de nascimento/morte), e o SDFB ID---um identificador numérico exclusivo que lhe permitirá cruzar nós neste *dataset* com o *dataset* original do *Six Degrees of Francis Bacon*, se desejado. Aqui estão as primeiras linhas: - -``` -Name,Historical Significance,Gender,Birthdate,Deathdate,ID -Joseph Wyeth,religious writer,male,1663,1731,10013191 -Alexander Skene of Newtyle,local politician and author,male,1621,1694,10011149 -James Logan,colonial official and scholar,male,1674,1751,10007567 -Dorcas Erbery,Quaker preacher,female,1656,1659,10003983 -Lilias Skene,Quaker preacher and poet,male,1626,1697,10011152 -``` - -Note que, embora as colunas não estejam corretamente alinhadas como ocorre numa tabela de dados, as vírgulas mantêm tudo apropriadamente separado. - -Quando o leitor abrir o ficheiro de *edges*, verá que nós usamos os *names* do ficheiro de nós para identificar os nós conectados por cada *edge*. Estas *edges* começam num nó ***source*** (em português, origem) e acabam num nó ***target*** (em português, destino). Embora esta linguagem derive das chamadas estruturas de rede **direcionadas**, nós usaremos os nossos dados como uma rede **não direcionada**: se a Pessoa A conhece a Pessoa B, então a Pessoa B também deve conhecer a Pessoa A. Nas redes direcionadas, as relações não precisam de ser recíprocas (a Pessoa A pode enviar uma carta à B sem receber uma em troca), mas nas redes não direcionadas as conexões são sempre recíprocas, ou **simétricas**. Uma vez que esta é uma rede de quem conhecia quem ao invés de, digamos, uma rede epistolar, um conjunto de relações não direcionadas é o mais apropriado. As relações simétricas nas redes não direcionadas são úteis sempre que estiver preocupado com relações que definem o mesmo papel para ambas as partes. Dois amigos têm uma relação simétrica: cada um deles é um amigo do outro. O autor e o destinatário duma carta têm uma relação assimétrica porque cada um tem um papel diferente. Tanto as redes direcionadas como as não direcionadas têm os seus próprios recursos (e, por vezes, as suas próprias métricas), e o leitor quererá escolher aquela que melhor se adapta aos tipos de relações que está a registrar e às questões que quer clarificar. Aqui estão as primeiras *edges* na rede *quaker* não direcionada: - -``` -Source,Target -George Keith,Robert Barclay -George Keith,Benjamin Furly -George Keith,Anne Conway Viscountess Conway and Killultagh -George Keith,Franciscus Mercurius van Helmont -George Keith,William Penn -``` - -Agora que fez o download dos dados *quakers* e viu como estão estruturados, está na hora de começar a trabalhar com esses dados no Python. Assim que tanto o Python como o pip estiverem instalados (ver Pré-Requisitos, acima), quererá instalar o NetworkX, digitando isto na sua [linha de comandos](/en/lessons/intro-to-bash) (em inglês):[^4] - -```python -pip3 install networkx==3.1 -``` - -Uma nota curta sobre controle de versão: este tutorial usa NetworkX 3.1, mas a biblioteca está em desenvolvimento ativo e é atualizada com frequência. Recomendamos usar o comando de instalação acima para garantir que a sua versão do NetworkX corresponde ao código abaixo (em vez de simplesmente instalar a versão mais recente). 
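-
-Para confirmar qual versão do NetworkX ficou instalada, um esboço rápido que pode ser executado no interpretador do Python:
-
-```python
-# Esboço: verificar a versão instalada do NetworkX
-import networkx as nx
-print(nx.__version__) # Deve mostrar 3.1
-```
-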
Se já tiver uma versão mais antiga do NetworkX instalada, execute `pip3 install networkx==3.1 --upgrade` antes de tentar o tutorial[^5]. - -Está feito! Está preparado para começar a codificar. - -# Começando - -## Ler Ficheiros, Importar Dados - -Inicie um novo ficheiro de texto simples, em branco, no mesmo diretório que os seus ficheiros de dados chamado `quaker_network.py` (para mais detalhes sobre a instalação e execução do Python, ver [este tutorial](/pt/licoes/instalacao-windows)). No topo desse ficheiro, importe as bibliotecas de que precisa. O leitor precisará de três bibliotecas---aquela que acabámos de instalar, e duas bibliotecas incorporadas no Python. Pode digitar: - -```python -import csv -from operator import itemgetter -import networkx as nx -from networkx.algorithms import community # Esta parte do NetworkX, para a deteção de comunidades, precisa de ser importada separadamente. -``` - -Agora pode ordenar ao programa para ler os seus ficheiros de CSV e retirar os dados de que precisa. Ironicamente, ler ficheiros e reorganizar os dados geralmente requer um código mais complexo que as funções para executar uma análise de redes sociais, portanto pedimos que tenha paciência connosco ao longo deste primeiro bloco de código. Aqui está um conjunto de comandos para abrir e ler os ficheiros das nossas listas de nós e de *edges*: - -```python -with open('quakers_nodelist.csv', 'r') as nodecsv: # Abra o ficheiro - nodereader = csv.reader(nodecsv) # Leia o CSV - # Retire os dados (usando a list comprehension e a list slicing do Python para remover a linha de cabeçalho, veja a nota de rodapé 6) - nodes = [n for n in nodereader][1:] - -node_names = [n[0] for n in nodes] # Obtenha uma lista apenas dos nomes dos nós - -with open('quakers_edgelist.csv', 'r') as edgecsv: # Abra o ficheiro - edgereader = csv.reader(edgecsv) # Leia o CSV - edges = [tuple(e) for e in edgereader][1:] # Retire os dados -``` - -Este código executa funções similares às [deste tutorial](/pt/licoes/trabalhando-ficheiros-texto-python), mas usa o módulo CSV para carregar os seus nós e *edges*. Mais tarde, o leitor voltará a atuar sobre os dados e obterá mais informação sobre os nós, mas, por agora, precisa de duas coisas: a lista completa de nós e uma lista de pares *edges* (como énuplos de nós)[^6]. Estas são as formas de que o NetworkX precisará para criar um "objeto grafo", um tipo de dados especial do NetworkX sobre o qual o leitor aprenderá na próxima secção. - -Nesta fase, antes de começar a usar o NetworkX, o leitor pode fazer algumas verificações de sanidade básicas para se certificar que os seus dados foram corretamente carregados usando funções e métodos incorporados no Python. Digitando: - -```python -print(len(node_names)) -``` - -e: - -```python -print(len(edges)) -``` - -e, depois, executando o seu *script* lhe mostrará quantos nós e *edges* carregou com sucesso no Python. Se o leitor vir 119 nós e 174 *edges*, então tem todos os dados necessários. - - -## Noções Básicas do NetworkX: Criar o Grafo - -Agora o leitor tem os seus dados como duas listas do Python: uma lista de nós (`node_names`) e uma lista de *edges* (`edges`). No NetworkX, o leitor pode juntar estas duas listas num só objeto rede que compreende como os nós e as *edges* se relacionam. Este objeto é chamado de **Grafo**, referindo-se a um dos termos comuns para dados organizados como uma rede **n.b.**: não se refere a alguma representação visual dos dados. Aqui, grafo é usado puramente num sentido matemático, de análise de rede. 
Primeiro, o leitor deve *inicializar* um objeto Grafo com o seguinte comando:
-
-```python
-G = nx.Graph()
-```
-
-> **Nota de tradução**: em inglês, 'gráfico' pode ser traduzido como '*graphic*' ou, de forma diminutiva, como '*graph*', que também pode significar 'grafo', o termo aqui referido. Esta homografia não ocorre no português.
-
-Isto criará um novo objeto grafo, *G*, com nada nele. Agora, o leitor pode adicionar as suas listas de nós e de *edges* assim:
-
-```python
-G.add_nodes_from(node_names)
-G.add_edges_from(edges)
-```
-
-Esta é uma de várias maneiras de adicionar dados a um objeto rede. O leitor pode verificar a [documentação do NetworkX](https://perma.cc/3QVU-FLPF) (em inglês) para obter mais informações sobre como adicionar *weighted edges*, ou adicionar nós e *edges* uma de cada vez.
-
-Finalmente, o leitor pode obter informação básica sobre a sua rede recém-criada imprimindo o próprio objeto Grafo:
-
-```python
-print(G)
-```
-
-O comando `print(G)` indica o tipo da sua rede (neste caso, um objeto Graph padrão) e o número de nós e de *edges* na mesma. O *output* deve ser parecido a este:
-
-```
-Graph with 119 nodes and 174 edges
-```
-
-Esta é uma forma rápida de obter informação geral sobre o seu grafo, mas como o leitor aprenderá em secções subsequentes, está apenas a passar pela superfície do que o NetworkX lhe pode indicar sobre os seus dados.
-
-Para recapitular, de momento o seu *script* será semelhante a isto:
-
-```python
-import csv
-from operator import itemgetter
-import networkx as nx
-from networkx.algorithms import community
-
-# Leia no ficheiro da lista de nós
-with open('quakers_nodelist.csv', 'r') as nodecsv:
-    nodereader = csv.reader(nodecsv)
-    nodes = [n for n in nodereader][1:]
-
-# Obtenha uma lista apenas dos nomes dos nós (o primeiro item em cada linha)
-node_names = [n[0] for n in nodes]
-
-# Leia no ficheiro da lista de edges
-with open('quakers_edgelist.csv', 'r') as edgecsv:
-    edgereader = csv.reader(edgecsv)
-    edges = [tuple(e) for e in edgereader][1:]
-
-# Obtenha o número de nós e de edges nas nossas duas listas
-print(len(node_names))
-print(len(edges))
-
-G = nx.Graph() # Inicialize um objeto Grafo
-G.add_nodes_from(node_names) # Adicione nós ao Grafo
-G.add_edges_from(edges) # Adicione edges ao Grafo
-print(G) # Obtenha informação sobre o Grafo
-```
-
-Até agora, o leitor leu dados de nós e de *edges* no Python a partir de ficheiros CSV, e, depois, contou esses nós e *edges*. Depois disso, o leitor criou um objeto grafo usando o NetworkX e carregou os seus dados para esse objeto.
-
-## Adicionar Atributos
-
-Para o NetworkX, um objeto grafo é uma coisa grande (a sua rede) composta por dois tipos de coisas mais pequenas (os seus nós e as suas *edges*). Até agora, o leitor carregou nós e *edges* (como pares de nós), mas o NetworkX permite-lhe adicionar *atributos* tanto aos nós como às *edges*, providenciando mais informação sobre cada um deles. Mais à frente neste tutorial, o leitor executará métricas e adicionará alguns dos resultados de volta ao Grafo como atributos. Por agora, vamos certificar-nos que o seu Grafo contém todos os atributos que estão atualmente no seu CSV.
-
-O leitor quererá retornar a uma lista que criou no início do seu *script*: `nodes`. Esta lista contém todas as linhas do `quakers_nodelist.csv`, incluindo colunas para o *name*, a *historical significance*, o *gender*, o *birth year*, o *death year* e o SDFB ID.
O leitor quererá iterar por esta lista e adicionar esta informação ao nosso grafo. Existem algumas maneiras de fazer isto, mas o NetworkX providencia duas funções convenientes para adicionar atributos a todos os nós e *edges* dum Grafo duma só vez: `nx.set_node_attributes()` e `nx.set_edge_attributes()`. Para usar estas funções, o leitor irá precisar que os seus dados de atributos estejam na forma dum *dicionário* Python, no qual os nomes dos nós são as *chaves* e os atributos que quer adicionar são os *valores*[^7]. O leitor quererá criar um dicionário para cada um dos seus atributos, e, depois, adicioná-los usando as funções acima. A primeira coisa que o leitor deve fazer é criar cinco dicionários em branco, usando chavetas:
-
-```python
-hist_sig_dict = {}
-gender_dict = {}
-birth_dict = {}
-death_dict = {}
-id_dict = {}
-```
-
-Agora nós podemos fazer o *loop* através da nossa lista de `nodes` e adicionar os itens apropriados a cada dicionário. Nós fazemos isto sabendo antecipadamente a posição, ou *índice*, de cada atributo. Porque o nosso ficheiro `quakers_nodelist.csv` está bem organizado, nós sabemos que o *name* da pessoa será sempre o primeiro item na lista: índice 0, visto que começamos sempre a contar do 0 no Python. A *historical significance* da pessoa será o índice 1, o seu *gender* será o índice 2, e assim por diante. Portanto, nós podemos construir os nossos dicionários desta forma[^8]:
-
-```python
-for node in nodes: # Itere pela lista, uma linha de cada vez
-    hist_sig_dict[node[0]] = node[1]
-    gender_dict[node[0]] = node[2]
-    birth_dict[node[0]] = node[3]
-    death_dict[node[0]] = node[4]
-    id_dict[node[0]] = node[5]
-```
-
-Agora o leitor tem um conjunto de dicionários que pode usar para adicionar atributos a nós no seu objeto Grafo. A função `set_node_attributes` toma três variáveis: o Grafo ao qual o leitor está a adicionar o atributo, o dicionário de pares id-atributo, e o nome do novo atributo. O código para adicionar os seus cinco atributos assemelha-se a isto:
-
-```python
-nx.set_node_attributes(G, hist_sig_dict, 'historical_significance')
-nx.set_node_attributes(G, gender_dict, 'gender')
-nx.set_node_attributes(G, birth_dict, 'birth_year')
-nx.set_node_attributes(G, death_dict, 'death_year')
-nx.set_node_attributes(G, id_dict, 'sdfb_id')
-```
-
-Agora todos os seus nós têm estes cinco atributos, e o leitor pode aceder a eles a qualquer momento. Por exemplo, o leitor pode obter todos os *birth years* dos seus nós iterando por eles e acedendo ao atributo `birth_year`, assim:
-
-```python
-for n in G.nodes(): # Itere por cada nó, entre os nossos dados "n" estará o nome da pessoa
-    print(n, G.nodes[n]['birth_year']) # Aceda a cada nó pelo seu nome, e, depois, pelo atributo "birth_year"
-```
-
-A partir desta instrução, o leitor obterá uma linha de *output* para cada nó na rede. Deve parecer-se como uma simples lista de nomes e anos:
-
-```
-Anne Camm 1627
-Sir Charles Wager 1666
-John Bellers 1654
-Dorcas Erbery 1656
-Mary Pennyman 1630
-Humphrey Woolrich 1633
-John Stubbs 1618
-Richard Hubberthorne 1628
-Robert Barclay 1648
-William Coddington 1601
-```
-
-Os passos acima são um método comum para adicionar atributos a nós que o leitor usará repetidamente mais tarde neste tutorial.
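-
-Como complemento, à margem do fluxo principal da lição, o NetworkX também oferece o caminho inverso: a função `nx.get_node_attributes` devolve, duma só vez, um dicionário de pares nó-valor com um dado atributo de todos os nós. Um esboço mínimo:
-
-```python
-# Esboço: recuperar um atributo de todos os nós duma só vez.
-# nx.get_node_attributes devolve um dicionário {nó: valor do atributo}.
-genders = nx.get_node_attributes(G, 'gender')
-print(genders['William Penn']) # Deve mostrar 'male', o valor lido do CSV
-```
-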
-Aqui está uma recapitulação do bloco de código desta secção:
-
-```python
-# Crie um dicionário em branco para cada atributo
-hist_sig_dict = {}
-gender_dict = {}
-birth_dict = {}
-death_dict = {}
-id_dict = {}
-
-for node in nodes: # Itere pela lista de nós, uma linha de cada vez
-    hist_sig_dict[node[0]] = node[1] # Aceda ao item correto, adicione-o ao dicionário correspondente
-    gender_dict[node[0]] = node[2]
-    birth_dict[node[0]] = node[3]
-    death_dict[node[0]] = node[4]
-    id_dict[node[0]] = node[5]
-
-# Adicione cada dicionário como um atributo de nó ao objeto Grafo
-nx.set_node_attributes(G, hist_sig_dict, 'historical_significance')
-nx.set_node_attributes(G, gender_dict, 'gender')
-nx.set_node_attributes(G, birth_dict, 'birth_year')
-nx.set_node_attributes(G, death_dict, 'death_year')
-nx.set_node_attributes(G, id_dict, 'sdfb_id')
-
-# Itere por cada nó, para aceder e obter todos os atributos "birth_year"
-for n in G.nodes():
-    print(n, G.nodes[n]['birth_year'])
-```
-
-Agora o leitor aprendeu como criar um objeto Grafo e adicionar atributos ao mesmo. Nesta próxima secção, o leitor aprenderá sobre uma variedade de métricas disponíveis no NetworkX e como aceder às mesmas. Mas relaxe, acabou de aprender a maior parte do código de que precisará para o resto do tutorial!
-
-# Métricas Disponíveis no NetworkX
-
-Quando o leitor começa a trabalhar num novo *dataset*, é uma boa ideia obter uma visão geral dos dados. A primeira etapa, descrita acima, consiste simplesmente em abrir os ficheiros e ver o que está lá dentro. Porque é uma rede, o leitor sabe que existirão nós e *edges*, mas quantos de cada um existem? Que informação está anexada a cada nó ou *edge*?
-
-No nosso caso, existem 174 *edges* e 119 nós. Estas *edges* não têm direções (isto é, existe uma relação simétrica entre pessoas), nem incluem informação adicional. Para os nós, nós sabemos os seus *names*, a sua *historical significance*, o seu *gender*, a sua *birth date* e *death date*, e o SDFB ID.
-
-Estes detalhes informam o que o leitor pode ou devia fazer com o seu *dataset*. Muito poucos nós (digamos, 15), e uma análise de rede é menos útil que desenhar uma imagem ou fazer algumas leituras; demasiados (digamos, 15 milhões), e o leitor deveria considerar começar com um subconjunto ou encontrar um supercomputador.
-
-As propriedades da rede também guiam a sua análise. Porque esta rede é **não direcionada**, a sua análise tem que usar métricas que exigem *edges* simétricas entre nós. Por exemplo, o leitor pode determinar em que comunidades as pessoas se encontram, mas não pode determinar as rotas *direcionais* pelas quais a informação poderá fluir ao longo da rede (precisaria duma rede direcionada para isso). Ao usar as relações simétricas e não direcionadas neste caso, o leitor será capaz de encontrar subcomunidades e as pessoas que são importantes nessas comunidades, um processo que seria mais difícil (embora ainda possível) com uma rede direcionada. O NetworkX permite-lhe realizar a maior parte das análises que o leitor pode conceber, mas deve compreender as possibilidades do seu *dataset* e perceber que alguns algoritmos do NetworkX são mais apropriados do que outros.
-
-### O Formato da Rede
-
-Após ver a aparência do *dataset*, é importante ver a aparência da *rede*. Estas são coisas diferentes. O *dataset* é uma representação abstrata do que o leitor assume serem conexões entre entidades; a rede é a instanciação específica dessas suposições.
A rede, pelo menos neste contexto, é como o computador lê as conexões que o leitor codificou num *dataset*. A rede tem uma [Topologia](https://perma.cc/8M84-GESG), ou uma forma conectiva, que pode ser centralizada ou descentralizada; densa ou esparsa; cíclica ou linear. Um *dataset* não tem, fora da estrutura da tabela na qual está digitado.
-
-O formato e as propriedades básicas da rede irão dar-lhe uma ideia sobre com o que está a trabalhar e que análises parecem razoáveis. O leitor já sabe o número de nós e de *edges*, mas a que a rede se 'assemelha'? Os nós agrupam-se, ou estão espalhados de forma regular? Existem estruturas complexas, ou cada nó está organizado numa linha reta?
-
-A visualização abaixo, criada na ferramenta de visualização de redes [Gephi](https://gephi.org/), lhe dará uma ideia da Topologia desta rede[^9]. O leitor poderia criar um gráfico similar no Palladio usando [este tutorial](/en/lessons/creating-network-diagrams-from-historical-sources) (em inglês).
-
-{% include figure.html filename="exploring-and-analyzing-network-data-with-python-1.png" alt="Imagem com uma representação de um gráfico de redes" caption="Visualização de rede baseada em força dos dados *quakers*, criado no Gephi." %}
-
-Existem várias formas de visualizar uma rede, e um [*layout* baseado em força](https://perma.cc/AM7G-BTWV) (em inglês), do qual a imagem acima é um exemplo, encontra-se entre as mais comuns. Grafos baseados em força tentam encontrar o posicionamento ideal para nós com uma calculação baseada na [tensão de cordas segundo a Lei de Hooke](https://perma.cc/2RTL-CYVL) (em inglês), a qual, para grafos mais pequenos, normalmente cria visualizações limpas e de leitura fácil. A visualização embutida acima mostra-lhe que existe um único grande **componente** de nós conectados (no centro) e vários componentes pequenos com apenas uma ou duas conexões nas periferias. Esta é uma estrutura de rede relativamente comum. Saber que existem múltiplos componentes na rede irá limitar de forma útil as calculações que o leitor quererá realizar nela. Ao dispor o número de conexões (conhecidas como **grau**, ver abaixo) como o tamanho dos nós, a visualização também mostra que existem alguns nós com muitas conexões que mantêm o componente central intricado. Estes grandes nós são conhecidos como ***hubs***, e o facto de eles aparecerem tão claramente aqui dá-lhe uma pista em relação ao que o leitor encontrará quando medir a **centralidade** na próxima secção.
-
-Visualizações, no entanto, apenas o levam até certo ponto. Com quantas mais redes trabalhar, mais o leitor se aperceberá que a maior parte parece similar o suficiente ao ponto de ser difícil distinguir uma da outra. Métricas quantitativas deixam-no diferenciar redes, aprender sobre as suas Topologias, e tornar uma confusão de nós e *edges* em algo a partir do qual se pode aprender.
-
-Uma boa métrica com a qual começar é a **densidade** de rede. Isto é, simplesmente, o rácio de *edges* reais na rede face a todas as *edges* possíveis na rede. Numa rede não direcionada como esta, *poderia* haver uma única *edge* entre quaisquer dois nós, mas como o leitor viu na visualização, apenas algumas dessas *edges* possíveis estão realmente presentes. A densidade de rede dá-lhe uma ideia rápida do quão coesa a sua rede é.
-
-E as boas notícias são que muitas destas métricas requerem comandos simples e unilineares no Python. Daqui para a frente, o leitor pode continuar a construir o seu bloco de código das secções anteriores.
O leitor não tem de apagar nada que já tenha digitado, e porque criou o seu objeto rede `G` no bloco de código acima, todas as métricas a partir daqui devem trabalhar corretamente.
-
-O leitor pode calcular a densidade da rede executando `nx.density(G)`. No entanto, a melhor maneira de fazer isto é armazenar a sua métrica numa variável para referência futura, e imprimir essa variável, como:
-
-```python
-density = nx.density(G)
-print("Network density:", density)
-```
-
-O *output* da densidade é um número, então é isso que o leitor verá quando imprimir o valor. Neste caso, a densidade da nossa rede é, aproximadamente, 0.0248. Numa escala de 0 a 1, não é uma rede muito densa, o que confere com o que o leitor consegue ver na visualização[^10]. Um 0 significaria que não existem quaisquer conexões de todo, e um 1 indicaria que todas as *edges possíveis* estão presentes (uma rede perfeitamente conectada): esta rede *quaker* está na extremidade inferior dessa escala, mas, mesmo assim, longe do 0.
-
-Uma medida de caminho mais curto é um pouco mais complexa. Ela calcula a série mais curta possível de nós e *edges* que se situam entre quaisquer dois nós, algo difícil de ver em visualizações de grandes redes. Esta medida corresponde, essencialmente, a encontrar amigos de amigos---se a minha mãe conhece alguém que eu não conheço, então a minha mãe é o caminho mais curto entre mim e essa pessoa. O jogo *Six Degrees of Kevin Bacon*, a partir do qual o [nosso projeto](http://sixdegreesoffrancisbacon.com/) (em inglês) retira o nome, é basicamente um jogo que consiste em encontrar os caminhos mais curtos (com um **comprimento de caminho** de seis ou menos) de Kevin Bacon a qualquer outro ator.
-
-Para calcular um caminho mais curto, o leitor precisa de passar várias variáveis de *input* (informação que dá a uma função do Python): o grafo inteiro, o seu nó *source*, e o seu nó *target*. Vamos procurar o caminho mais curto entre Margaret Fell e George Whitehead. Como usámos nomes para identificar unicamente os nossos nós nesta rede, o leitor pode aceder a esses nós (como a ***source*** e o ***target*** do seu caminho) usando os nomes diretamente.
-
-```python
-fell_whitehead_path = nx.shortest_path(G, source="Margaret Fell", target="George Whitehead")
-
-print("Shortest path between Fell and Whitehead:", fell_whitehead_path)
-```
-
-Dependendo do tamanho da sua rede, isto pode demorar algum tempo para calcular, visto que o Python primeiro encontra todos os caminhos possíveis e depois escolhe o mais curto. O *output* de `shortest_path` será uma lista dos nós que inclui a "source" (Fell), o "target" (Whitehead), e os nós entre eles. Neste caso, nós podemos ver que o fundador dos *quakers*, George Fox, se encontra no caminho mais curto entre eles. Como Fox é também um ***hub*** (ver centralidade de grau, abaixo) com muitas conexões, nós podemos supor que vários caminhos mais curtos passam por ele como mediador. O que é que isto pode indicar sobre a importância dos fundadores dos *quakers* para a sua rede social?
-
-O Python inclui várias ferramentas que calculam os caminhos mais curtos. Existem funções para os comprimentos dos caminhos mais curtos, para todos os caminhos mais curtos, e para saber se um caminho existe ou não de todo na [documentação](https://perma.cc/3MJE-7MQQ) (em inglês).
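-
-A título de exemplo, um esboço mínimo com duas dessas funções, `shortest_path_length` e `has_path`, aplicadas ao mesmo par de nós de antes:
-
-```python
-# Esboço: outras funções de caminhos mais curtos do NetworkX
-# O comprimento do caminho mais curto, contado em edges
-print(nx.shortest_path_length(G, source="Margaret Fell", target="George Whitehead"))
-
-# Existe algum caminho entre os dois nós? (True ou False)
-print(nx.has_path(G, "Margaret Fell", "George Whitehead"))
-```
-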
-O leitor poderia usar uma função separada para encontrar o comprimento do caminho *Fell-Whitehead* que acabámos de calcular, ou poderia simplesmente tomar o comprimento da lista menos um[^11], assim:
-
-```python
-print("Length of that path:", len(fell_whitehead_path)-1)
-```
-
-Existem muitas métricas de rede derivadas dos comprimentos de caminho mais curtos. Uma tal medida é o **diâmetro**, que é o mais longo de todos os caminhos mais curtos. Depois de calcular todos os caminhos mais curtos entre cada par de nós possível na rede, o diâmetro é o comprimento do caminho entre os dois nós que estão mais afastados. A medida está projetada para lhe dar uma noção do tamanho geral da rede, a distância duma extremidade da rede à outra.
-
-O diâmetro usa um comando simples: `nx.diameter(G)`. No entanto, executar este comando no grafo *quaker* dará uma mensagem de erro indicando que o Grafo não está conectado ("*not connected*"). Isto significa apenas que o seu grafo, como o leitor já viu, tem mais que um componente. Porque existem alguns nós que não têm caminho algum para os outros, é impossível encontrar todos os caminhos mais curtos. Veja novamente a visualização do seu grafo:
-
-{% include figure.html filename="exploring-and-analyzing-network-data-with-python-1.png" alt="Imagem com uma representação de um gráfico de redes" caption="Visualização de rede baseada em força dos dados *quakers*, criado no Gephi." %}
-
-Como não há caminho entre nós dum componente e nós doutro, `nx.diameter()` retorna a mensagem de erro "*not connected*". O leitor pode remediar isto, primeiro, ao descobrir se o seu Grafo está conectado ("*is connected*") (*i.e.* tudo um componente) e, se não conectado, descobrir apenas o maior componente e calcular o diâmetro somente desse componente. Aqui está o código:
-
-```python
-# Se o seu Grafo tiver mais do que um componente, isto retornará como 'False'
-print(nx.is_connected(G))
-
-# A seguir, use nx.connected_components para obter a lista de componentes,
-# depois, use o comando max() para encontrar o maior:
-components = nx.connected_components(G)
-largest_component = max(components, key=len)
-
-# Crie um 'Subgrafo' apenas com o maior componente,
-# depois, calcule o diâmetro do Subgrafo, tal como fez com a densidade.
-
-subgraph = G.subgraph(largest_component)
-diameter = nx.diameter(subgraph)
-print("Network diameter of largest component:", diameter)
-```
-
-Como nós tomámos o maior componente, nós podemos assumir que nenhum dos outros componentes tem um diâmetro maior. Portanto, este valor é uma boa representação para o diâmetro de todo o Grafo. O diâmetro do maior componente desta rede é 8: existe um comprimento de caminho de 8 entre os dois nós mais afastados na rede. Ao contrário da densidade, que é apresentada de 0 a 1, é difícil saber a partir deste número somente se 8 é um diâmetro grande ou pequeno. Para algumas métricas globais, pode ser melhor compará-lo a redes de tamanho e forma similar[^12].
-
-O cálculo estrutural final que o leitor fará nesta rede concerne o conceito de **fechamento triádico**. Fechamento triádico supõe que se duas pessoas conhecem a mesma pessoa, elas provavelmente conhecem-se mutuamente. Se Fox conhece tanto Fell como Whitehead, então Fell e Whitehead podem perfeitamente conhecer-se mutuamente, completando um **triângulo** na visualização de três *edges* conectando Fox, Fell e Whitehead.
O número destes triângulos fechados na rede pode ser usado para descobrir aglomerados e comunidades de indivíduos que se conhecem todos intimamente. - -Uma forma de medir o fechamento triádico é o chamado **coeficiente de aglomeração** por causa desta tendência aglomeradora, mas a medida estrutural de rede que o leitor aprenderá é conhecida como **transitividade**[^13]. Transitividade é o rácio de todos os triângulos sobre todos os triângulos possíveis. Um triângulo possível existe quando uma pessoa (Fox) conhece duas pessoas (Fell e Whitehead). Então, transitividade, como a densidade, expressa quão interconectado um grafo é em termos dum rácio de conexões reais sobre as possíveis. Lembre-se, medidas como a transitividade e a densidade lidam com *probabilidades* e não com *certezas*. Todos os *outputs* do seu *script* no Python devem ser interpretados, como qualquer outro objeto de pesquisa. A transitividade permite-lhe uma forma de pensar sobre todas as relações no seu grafo que *podem* existir, mas que, atualmente, não existem. - -O leitor pode calcular a transitividade numa só linha, da mesma forma que calculou a densidade: - -```python -triadic_closure = nx.transitivity(G) -print("Triadic closure:", triadic_closure) -``` - -Tal como a densidade, transitividade é numerada de 0 a 1, e o leitor pode ver que a transitividade da rede é de cerca de 0.1694, um valor um pouco mais alto que o da sua densidade de 0.0248. Porque o grafo não é muito denso, existem menos *triângulos possíveis*, o que pode resultar numa transitividade relativamente mais elevada. Isto é, nós que já têm várias conexões farão provavelmente parte destes triângulos fechados. Para suportar isto, o leitor quererá saber mais sobre nós com muitas conexões. - -## Centralidade - -Depois de obter algumas medidas básicas da estrutura da rede inteira, um bom próximo passo é descobrir quais nós são os mais importantes na sua rede. Na análise de redes, medidas da importância dos nós são referidas como medidas de **centralidade**. Porque existem várias maneiras de abordar a questão "Que nós são os mais importantes?", existem várias formas diferentes de calcular a centralidade. Aqui, o leitor aprenderá sobre as três medidas de centralidade mais comuns: o grau, a centralidade de intermediação, e a centralidade adjacente. - -O **grau** é a forma mais simples e comum de encontrar nós importantes. O grau dum nó é a soma das suas *edges*. Se um nó tem três linhas a estenderem-se a outros nós, o seu grau é de três. Cinco *edges*, o seu grau é de cinco. É extremamente simples. Como cada uma dessas edges terá sempre um nó na outra extremidade, o leitor pode pensar no grau como o número de pessoas às quais qualquer pessoa está diretamente conectada. Os nós com os graus mais elevados numa rede social são as pessoas que conhecem mais pessoas. Estes nós são geralmente referidos como ***hubs***, e calcular o grau é a forma mais rápida de identificar os *hubs*. - -Calcular a centralidade para cada nó no NetworkX não é exatamente tão simples como as métricas de toda a rede acima, mas continua a envolver comandos unilineares. Todos os comandos de centralidade que o leitor aprenderá nesta secção produzem dicionários nos quais as chaves são os nós e os valores são as medidas de centralidade. Isto significa que eles estão prontos para adicionar de volta à nossa rede como um atributo de nó, como o leitor fez na última secção. Comece por calcular o grau e adicione-o como um atributo à sua rede. 
-
-```python
-degree_dict = dict(G.degree(G.nodes()))
-nx.set_node_attributes(G, degree_dict, 'degree')
-```
-
-O leitor acabou de executar o método `G.degree()` na lista completa de nós na sua rede (`G.nodes()`). Como o leitor adicionou-o como um atributo, agora pode ver o grau de William Penn, bem como o resto da sua informação, se aceder ao seu nó diretamente:
-
-```python
-print(G.nodes['William Penn'])
-```
-
-Mas estes resultados são úteis para mais do que simplesmente adicionar atributos ao seu objeto Grafo. Como o leitor já está no Python, pode organizar e compará-los. O leitor pode usar a função incorporada `sorted()` para organizar um dicionário com as suas chaves ou valores e encontrar o *top* vinte dos nós por grau. Para fazer isto, o leitor vai precisar de usar `itemgetter`, o qual nós importámos no início do tutorial. Usando `sorted` e `itemgetter`, pode organizar o dicionário de graus assim:
-
-```python
-sorted_degree = sorted(degree_dict.items(), key=itemgetter(1), reverse=True)
-```
-
-Aqui, há muitas coisas a acontecer nos bastidores, mas concentre-se só nas três variáveis de *input* que o leitor deu a `sorted()`. A primeira é o dicionário, `degree_dict.items()`, que quer organizar. A segunda é o que organizar por: neste caso, item "1" é o segundo item no par, ou o valor do seu dicionário. Finalmente, o leitor diz a `sorted()` para ir em `reverse` para que os nós de grau mais elevado apareçam primeiro na lista resultante. Assim que o leitor tiver criado esta lista organizada, pode iterar por ela e usar a *list slicing*[^6] para obter somente os primeiros 20 nós:
-
-```python
-print("Top 20 nodes by degree:")
-for d in sorted_degree[:20]:
-    print(d)
-```
-
-Como o leitor pode ver, o grau de Penn é 18, relativamente elevado para esta rede. Mas digitar estas informações de classificação ilustra as limitações do grau como uma medida de centralidade. O leitor provavelmente não precisava que o NetworkX lhe dissesse que William Penn, líder *quaker* e fundador da Pensilvânia, era importante. A maioria das redes sociais terão somente alguns *hubs* de grau muito elevado, com o resto de grau similar e baixo[^14]. O grau pode informá-lo sobre os maiores *hubs*, mas não pode dizer-lhe muito sobre o resto dos nós. E, em muitos casos, esses *hubs* sobre os quais o leitor está a informar (como o Penn ou como a cofundadora do Quakerismo, Margaret Fell, com um grau de 13) não são especialmente surpreendentes. Neste caso, quase todos os *hubs* são fundadores da religião ou, noutros casos, figuras políticas importantes.
-
-Felizmente, existem outras medidas de centralidade que lhe podem dizer mais do que só os *hubs*. A [centralidade adjacente](https://perma.cc/VF28-JDCR) (em inglês) é um tipo de extensão do grau---analisa uma combinação das *edges* dum nó e das *edges* dos vizinhos desse nó. A centralidade adjacente preocupa-se se um nó é um *hub*, mas também se preocupa com a quantos *hubs* um nó está conectado. É calculada como um valor de 0 a 1: quanto mais próximo do um, maior a centralidade. A centralidade adjacente é útil para compreender que nós podem fazer chegar informação a outros nós rapidamente. Se o leitor conhece muitas pessoas bem-conectadas, poderia espalhar uma mensagem muito eficientemente. Se o leitor usou o Google, então está já mais ou menos familiarizado com a centralidade adjacente. O seu algoritmo de PageRank usa uma extensão desta fórmula para decidir que páginas de internet são colocadas no topo da lista de resultados.
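-
-Já que o PageRank foi mencionado: o NetworkX também o implementa. Um esboço mínimo, com os parâmetros por omissão da função `nx.pagerank` (o nome da variável é apenas ilustrativo), que pode ser comparado com os resultados da centralidade adjacente calculados a seguir:
-
-```python
-# Esboço: PageRank, uma extensão da ideia de centralidade adjacente
-pagerank_dict = nx.pagerank(G)
-
-# Os cinco nós com PageRank mais elevado (itemgetter foi importado no início)
-print(sorted(pagerank_dict.items(), key=itemgetter(1), reverse=True)[:5])
-```
-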
-A [centralidade de intermediação](https://perma.cc/C55J-7XAJ) (em inglês) é um pouco diferente das outras duas calculações na medida em que não se preocupa com o número de *edges* que qualquer nó ou grupo de nós tem. A centralidade de intermediação observa todos os **caminhos mais curtos** que passam por um nó em particular (ver acima). Para fazer isto, tem que primeiro calcular todos os possíveis caminhos mais curtos na sua rede, por isso mantenha em mente que a centralidade de intermediação vai demorar mais tempo para calcular que as outras medidas de centralidade (mas não será um problema num *dataset* desta dimensão). A centralidade de intermediação, que também é expressa numa escala de 0 a 1, é particularmente boa a encontrar nós que conectam duas partes distintas duma rede. Se o leitor é a única coisa conectando dois aglomerados, cada comunicação entre esses aglomerados tem que passar por si. Em contraste com um *hub*, este tipo de nó é regularmente referido como um ***broker***. A centralidade de intermediação não é a única maneira de encontrar *brokerage* (e outros métodos são mais sistemáticos), mas é uma forma rápida de lhe dar uma ideia de quais nós são importantes, não porque têm muitas conexões eles próprios, mas porque eles situam-se *entre* grupos, dando à rede conectividade e coesão.
-
-Estas duas medidas de centralidade são ainda mais simples de executar que o grau---elas não precisam de receber uma lista de nós, só o grafo `G`. O leitor pode executá-las com estas funções:
-
-```python
-betweenness_dict = nx.betweenness_centrality(G) # Execute a centralidade de intermediação
-eigenvector_dict = nx.eigenvector_centrality(G) # Execute a centralidade adjacente
-
-# Atribua cada um como um atributo na sua rede
-nx.set_node_attributes(G, betweenness_dict, 'betweenness')
-nx.set_node_attributes(G, eigenvector_dict, 'eigenvector')
-```
-
-O leitor pode organizar a centralidade de intermediação (ou a adjacente) ao mudar os nomes das variáveis no código organizador acima, como:
-
-```python
-sorted_betweenness = sorted(betweenness_dict.items(), key=itemgetter(1), reverse=True)
-
-print("Top 20 nodes by betweenness centrality:")
-for b in sorted_betweenness[:20]:
-    print(b)
-```
-
-O leitor notará que muitos, mas não todos, dos nós que têm graus elevados também têm uma centralidade de intermediação alta. De facto, a centralidade de intermediação apresenta duas mulheres, Elizabeth Leavens e Mary Penington, cuja importância tinha sido obscurecida pela métrica da centralidade de grau. Uma vantagem de fazer estes cálculos no Python é que o leitor pode rapidamente comparar dois conjuntos de cálculos. E se o leitor quiser saber quais dos nós com alta centralidade de intermediação têm graus baixos? Isto é o mesmo que dizer: quais nós de alta intermediação são inesperados? Pode usar uma combinação da lista organizada acima:
-
-```python
-# Primeiro, obtenha uma lista do top 20 nós por intermediação
-top_betweenness = sorted_betweenness[:20]
-
-# Depois, encontre e obtenha o grau de cada um
-for tb in top_betweenness: # Itere por top_betweenness
-    degree = degree_dict[tb[0]] # Use degree_dict para aceder ao grau dum nó, veja a nota de rodapé 7
-    print("Name:", tb[0], "| Betweenness Centrality:", tb[1], "| Degree:", degree)
-```
-
-O leitor pode confirmar a partir destes resultados que algumas pessoas, como Leavens e Penington, têm alta centralidade de intermediação, mas baixo grau. Isto pode significar que estas mulheres eram *brokers* importantes, conectando partes díspares do grafo.
O leitor também pode aprender coisas inesperadas sobre pessoas sobre as quais já se sabe algo---nesta lista, consegue ver que Penn tem um grau inferior ao do fundador *quaker* George Fox, mas uma centralidade de intermediação mais elevada. Isto é o mesmo que dizer: simplesmente conhecer mais pessoas não é tudo.
-
-Isto aborda somente a superfície do que pode ser feito com métricas de rede no Python. O NetworkX oferece dezenas de funções e medidas para o leitor usar em várias combinações, e pode usar Python para estender estas medidas de formas quase ilimitadas. Uma linguagem de programação como o Python ou o R dar-lhe-á a flexibilidade para explorar a sua rede computacionalmente de formas que outros *interfaces* não podem, ao permitir-lhe combinar e comparar os resultados estatísticos da sua rede com outros atributos dos seus dados (como as datas e ocupações que adicionou à rede no início deste tutorial!).
-
-## Noções Avançadas do NetworkX: Deteção de Comunidades com Modularidade
-
-Outra coisa regularmente questionada sobre o *dataset* duma rede é quais são os subgrupos e comunidades dentro da estrutura social mais larga. A sua rede é uma família grande e feliz na qual todos se conhecem? Ou é uma coleção de subgrupos mais pequenos que estão conectados por um ou dois intermediários? O campo da deteção de comunidades em redes está desenhado para responder a estas questões. Existem várias formas de calcular comunidades, cliques, e aglomerados na sua rede, mas o método mais popular atualmente é a **modularidade**. A modularidade é uma medida de densidade relativa na sua rede: uma comunidade (chamada um **módulo** ou **classe** modular) tem uma densidade elevada em relação a outros nós dentro do seu módulo, mas densidade baixa com os outros de fora. A modularidade dá-lhe uma pontuação geral de quão fracionada a sua rede é, e essa pontuação pode ser usada para **repartir** a rede e evidenciar as comunidades individuais[^15].
-
-Redes muito densas são geralmente mais difíceis de dividir em repartições sensatas. Felizmente, como o leitor descobriu anteriormente, esta rede não é assim tão densa. Não existem tantas conexões reais quanto conexões possíveis, e existem componentes desconectados de todo. Vale a pena repartir esta rede esparsa com modularidade e ver se os resultados fazem sentido histórico e analítico.
-
-A deteção e repartição de comunidades no NetworkX requer um pouco mais de configuração do que algumas das outras métricas. Existem algumas abordagens incorporadas para a deteção de comunidades (como o [*minimum cut*](https://perma.cc/K59Y-WZRX) (em inglês)), e a deteção de comunidades por modularidade vem incluída no submódulo `community` do NetworkX, que o leitor já importou no início deste tutorial. (Existe também um [módulo adicional no Python](https://github.com/taynaud/python-louvain/) (em inglês), com a sua [documentação completa](https://perma.cc/KW5K-ZX67) (em inglês), que implementa o método de Louvain e pode ser usado com o NetworkX, mas que não será necessário aqui.) Para a maior parte dos propósitos da deteção de comunidades, quererá apenas `greedy_modularity_communities()`:
-
-```python
-communities = community.greedy_modularity_communities(G)
-```
-
-O método `greedy_modularity_communities()` tenta determinar o número de comunidades apropriadas para o grafo, e agrupa todos os nós em subconjuntos baseados nestas comunidades. Ao contrário das funções de centralidade, o código acima não criará um dicionário. Ao invés, criará uma lista especial de objetos "*frozenset*" (similar a listas).
Existe um conjunto para cada grupo, e os conjuntos contêm os nomes das pessoas em cada grupo. Para adicionar esta informação à sua rede na maneira agora familiar, o leitor tem que primeiro criar um dicionário que classifique cada pessoa com um valor numérico para o grupo ao qual pertencem: - -```python -modularity_dict = {} # Crie um dicionário vazio -for i,c in enumerate(communities): # Itere pela lista de comunidades, mantendo em mente o número para a comunidade - for name in c: # Itere por cada pessoa numa comunidade - modularity_dict[name] = i # Crie uma entrada no dicionário para a pessoa, na qual o valor é o grupo ao qual pertence. - -# Agora, o leitor pode adicionar a informação de modularidade como fez com as outras métricas -nx.set_node_attributes(G, modularity_dict, 'modularity') -``` - -Como sempre, o leitor pode combinar estas medidas com outras. Por exemplo, aqui está como encontrar os nós de centralidade adjacente mais elevada na classe modular 0 (a primeira): - -```python -# Primeiro, obtenha uma lista apenas dos nós nessa classe -class0 = [n for n in G.nodes() if G.nodes[n]['modularity'] == 0] - -# Depois, crie um dicionário das centralidades adjacentes desses nós -class0_eigenvector = {n:G.nodes[n]['eigenvector'] for n in class0} - -# Depois, organize esse dicionário e obtenha os primeiros 5 resultados -class0_sorted_by_eigenvector = sorted(class0_eigenvector.items(), key=itemgetter(1), reverse=True) - -print("Modularity Class 0 Sorted by Eigenvector Centrality:") -for node in class0_sorted_by_eigenvector[:5]: - print("Name:", node[0], "| Eigenvector Centrality:", node[1]) -``` - -Usando a centralidade adjacente como um *ranking* pode dar-lhe uma ideia das pessoas importantes nesta classe modular. O leitor notará que algumas destas pessoas, especialmente William Penn, William Bradford (*não* o fundador de Plymouth em que estará a pensar[^16]) e James Logan, passaram muito tempo na América. Também, Bradford e Tace Sowle eram ambos impressores *quakers* proeminentes. Com um pouco de pesquisa, nós podemos descobrir que existem tanto razões geográficas como ocupacionais que explicam que este grupo de pessoas se juntem. Isto é uma indicação de que a modularidade está a trabalhar como esperado. - -Em redes mais pequenas como esta, uma tarefa comum é encontrar e listar todas as classes modulares e seus membros[^17]. O leitor pode fazer isto ao percorrer pela lista `communities`: - -```python -for i,c in enumerate(communities): # Itere pela lista de comunidades - if len(c) > 2: # Filtre as classes modulares com 2 ou menos nós - print('Class '+str(i)+':', list(c)) # Obtenha as classes e os seus membros -``` - -Note no código acima que está a filtrar qualquer classe modular com dois ou menos nós, na linha `if len(c) > 2`. O leitor recordar-se-á da visualização que existiam vários componentes pequenos da rede com apenas dois nós. A modularidade encontrará estes componentes e tratá-los-á como classes separadas (visto que eles não estão conectados a mais nada). Ao filtrá-los, o leitor obtém uma ideia melhor das classes modulares maiores dentro do principal componente da rede. - -Trabalhando só com o NetworkX trá-lo-á longe, e o leitor pode encontrar muito sobre classes modulares apenas ao trabalhar com os dados diretamente. Mas quase sempre quer visualizar os seus dados (e, talvez, expressar a modularidade como a cor de nó). Na próxima secção, o leitor irá aprender como exportar os seus dados do NetworkX para uso noutros programas. 
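-
-Antes de passar à exportação, um esboço rápido e opcional para resumir a partição obtida acima (quantas comunidades foram encontradas e o tamanho de cada uma):
-
-```python
-# Esboço: visão geral da partição calculada com greedy_modularity_communities
-print("Número de comunidades:", len(communities))
-print("Tamanho de cada comunidade:", [len(c) for c in communities])
-```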
-
-
-
-# Exportar Dados
-
-O NetworkX suporta um grande número de formatos de ficheiros para [exportação de dados](https://perma.cc/X65S-HRCF) (em inglês). Se o leitor quiser exportar uma lista de *edges* em texto simples para carregar no Palladio, existe um [*wrapper* conveniente](https://perma.cc/P9ES-57X3) (em inglês) para isso. Frequentemente, no *Six Degrees of Francis Bacon*, nós exportamos dados do NetworkX no [formato JSON especializado do D3](https://perma.cc/SF8Z-DWPW) (em inglês), para visualização no navegador de internet. O leitor poderia até [exportar](https://perma.cc/Y6QJ-5VM8) (em inglês) o seu grafo como um [*dataframe* do Pandas](https://perma.cc/87NA-KCK4) (em inglês) se existissem operações estatísticas mais avançadas que quisesse executar. Existem várias opções, e se o leitor tiver adicionado diligentemente todas as suas métricas de volta no seu objeto Grafo como atributos, todos os seus dados serão exportados duma só vez.
-
-A maior parte das opções de exportação funcionam da mesma maneira, por isso, para este tutorial o leitor aprenderá como exportar os seus dados para o formato GEXF do Gephi. Assim que tiver exportado o ficheiro, o leitor pode fazer o *upload* [diretamente para o Gephi](https://gephi.org/users/supported-graph-formats/) (em inglês) para a visualização.
-
-Exportar dados é, normalmente, um simples comando unilinear. Tudo o que é preciso é escolher um nome de ficheiro. Neste caso, usaremos `quaker_network.gexf`. Para exportar, digite:
-
-```python
-nx.write_gexf(G, 'quaker_network.gexf')
-```
-
-É só! Quando executar o seu *script* no Python, colocará automaticamente o novo ficheiro GEXF no mesmo diretório que o seu ficheiro Python.[^18]
-
-# Conclusões
-
-Agora, tendo realizado e revisto uma panóplia de métricas de rede no Python, o leitor tem as evidências a partir das quais os argumentos se constroem e se retiram conclusões sobre esta rede de *quakers* na Grã-Bretanha moderna. O leitor sabe, por exemplo, que a rede tem uma **densidade** relativamente baixa, sugerindo associações ténues e/ou dados originais incompletos. O leitor sabe que a comunidade está organizada em torno de vários ***hubs*** desproporcionalmente grandes, entre eles fundadores da denominação, como Margaret Fell e George Fox, bem como líderes políticos e religiosos importantes, como William Penn. Mais útil ainda, o leitor sabe sobre mulheres com graus relativamente baixos, como Elizabeth Leavens e Mary Penington, que (como resultado de centralidade de intermediação elevada) podem ter agido como ***brokers***, conectando múltiplos grupos. Finalmente, o leitor aprendeu que a rede é feita dum grande **componente** e de muitos muito pequenos. No interior desse grande componente, existem várias **comunidades** distintas, algumas das quais parecem organizadas em torno do tempo ou local (como Penn e os seus associados estadunidenses). Por causa dos metadados que adicionou à sua rede, o leitor tem as ferramentas para explorar estas métricas em profundidade e para, potencialmente, explicar alguns dos recursos estruturais que identificou.
-
-Cada uma destas descobertas é um convite para mais pesquisa ao invés dum ponto final ou prova. A análise de redes é um conjunto de ferramentas para perguntar questões específicas sobre a estrutura das relações num *dataset*, e o NetworkX providencia um interface relativamente simples a muitas das técnicas e métricas comuns.
As redes são uma maneira útil de estender a sua pesquisa a um grupo ao providenciar informações sobre a estrutura da comunidade, e nós esperamos que o leitor seja inspirado por este tutorial para usar métricas para enriquecer a sua própria pesquisa e para explorar a flexibilidade da análise de redes para além da visualização.
-
-[^1]: **Nota de tradução**: Como o leitor poderá confirmar mais abaixo, os autores desta lição transformaram os dados aqui analisados num gráfico, sem explicar tal passo, visto que o artigo lida com a análise dos dados, e não com a sua visualização. Se desejar, pode ler também a lição aqui referida e voltar a esta para confirmar se o seu gráfico se assemelha ao dos quatro autores. Aconselhamos que o faça após ter concluído todos os passos aqui descritos.
-
-[^2]: Em muitos (mas não todos os) casos, `pip` ou `pip3` serão instalados automaticamente com o Python3.
-
-[^3]: **Nota de tradução**: Isto pode estender-se ao uso de comandos, na sua *shell*, nomeadamente aquando da instalação do pip e de pacotes (ver Preparação dos Dados e Instalação do NetworkX).
-
-[^4]: Algumas instalações só quererão que o leitor digite `pip` sem o "3", mas no Python 3, `pip3` é o mais comum. Se um não funcionar, tente o outro!
-
-[^5]: **Nota de tradução**: É importante lembrar que existem variações entre as diferentes versões do NetworkX que podem resultar em erros ou outputs diferentes. Tal é o caso da 2.6, com a qual obtivemos uma mensagem de erro durante a avaliação da modularidade e uma resposta diferente com a função print(nx.info(G)) daquela apresentada com a 2.4.
-
-[^6]: Existem algumas técnicas *pythónicas* que este código usa. A primeira é a 'compreensão de lista' (*list comprehensions*), que incorpora *loops* (`for n in nodes`) para criar novas listas (em parêntesis retos), assim: `new_list = [item for item in old_list]`. A segunda é a *list slicing*, que lhe permite subdividir ou "*slice*" ("cortar") a lista. A notação da *list slicing* `[1:]` toma tudo *exceto* o primeiro item na lista. O 1 informa o Python para começar com o segundo item nesta lista (no Python, o leitor começa a contar do 0), e os dois pontos dizem ao Python para tomar tudo até ao fim da lista. Como a primeira linha em ambas destas listas é a linha de cabeçalho de cada CSV, nós não queremos que esses cabeçalhos sejam incluídos nos nossos dados.
-
-[^7]: Dicionários são um tipo de dados incorporados no Python, construídos com pares de chave-valor. Pense numa chave como a palavra-chave num dicionário, e o valor como a sua definição. Chaves têm que ser únicas (só uma de cada por dicionário), mas os valores podem ser qualquer coisa. Dicionários são representados por chavetas, com chaves e valores separados por dois pontos: `{key1:value1, key2:value2, ...}`. Dicionários são uma das maneiras mais rápidas de armazenar valores que o leitor pode necessitar mais tarde. De facto, um objeto Grafo do NetworkX é, ele próprio, feito de dicionários aninhados.
-
-[^8]: Note que este código usa parêntesis retos de duas formas. Usa números em parêntesis retos para aceder índices específicos numa lista de nós (por exemplo, o ano de nascimento no `node[3]`), mas também para designar uma *chave* (sempre `node[0]`, o *name*) a qualquer um dos nossos dicionários vazios: `dictionary[key] = value`. Conveniente!
-
-[^9]: Por uma questão de simplicidade, removemos quaisquer nós que *não estão conectados a quaisquer outros* do *dataset* antes de termos começado. Isto foi feito simplesmente para reduzir a desordem, mas também é muito comum ver-se muitos destes nós solteiros num *dataset* de rede típico.
Isto foi feito simplesmente para reduzir a desordem, mas também é muito comum de se ver muitos destes nós solteiros no seu *dataset* de rede comum. - -[^10]: Mas mantenha em mente que isto é a densidade de *toda* a rede, incluindo esses componentes não conectados a flutuar em órbita. Existem várias conexões possíveis entre e com eles. Se o leitor tivesse tomado a densidade somente do componente maior, poderia ter obtido um número diferente. O leitor poderia fazê-lo ao encontrar o componente mais largo como nós lhe mostramos na próxima secção sobre o **diâmetro**, e, depois, ao executar o mesmo método de densidade somente nesse componente. - -[^11]: Nós tomamos o comprimento da lista *menos um* porque nós queremos o número de *edges* (ou passos) entre os nós listados aqui, ao invés do número de nós. - -[^12]: A forma mais correta de fazer este tipo de comparação é criar *grafos aleatórios* de tamanho idêntico para ver se as métricas diferem da norma. O NetworkX oferece várias ferramentas para [gerar grafos aleatórios](https://perma.cc/7Z4U-KAY7) (em inglês). - -[^13]: Porque se chama transitividade? O leitor pode recordar-se da propriedade transitiva de Geometria das aulas de Matemática no Ensino Secundário: se A=B e B=C, o A deve ser igual a C. Semelhantemente, no fechamento triádico, se a pessoa A conhece a pessoa B e a pessoa B conhece a pessoa C, então a pessoa A provavelmente conhece a pessoa C: logo, transitividade. - -[^14]: Aqueles com experiência em Estatística notarão que grau em redes sociais segue tipicamente uma *lei de potência*, mas isto não é nem pouco usual, nem especialmente útil saber. - -[^15]: Embora não venhamos a cobri-lo neste tutorial, é geralmente boa ideia obter a clasificação modular global primeiro para determinar se o leitor aprenderá qualquer coisa ao repartir a sua rede de acordo com a modularidade. Para ver a classificação geral da modularidade, tome as comunidades que calculou com `communities = community.best_partition(G)` e execute `global_modularity = community.modularity(communities, G)`. E depois basta aplicar `print(global_modularity)`. - -[^16]: **Nota de tradução**: [Plymouth](https://perma.cc/2EKN-TJPW) foi a primeira colónia inglesa permanente na região da Nova Inglaterra, no nordeste dos Estados Unidos da América, tendo sido fundada em 1620 por vários colonos puritanos, entre os quais um tal [William Bradford](https://perma.cc/UA8V-J4CX). Este [outro](https://perma.cc/TW4C-QWUY) referido foi um importante impressor *quaker*. - -[^17]: Em redes grandes, as listas seriam provavelmente ilegivelmente longas, mas o leitor poderia obter uma ideia de todas as classes modulares duma só vez ao visualizar a rede e adicionar cor aos nós baseada na sua classe modular. - -[^18]: Cada formato de ficheiro que é exportável é também importável. Se o leitor tiver um ficheiro GEXF do Gephi que quer pôr no NetworkX, digitaria `G = nx.read_gexf('some_file.gexf')`. +--- +title: "Explorar e Analisar Dados de Rede com Python" +slug: explorar-analisar-dados-rede-python +original: exploring-and-analyzing-network-data-with-python +layout: lesson +collection: lessons +date: 2017-06-16 +translation_date: 2023-05-12 +authors: +- John R. Ladd +- Jessica Otis +- Christopher N. 
Warren
+reviewers:
+- Elisa Beshero-Bondar
+- Anne Chao
+- Qiwei Li
+editors:
+- Brandon Walsh
+translator:
+- João Domingues Pereira
+translation-editor:
+- Eric Brasil
+translation-reviewer:
+- Josir Cardoso Gomes
+- Daniel Alves
+review-ticket: https://github.com/programminghistorian/ph-submissions/issues/446
+difficulty: 2
+activity: analyzing
+topics: [network-analysis, data-visualization]
+abstract: Esta lição introduz métricas de rede e como tirar conclusões das mesmas quando se trabalha com dados de Humanidades. O leitor aprenderá como usar o pacote NetworkX do Python para produzir e trabalhar com estas estatísticas de rede.
+avatar_alt: Caminhos-de-ferro intrincados
+doi: 10.46430/phpt0041
+modified: 2023-08-25
+lesson-testers: John R. Ladd
+tested-date: 2023-08-21
+---
+
+{% include toc.html %}
+
+# Introdução
+
+## Objetivos da Lição
+
+Neste tutorial, o leitor irá aprender:
+- A usar o pacote [**NetworkX**](https://perma.cc/F574-RREU) para trabalhar com dados de rede em [**Python**](/pt/licoes/introducao-instalacao-python); e
+- A analisar dados de rede de Humanidades para encontrar:
+  - Estruturas de rede e comprimentos de caminho,
+  - Nós importantes ou centrais, e
+  - Comunidades e subgrupos.
+
+**n.b.**: Este é um tutorial para explorar estatísticas e métricas de rede. Assim sendo, iremos focar-nos em maneiras de analisar e tirar conclusões a partir de redes sem visualizá-las. Provavelmente, o leitor quererá uma combinação de visualização e métricas de rede no seu próprio projeto, e, por isso, nós recomendamos este artigo como um complemento a [este tutorial anterior do *Programming Historian*](/en/lessons/creating-network-diagrams-from-historical-sources) (em inglês)[^1].
+
+## Pré-Requisitos
+
+Este tutorial assume que o leitor:
+
+- Tem uma familiaridade básica com redes e/ou leu [*From Hermeneutics to Data to Networks: Data Extraction and Network Visualization of Historical Sources*](/en/lessons/creating-network-diagrams-from-historical-sources) (em inglês), de Marten Düring, aqui no *Programming Historian*;
+- Instalou o Python 3, não o Python 2 que é nativamente instalado em sistemas operacionais com base no Unix, como os Macs (se precisar de assistência com a instalação do Python 3, veja [The Hitchhiker's Guide to Python](https://perma.cc/DP2N-B4EN) (em inglês)); e
+- Instalou o instalador de pacotes `pip`[^2].
+
+É possível ter duas versões do Python (2 *e* 3) instaladas no seu computador ao mesmo tempo. Por esta razão, ao aceder ao Python 3, o leitor frequentemente terá que o declarar explicitamente digitando `python3` e `pip3` em vez de simplesmente `python` e `pip`. Consulte os tutoriais do *Programming Historian* sobre a [instalação do Python](/pt/licoes/introducao-instalacao-python) e o [uso do pip](/pt/licoes/instalacao-modulos-python-pip) para mais informações[^3].
+
+## O Que o Leitor Pode Aprender a Partir dos Dados de Rede?
+
+Há muito que as redes interessam aos pesquisadores nas Humanidades, mas, recentemente, muitos académicos progrediram dum interesse grandemente qualitativo e metafórico em links e conexões para um conjunto mais formal de ferramentas quantitativas para estudar mediadores, *hubs* (nós importantes) e estruturas interconectadas. Como o sociólogo Mark S. Granovetter apontou no seu importante artigo de maio de 1973 [*The Strength of Weak Ties*](https://perma.cc/A4PC-WPKN) (em inglês), raramente é suficiente notar que duas pessoas estavam conectadas uma à outra.
Fatores como a sua relação estrutural com outras pessoas e se essas pessoas adicionais estavam, elas próprias, conectadas umas às outras têm influência decisiva nos eventos. Na medida em que até o mais perspicaz dos académicos tem dificuldade em perceber, digamos, o contorno geral duma rede (a sua "Topologia" de rede) e em identificar os nós mais significativos para conectar grupos, a análise quantitativa de rede oferece aos académicos um modo de transitar relativamente fluidamente entre o objeto social de larga escala (o "grafo") e as particularidades minuciosas das pessoas e laços sociais.
+
+Este tutorial irá ajudá-lo a responder questões como:
+- Qual é a estrutura geral da rede?
+- Quem são as pessoas importantes, ou *hubs*, na rede?
+- Quais são os subgrupos e comunidades na rede?
+
+
+## O Nosso Exemplo: a Sociedade dos Amigos
+
+Antes que existissem amigos do Facebook, havia a Sociedade dos Amigos, conhecida como os *quakers*. Fundados na Inglaterra em meados do século XVII, os *quakers* eram cristãos protestantes que divergiram da Igreja oficial da Inglaterra e que promoviam uma ampla tolerância religiosa, preferindo a suposta "luz interior" (*inner light*; **nota de tradução**: este conceito tinha uma extrema importância na Teologia *quaker*) e as consciências dos cristãos à ortodoxia imposta pelo Estado. O número de *quakers* cresceu rapidamente de meados para os finais do século XVII e os seus membros espalharam-se pelas Ilhas Britânicas, pela Europa e pelas colónias do Novo Mundo---especialmente pela Pensilvânia, fundada pelo líder *quaker* William Penn e lar dos quatro autores.
+
+Visto que os académicos há muito que ligam o crescimento e a persistência dos *quakers* à eficácia das suas redes, os dados usados neste tutorial são uma lista de nomes e relações entre os primeiros *quakers* do século XVII. Este *dataset* é derivado do [*Oxford Dictionary of National Biography*](https://www.oxforddnb.com) (em inglês) e do trabalho em progresso do projeto [*Six Degrees of Francis Bacon*](https://www.sixdegreesoffrancisbacon.com) (em inglês), o qual está a reconstruir as redes sociais da Grã-Bretanha moderna (1500-1700).
+
+# Preparação dos Dados e Instalação do NetworkX
+
+Antes de iniciar este tutorial, o leitor precisará de fazer o download de dois ficheiros que, combinados, constituem o *dataset* da nossa rede. O ficheiro [quakers_nodelist.csv](/assets/exploring-and-analyzing-network-data-with-python/quakers_nodelist.csv) é uma lista de *quakers* modernos (nós) e o ficheiro [quakers_edgelist.csv](/assets/exploring-and-analyzing-network-data-with-python/quakers_edgelist.csv) é uma lista de relações entre esses *quakers* (*edges*). Para fazer o download destes ficheiros, basta clicar com o botão direito do *mouse* nos *links* e escolher "Guardar ligação como".
+
+Será extremamente útil ao leitor familiarizar-se com a estrutura do *dataset* antes de continuar. Para mais informações sobre a estrutura geral dos *datasets* de rede, veja [este tutorial](/en/lessons/creating-network-diagrams-from-historical-sources#developing-a-coding-scheme) (em inglês). Quando o leitor abrir o ficheiro de nós no programa da sua escolha, verá que cada *quaker* é primeiramente identificado pelo seu *name* (nome).
Cada nó dum *quaker* também tem uma série de atributos associados, incluindo *historical significance* (em português, significado histórico), *gender* (em português, género), *birth*/*death dates* (em português, datas de nascimento/morte), e o SDFB ID---um identificador numérico exclusivo que lhe permitirá cruzar nós neste *dataset* com o *dataset* original do *Six Degrees of Francis Bacon*, se desejado. Aqui estão as primeiras linhas:
+
+```
+Name,Historical Significance,Gender,Birthdate,Deathdate,ID
+Joseph Wyeth,religious writer,male,1663,1731,10013191
+Alexander Skene of Newtyle,local politician and author,male,1621,1694,10011149
+James Logan,colonial official and scholar,male,1674,1751,10007567
+Dorcas Erbery,Quaker preacher,female,1656,1659,10003983
+Lilias Skene,Quaker preacher and poet,male,1626,1697,10011152
+```
+
+Note que, embora as colunas não estejam corretamente alinhadas como ocorre numa tabela de dados, as vírgulas mantêm tudo apropriadamente separado.
+
+Quando o leitor abrir o ficheiro de *edges*, verá que nós usamos os *names* do ficheiro de nós para identificar os nós conectados por cada *edge*. Estas *edges* começam num nó ***source*** (em português, origem) e acabam num nó ***target*** (em português, destino). Embora esta linguagem derive das chamadas estruturas de rede **direcionadas**, nós usaremos os nossos dados como uma rede **não direcionada**: se a Pessoa A conhece a Pessoa B, então a Pessoa B também deve conhecer a Pessoa A. Nas redes direcionadas, as relações não precisam de ser recíprocas (a Pessoa A pode enviar uma carta à B sem receber uma em troca), mas nas redes não direcionadas as conexões são sempre recíprocas, ou **simétricas**. Uma vez que esta é uma rede de quem conhecia quem ao invés de, digamos, uma rede epistolar, um conjunto de relações não direcionadas é o mais apropriado. As relações simétricas nas redes não direcionadas são úteis sempre que estiver preocupado com relações que definem o mesmo papel para ambas as partes. Dois amigos têm uma relação simétrica: cada um deles é um amigo do outro. O autor e o destinatário duma carta têm uma relação assimétrica porque cada um tem um papel diferente. Tanto as redes direcionadas como as não direcionadas têm os seus próprios recursos (e, por vezes, as suas próprias métricas), e o leitor quererá escolher aquela que melhor se adapta aos tipos de relações que está a registar e às questões que quer clarificar. Aqui estão as primeiras *edges* na rede *quaker* não direcionada:
+
+```
+Source,Target
+George Keith,Robert Barclay
+George Keith,Benjamin Furly
+George Keith,Anne Conway Viscountess Conway and Killultagh
+George Keith,Franciscus Mercurius van Helmont
+George Keith,William Penn
+```
+
+Agora que fez o download dos dados *quakers* e viu como estão estruturados, está na hora de começar a trabalhar com esses dados no Python. Assim que tanto o Python como o pip estiverem instalados (ver Pré-Requisitos, acima), quererá instalar o NetworkX, digitando isto na sua [linha de comandos](/en/lessons/intro-to-bash) (em inglês):[^4]
+
+```
+pip3 install networkx==3.1
+```
+
+Uma nota curta sobre controlo de versões: este tutorial usa o NetworkX 3.1, mas a biblioteca está em desenvolvimento ativo e é atualizada com frequência. Recomendamos usar o comando de instalação acima para garantir que a sua versão do NetworkX corresponde ao código abaixo (em vez de simplesmente instalar a versão mais recente).
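+
+Se quiser confirmar qual a versão do NetworkX que ficou instalada, pode verificá-la a partir do próprio Python. Um esboço mínimo (assume apenas que a instalação acima já foi feita):
+
+```python
+import networkx
+
+# O atributo __version__ indica a versão instalada; deve imprimir "3.1"
+print(networkx.__version__)
+```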
Se já tiver uma versão mais antiga do NetworkX instalada, execute `pip3 install networkx==3.1 --upgrade` antes de tentar o tutorial[^5].
+
+Está feito! Está preparado para começar a codificar.
+
+# Começando
+
+## Ler Ficheiros, Importar Dados
+
+Inicie, no mesmo diretório que os seus ficheiros de dados, um novo ficheiro de texto simples, em branco, chamado `quaker_network.py` (para mais detalhes sobre a instalação e execução do Python, ver [este tutorial](/pt/licoes/instalacao-windows)). No topo desse ficheiro, importe as bibliotecas de que precisa. O leitor precisará de três bibliotecas---aquela que acabámos de instalar, e duas bibliotecas incorporadas no Python. Pode digitar:
+
+```python
+import csv
+from operator import itemgetter
+import networkx as nx
+from networkx.algorithms import community # Esta parte do NetworkX, para a deteção de comunidades, precisa de ser importada separadamente.
+```
+
+Agora pode ordenar ao programa para ler os seus ficheiros de CSV e retirar os dados de que precisa. Ironicamente, ler ficheiros e reorganizar os dados geralmente requer um código mais complexo que as funções para executar uma análise de redes sociais, portanto pedimos que tenha paciência connosco ao longo deste primeiro bloco de código. Aqui está um conjunto de comandos para abrir e ler os ficheiros das nossas listas de nós e de *edges*:
+
+```python
+with open('quakers_nodelist.csv', 'r') as nodecsv: # Abra o ficheiro
+    nodereader = csv.reader(nodecsv) # Leia o CSV
+    # Retire os dados (usando a list comprehension e a list slicing do Python para remover a linha de cabeçalho, veja a nota de rodapé 6)
+    nodes = [n for n in nodereader][1:]
+
+node_names = [n[0] for n in nodes] # Obtenha uma lista apenas dos nomes dos nós
+
+with open('quakers_edgelist.csv', 'r') as edgecsv: # Abra o ficheiro
+    edgereader = csv.reader(edgecsv) # Leia o CSV
+    edges = [tuple(e) for e in edgereader][1:] # Retire os dados
+```
+
+Este código executa funções similares às [deste tutorial](/pt/licoes/trabalhando-ficheiros-texto-python), mas usa o módulo CSV para carregar os seus nós e *edges*. Mais tarde, o leitor voltará a atuar sobre os dados e obterá mais informação sobre os nós, mas, por agora, precisa de duas coisas: a lista completa de nós e uma lista de pares de *edges* (como énuplos de nós)[^6]. Estas são as formas de que o NetworkX precisará para criar um "objeto grafo", um tipo de dados especial do NetworkX sobre o qual o leitor aprenderá na próxima secção.
+
+Nesta fase, antes de começar a usar o NetworkX, o leitor pode fazer algumas verificações de sanidade básicas para se certificar de que os seus dados foram corretamente carregados, usando funções e métodos incorporados no Python. Digitando:
+
+```python
+print(len(node_names))
+```
+
+e:
+
+```python
+print(len(edges))
+```
+
+e, depois, executando o seu *script*, verá quantos nós e *edges* carregou com sucesso no Python. Se o leitor vir 119 nós e 174 *edges*, então tem todos os dados necessários.
+
+
+## Noções Básicas do NetworkX: Criar o Grafo
+
+Agora o leitor tem os seus dados como duas listas do Python: uma lista de nós (`node_names`) e uma lista de *edges* (`edges`). No NetworkX, o leitor pode juntar estas duas listas num só objeto rede que compreende como os nós e as *edges* se relacionam. Este objeto é chamado de **Grafo**, referindo-se a um dos termos comuns para dados organizados como uma rede (**n.b.**: não se refere a nenhuma representação visual dos dados; aqui, grafo é usado puramente num sentido matemático, de análise de rede).
Primeiro, o leitor deve *inicializar* um objeto Grafo com o seguinte comando:
+
+```python
+G = nx.Graph()
+```
+
+> **Nota de tradução**: em inglês, 'gráfico' pode ser traduzido como '*graphic*' ou, de forma diminutiva, como '*graph*', que também pode significar 'grafo', o termo aqui referido. Esta homografia não ocorre no português.
+
+Isto criará um novo objeto grafo, *G*, ainda vazio. Agora, o leitor pode adicionar as suas listas de nós e de *edges* assim:
+
+```python
+G.add_nodes_from(node_names)
+G.add_edges_from(edges)
+```
+
+Esta é uma de várias maneiras de adicionar dados a um objeto rede. O leitor pode verificar a [documentação do NetworkX](https://perma.cc/3QVU-FLPF) (em inglês) para obter mais informações sobre como adicionar *weighted edges*, ou adicionar nós e *edges* um a um.
+
+Finalmente, o leitor pode obter informação básica sobre a sua rede recém-criada imprimindo o próprio objeto Grafo com a função `print`:
+
+```python
+print(G)
+```
+
+Imprimir o objeto Grafo mostra o tipo da sua rede (neste caso, um objeto Graph padrão) e o número de nós e de *edges* na mesma. O _output_ deve ser parecido a este:
+
+```
+Graph with 119 nodes and 174 edges
+```
+
+Esta é uma forma rápida de obter informação geral sobre o seu grafo, mas como o leitor aprenderá em secções subsequentes, está apenas a arranhar a superfície do que o NetworkX lhe pode indicar sobre os seus dados.
+
+Para recapitular, de momento o seu *script* será semelhante a isto:
+
+```python
+import csv
+from operator import itemgetter
+import networkx as nx
+from networkx.algorithms import community
+
+# Leia o ficheiro da lista de nós
+with open('quakers_nodelist.csv', 'r') as nodecsv:
+    nodereader = csv.reader(nodecsv)
+    nodes = [n for n in nodereader][1:]
+
+# Obtenha uma lista apenas dos nomes dos nós (o primeiro item em cada linha)
+node_names = [n[0] for n in nodes]
+
+# Leia o ficheiro da lista de edges
+with open('quakers_edgelist.csv', 'r') as edgecsv:
+    edgereader = csv.reader(edgecsv)
+    edges = [tuple(e) for e in edgereader][1:]
+
+# Obtenha o número de nós e de edges nas nossas duas listas
+print(len(node_names))
+print(len(edges))
+
+G = nx.Graph() # Inicialize um objeto Grafo
+G.add_nodes_from(node_names) # Adicione nós ao Grafo
+G.add_edges_from(edges) # Adicione edges ao Grafo
+print(G) # Obtenha informação sobre o Grafo
+```
+
+Até agora, o leitor leu dados de nós e de *edges* no Python a partir de ficheiros CSV, e, depois, contou esses nós e *edges*. Depois disso, o leitor criou um objeto grafo usando o NetworkX e carregou os seus dados para esse objeto.
+
+## Adicionar Atributos
+
+Para o NetworkX, um objeto grafo é uma coisa grande (a sua rede) composta por dois tipos de coisas mais pequenas (os seus nós e as suas *edges*). Até agora, o leitor carregou nós e *edges* (como pares de nós), mas o NetworkX permite-lhe adicionar *atributos* tanto aos nós como às *edges*, providenciando mais informação sobre cada um deles. Mais à frente neste tutorial, o leitor executará métricas e adicionará alguns dos resultados de volta ao Grafo como atributos. Por agora, vamos certificar-nos de que o seu Grafo contém todos os atributos que estão atualmente no seu CSV.
+
+O leitor quererá retornar a uma lista que criou no início do seu *script*: `nodes`. Esta lista contém todas as linhas do `quakers_nodelist.csv`, incluindo colunas para o *name*, a *historical significance*, o *gender*, o *birth year*, o *death year* e o SDFB ID.
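+
+Se quiser confirmar a ordem destas colunas antes de construir os dicionários da próxima etapa, pode imprimir a primeira linha da lista. Um esboço mínimo, assumindo a lista `nodes` carregada como acima:
+
+```python
+# A primeira linha de dados corresponde a Joseph Wyeth (ver o exemplo de CSV acima)
+print(nodes[0])
+# ['Joseph Wyeth', 'religious writer', 'male', '1663', '1731', '10013191']
+```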
O leitor quererá iterar por esta lista e adicionar esta informação ao nosso grafo. Existem algumas maneiras de fazer isto, mas o NetworkX providencia duas funções convenientes para adicionar atributos a todos os nós e *edges* dum Grafo duma só vez: `nx.set_node_attributes()` e `nx.set_edge_attributes()`. Para usar estas funções, o leitor irá precisar que os seus dados de atributos estejam na forma dum *dicionário* Python, no qual os nomes dos nós são as *chaves* e os atributos que quer adicionar são os *valores*[^7]. O leitor quererá criar um dicionário para cada um dos seus atributos, e, depois, adicioná-los usando as funções acima. A primeira coisa que o leitor deve fazer é criar cinco dicionários em branco, usando chavetas:
+
+```python
+hist_sig_dict = {}
+gender_dict = {}
+birth_dict = {}
+death_dict = {}
+id_dict = {}
+```
+
+Agora nós podemos fazer o *loop* através da nossa lista de `nodes` e adicionar os itens apropriados a cada dicionário. Nós fazemos isto sabendo antecipadamente a posição, ou *índice*, de cada atributo. Porque o nosso ficheiro `quakers_nodelist.csv` está bem organizado, nós sabemos que o *name* da pessoa será sempre o primeiro item na lista: índice 0, visto que começamos sempre a contar do 0 no Python. A *historical significance* da pessoa será o índice 1, o seu *gender* será o índice 2, e assim por diante. Portanto, nós podemos construir os nossos dicionários desta forma[^8]:
+
+```python
+for node in nodes: # Itere pela lista, uma linha de cada vez
+    hist_sig_dict[node[0]] = node[1]
+    gender_dict[node[0]] = node[2]
+    birth_dict[node[0]] = node[3]
+    death_dict[node[0]] = node[4]
+    id_dict[node[0]] = node[5]
+```
+
+Agora o leitor tem um conjunto de dicionários que pode usar para adicionar atributos a nós no seu objeto Grafo. A função `set_node_attributes` toma três variáveis: o Grafo ao qual o leitor está a adicionar o atributo, o dicionário de pares id-atributo, e o nome do novo atributo. O código para adicionar os seus cinco atributos assemelha-se a isto:
+
+```python
+nx.set_node_attributes(G, hist_sig_dict, 'historical_significance')
+nx.set_node_attributes(G, gender_dict, 'gender')
+nx.set_node_attributes(G, birth_dict, 'birth_year')
+nx.set_node_attributes(G, death_dict, 'death_year')
+nx.set_node_attributes(G, id_dict, 'sdfb_id')
+```
+
+Agora todos os seus nós têm estes cinco atributos, e o leitor pode aceder a eles a qualquer momento. Por exemplo, o leitor pode obter todos os *birth years* dos seus nós iterando por eles e acedendo ao atributo `birth_year`, assim:
+
+```python
+for n in G.nodes(): # Itere por cada nó; nos nossos dados, "n" será o nome da pessoa
+    print(n, G.nodes[n]['birth_year']) # Aceda a cada nó pelo seu nome, e, depois, pelo atributo "birth_year"
+```
+
+A partir desta instrução, o leitor obterá uma linha de *output* para cada nó na rede. Deve parecer-se com uma simples lista de nomes e anos:
+
+```
+Anne Camm 1627
+Sir Charles Wager 1666
+John Bellers 1654
+Dorcas Erbery 1656
+Mary Pennyman 1630
+Humphrey Woolrich 1633
+John Stubbs 1618
+Richard Hubberthorne 1628
+Robert Barclay 1648
+William Coddington 1601
+```
+
+Os passos acima são um método comum para adicionar atributos a nós que o leitor usará repetidamente mais tarde neste tutorial.
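+
+A operação inversa também existe: se quiser recuperar duma só vez um dicionário com um determinado atributo de todos os nós, pode usar a função `nx.get_node_attributes()`. Um esboço mínimo, assumindo o Grafo `G` construído acima:
+
+```python
+# Obtenha um dicionário {nome do nó: género} a partir do atributo "gender"
+gender = nx.get_node_attributes(G, 'gender')
+print(gender['William Penn']) # Deve imprimir "male"
+```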
Aqui está uma recapitulação do bloco de código desta secção:
+
+```python
+# Crie um dicionário em branco para cada atributo
+hist_sig_dict = {}
+gender_dict = {}
+birth_dict = {}
+death_dict = {}
+id_dict = {}
+
+for node in nodes: # Itere pela lista de nós, uma linha de cada vez
+    hist_sig_dict[node[0]] = node[1] # Aceda ao item correto, adicione-o ao dicionário correspondente
+    gender_dict[node[0]] = node[2]
+    birth_dict[node[0]] = node[3]
+    death_dict[node[0]] = node[4]
+    id_dict[node[0]] = node[5]
+
+# Adicione cada dicionário como um atributo de nó ao objeto Grafo
+nx.set_node_attributes(G, hist_sig_dict, 'historical_significance')
+nx.set_node_attributes(G, gender_dict, 'gender')
+nx.set_node_attributes(G, birth_dict, 'birth_year')
+nx.set_node_attributes(G, death_dict, 'death_year')
+nx.set_node_attributes(G, id_dict, 'sdfb_id')
+
+# Itere por cada nó, para aceder e obter todos os atributos "birth_year"
+for n in G.nodes():
+    print(n, G.nodes[n]['birth_year'])
+```
+
+Agora o leitor aprendeu como criar um objeto Grafo e adicionar atributos ao mesmo. Na próxima secção, o leitor aprenderá sobre uma variedade de métricas disponíveis no NetworkX e como aceder às mesmas. Mas relaxe, acabou de aprender a maior parte do código de que precisará para o resto do tutorial!
+
+# Métricas Disponíveis no NetworkX
+
+Quando o leitor começa a trabalhar num novo *dataset*, é uma boa ideia obter uma visão geral dos dados. A primeira etapa, descrita acima, consiste simplesmente em abrir os ficheiros e ver o que está lá dentro. Porque é uma rede, o leitor sabe que existirão nós e *edges*, mas quantos de cada um existem? Que informação está anexada a cada nó ou *edge*?
+
+No nosso caso, existem 174 *edges* e 119 nós. Estas *edges* não têm direções (isto é, existe uma relação simétrica entre pessoas), nem incluem informação adicional. Para os nós, nós sabemos os seus *names*, a sua *historical significance*, o seu *gender*, as suas *birth* e *death dates*, e o SDFB ID.
+
+Estes detalhes informam o que o leitor pode ou devia fazer com o seu *dataset*. Muito poucos nós (digamos, 15) e uma análise de rede é menos útil do que desenhar uma imagem ou fazer algumas leituras; demasiados (digamos, 15 milhões) e o leitor deveria considerar começar com um subconjunto ou encontrar um supercomputador.
+
+As propriedades da rede também guiam a sua análise. Porque esta rede é **não direcionada**, a sua análise tem que usar métricas que exigem *edges* simétricas entre nós. Por exemplo, o leitor pode determinar em que comunidades as pessoas se encontram, mas não pode determinar as rotas *direcionais* pelas quais a informação poderá fluir ao longo da rede (precisaria duma rede direcionada para isso). Ao usar as relações simétricas e não direcionadas neste caso, o leitor será capaz de encontrar subcomunidades e as pessoas que são importantes nessas comunidades, um processo que seria mais difícil (embora ainda possível) com uma rede direcionada. O NetworkX permite-lhe realizar a maior parte das análises que o leitor pode conceber, mas deve compreender as possibilidades do seu *dataset* e perceber que alguns algoritmos do NetworkX são mais apropriados do que outros.
+
+## O Formato da Rede
+
+Após ver a aparência do *dataset*, é importante ver a aparência da *rede*. Estas são coisas diferentes. O *dataset* é uma representação abstrata do que o leitor assume serem conexões entre entidades; a rede é a instanciação específica dessas suposições.
A rede, pelo menos neste contexto, é como o computador lê as conexões que o leitor codificou num *dataset*. A rede tem uma [Topologia](https://perma.cc/8M84-GESG), ou uma forma conectiva, que pode ser centralizada ou descentralizada; densa ou esparsa; cíclica ou linear. Um *dataset*, por si só, não tem nenhuma, para além da estrutura da tabela na qual está digitado.
+
+O formato e as propriedades básicas da rede irão dar-lhe uma ideia sobre com o que está a trabalhar e que análises parecem razoáveis. O leitor já sabe o número de nós e de *edges*, mas a que é que a rede se 'assemelha'? Os nós agrupam-se, ou estão espalhados de forma regular? Existem estruturas complexas, ou cada nó está organizado numa linha reta?
+
+A visualização abaixo, criada na ferramenta de visualização de redes [Gephi](https://gephi.org/), dar-lhe-á uma ideia da Topologia desta rede[^9]. O leitor poderia criar um gráfico similar no Palladio usando [este tutorial](/en/lessons/creating-network-diagrams-from-historical-sources) (em inglês).
+
+{% include figure.html filename="exploring-and-analyzing-network-data-with-python-1.png" alt="Imagem com uma representação de um gráfico de redes" caption="Visualização de rede baseada em força dos dados *quakers*, criada no Gephi." %}
+
+Existem várias formas de visualizar uma rede, e um [*layout* baseado em força](https://perma.cc/AM7G-BTWV) (em inglês), do qual a imagem acima é um exemplo, encontra-se entre as mais comuns. Grafos baseados em força tentam encontrar o posicionamento ideal para os nós com um cálculo baseado na [tensão de molas segundo a Lei de Hooke](https://perma.cc/2RTL-CYVL) (em inglês), o qual, para grafos mais pequenos, normalmente cria visualizações limpas e de leitura fácil. A visualização embutida acima mostra-lhe que existe um único grande **componente** de nós conectados (no centro) e vários componentes pequenos com apenas uma ou duas conexões nas periferias. Esta é uma estrutura de rede relativamente comum. Saber que existem múltiplos componentes na rede limitará de forma útil os cálculos que o leitor quererá realizar nela. Ao representar o número de conexões (conhecido como **grau**, ver abaixo) através do tamanho dos nós, a visualização também mostra que existem alguns nós com muitas conexões que mantêm o componente central coeso. Estes grandes nós são conhecidos como ***hubs***, e o facto de eles aparecerem tão claramente aqui dá-lhe uma pista em relação ao que o leitor encontrará quando medir a **centralidade** na próxima secção.
+
+Visualizações, no entanto, apenas o levam até certo ponto. Quanto mais redes analisar, mais o leitor se aperceberá de que a maior parte se parece o suficiente entre si para ser difícil distinguir uma da outra. Métricas quantitativas deixam-no diferenciar redes, aprender sobre as suas Topologias, e transformar uma confusão de nós e *edges* em algo a partir do qual se pode aprender.
+
+Uma boa métrica com a qual começar é a **densidade** de rede. Isto é, simplesmente, o rácio de *edges* reais na rede face a todas as *edges* possíveis na rede. Numa rede não direcionada como esta, *poderia* haver uma única *edge* entre quaisquer dois nós, mas como o leitor viu na visualização, apenas algumas dessas *edges* possíveis estão realmente presentes. A densidade de rede dá-lhe uma ideia rápida de quão coesa a sua rede é.
+
+E as boas notícias são que muitas destas métricas requerem comandos simples e unilineares no Python. Daqui para a frente, o leitor pode continuar a construir o seu bloco de código das secções anteriores.
O leitor não tem de apagar nada que já tenha digitado, e porque criou o seu objeto rede `G` no bloco de código acima, todas as métricas a partir daqui devem funcionar corretamente.
+
+O leitor pode calcular a densidade da rede executando `nx.density(G)`. No entanto, a melhor maneira de fazer isto é armazenar a sua métrica numa variável para referência futura, e imprimir essa variável, assim:
+
+```python
+density = nx.density(G)
+print("Network density:", density)
+```
+
+O *output* da densidade é um número, então é isso que o leitor verá quando imprimir o valor. Neste caso, a densidade da nossa rede é, aproximadamente, 0.0248. Numa escala de 0 a 1, não é uma rede muito densa, o que confere com o que o leitor consegue ver na visualização[^10]. Um 0 significaria que não existem quaisquer conexões de todo, e um 1 indicaria que todas as *edges possíveis* estão presentes (uma rede perfeitamente conectada): esta rede *quaker* está na extremidade inferior dessa escala, mas, mesmo assim, longe do 0.
+
+Uma medida de caminho mais curto é um pouco mais complexa. Ela calcula a série mais curta possível de nós e *edges* que se situam entre quaisquer dois nós, algo difícil de ver em visualizações de grandes redes. Esta medida corresponde, essencialmente, a encontrar amigos de amigos---se a minha mãe conhece alguém que eu não conheço, então a minha mãe é o caminho mais curto entre mim e essa pessoa. O jogo *Six Degrees of Kevin Bacon*, a partir do qual o [nosso projeto](https://sixdegreesoffrancisbacon.com/) (em inglês) retira o nome, é basicamente um jogo que consiste em encontrar os caminhos mais curtos (com um **comprimento de caminho** de seis ou menos) de Kevin Bacon a qualquer outro ator.
+
+Para calcular um caminho mais curto, o leitor precisa de passar várias variáveis de *input* (informação que dá a uma função do Python): o grafo inteiro, o seu nó *source*, e o seu nó *target*. Vamos procurar o caminho mais curto entre Margaret Fell e George Whitehead. Como usámos nomes para identificar unicamente os nossos nós nesta rede, o leitor pode aceder a esses nós (como a ***source*** e o ***target*** do seu caminho) usando os nomes diretamente.
+
+```python
+fell_whitehead_path = nx.shortest_path(G, source="Margaret Fell", target="George Whitehead")
+
+print("Shortest path between Fell and Whitehead:", fell_whitehead_path)
+```
+
+Dependendo do tamanho da sua rede, isto pode demorar algum tempo para calcular, visto que o Python primeiro encontra todos os caminhos possíveis e depois escolhe o mais curto. O *output* de `shortest_path` será uma lista dos nós que inclui a "source" (Fell), o "target" (Whitehead), e os nós entre eles. Neste caso, nós podemos ver que o fundador dos *quakers*, George Fox, se encontra no caminho mais curto entre eles. Como Fox é também um ***hub*** (ver centralidade de grau, abaixo) com muitas conexões, nós podemos supor que vários caminhos mais curtos passam por ele como mediador. O que é que isto pode indicar sobre a importância dos fundadores dos *quakers* para a sua rede social?
+
+O NetworkX inclui várias ferramentas que calculam os caminhos mais curtos. Existem funções para os comprimentos dos caminhos mais curtos, para todos os caminhos mais curtos, e para saber se um caminho existe ou não de todo na [documentação](https://perma.cc/3MJE-7MQQ) (em inglês).
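+
+A título de exemplo, duas dessas funções podem ser aplicadas diretamente ao par Fell-Whitehead. Um esboço mínimo, assumindo o Grafo `G` e os nós usados acima:
+
+```python
+# Verifique se existe algum caminho entre os dois nós
+print(nx.has_path(G, "Margaret Fell", "George Whitehead"))
+
+# Obtenha diretamente o comprimento do caminho mais curto, sem listar os nós
+print(nx.shortest_path_length(G, source="Margaret Fell", target="George Whitehead"))
+```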
O leitor poderia usar uma função separada para encontrar o comprimento do caminho *Fell-Whitehead* que acabámos de calcular (como a função `shortest_path_length()` mostrada acima), ou poderia simplesmente tomar o comprimento da lista menos um[^11], assim:
+
+```python
+print("Length of that path:", len(fell_whitehead_path)-1)
+```
+
+Existem muitas métricas de rede derivadas dos comprimentos de caminho mais curtos. Uma dessas medidas é o **diâmetro**, que é o mais longo de todos os caminhos mais curtos. Depois de calcular todos os caminhos mais curtos entre cada par de nós possível na rede, o diâmetro é o comprimento do caminho entre os dois nós que estão mais afastados. A medida está projetada para lhe dar uma noção do tamanho geral da rede, a distância duma extremidade da rede à outra.
+
+O diâmetro usa um comando simples: `nx.diameter(G)`. No entanto, executar este comando no grafo *quaker* dará uma mensagem de erro indicando que o Grafo não está conectado ("*not connected*"). Isto significa apenas que o seu grafo, como o leitor já viu, tem mais que um componente. Porque existem alguns nós que não têm caminho algum para outros, é impossível encontrar todos os caminhos mais curtos. Veja novamente a visualização do seu grafo:
+
+{% include figure.html filename="exploring-and-analyzing-network-data-with-python-1.png" alt="Imagem com uma representação de um gráfico de redes" caption="Visualização de rede baseada em força dos dados *quakers*, criada no Gephi." %}
+
+Como não há caminho entre nós dum componente e nós doutro, `nx.diameter()` retorna a mensagem de erro "*not connected*". O leitor pode remediar isto, primeiro, ao descobrir se o seu Grafo está conectado ("*is connected*", *i.e.*, se é todo um único componente) e, se não estiver, ao isolar o componente maior e calcular o diâmetro somente desse componente. Aqui está o código:
+
+```python
+# Se o seu Grafo tiver mais do que um componente, isto retornará 'False'
+print(nx.is_connected(G))
+
+# A seguir, use nx.connected_components para obter a lista de componentes,
+# depois, use o comando max() para encontrar o maior:
+components = nx.connected_components(G)
+largest_component = max(components, key=len)
+
+# Crie um 'Subgrafo' apenas com o componente maior,
+# depois, calcule o diâmetro do Subgrafo, tal como fez com a densidade.
+
+subgraph = G.subgraph(largest_component)
+diameter = nx.diameter(subgraph)
+print("Network diameter of largest component:", diameter)
+```
+
+Como tomámos o componente maior, podemos assumir que nenhum dos outros componentes tem um diâmetro maior. Portanto, este valor é uma boa aproximação ao diâmetro de todo o Grafo. O diâmetro do componente maior desta rede é 8: existe um comprimento de caminho de 8 entre os dois nós mais afastados da rede. Ao contrário da densidade, que é apresentada numa escala de 0 a 1, é difícil saber, somente a partir deste número, se 8 é um diâmetro grande ou pequeno. Para algumas métricas globais, pode ser melhor compará-las às de redes de tamanho e forma similares[^12].
+
+O cálculo estrutural final que o leitor fará nesta rede diz respeito ao conceito de **fechamento triádico**. O fechamento triádico supõe que, se duas pessoas conhecem a mesma pessoa, elas provavelmente se conhecem mutuamente. Se Fox conhece tanto Fell como Whitehead, então Fell e Whitehead podem perfeitamente conhecer-se mutuamente, completando um **triângulo** na visualização das três *edges* que conectam Fox, Fell e Whitehead.
O número destes triângulos fechados na rede pode ser usado para descobrir aglomerados e comunidades de indivíduos que se conhecem todos intimamente.
+
+Uma forma de medir o fechamento triádico chama-se **coeficiente de aglomeração** por causa desta tendência aglomeradora, mas a medida estrutural de rede que o leitor aprenderá aqui é conhecida como **transitividade**[^13]. A transitividade é o rácio entre todos os triângulos existentes e todos os triângulos possíveis. Um triângulo possível existe quando uma pessoa (Fox) conhece duas pessoas (Fell e Whitehead). Então, a transitividade, como a densidade, expressa quão interconectado um grafo é, em termos dum rácio entre as conexões reais e as possíveis. Lembre-se, medidas como a transitividade e a densidade lidam com *probabilidades* e não com *certezas*. Todos os *outputs* do seu *script* no Python devem ser interpretados, como qualquer outro objeto de pesquisa. A transitividade dá-lhe uma forma de pensar sobre todas as relações no seu grafo que *podem* existir, mas que, atualmente, não existem.
+
+O leitor pode calcular a transitividade numa só linha, da mesma forma que calculou a densidade:
+
+```python
+triadic_closure = nx.transitivity(G)
+print("Triadic closure:", triadic_closure)
+```
+
+Tal como a densidade, a transitividade é expressa numa escala de 0 a 1, e o leitor pode ver que a transitividade da rede é de cerca de 0.1694, um valor um pouco mais alto que o da sua densidade de 0.0248. Porque o grafo não é muito denso, existem, à partida, menos *triângulos possíveis*, o que pode resultar numa transitividade relativamente mais elevada. Isto é, os nós que já têm várias conexões farão provavelmente parte destes triângulos fechados. Para explorar isto, o leitor quererá saber mais sobre os nós com muitas conexões.
+
+## Centralidade
+
+Depois de obter algumas medidas básicas da estrutura da rede inteira, um bom próximo passo é descobrir quais são os nós mais importantes na sua rede. Na análise de redes, as medidas da importância dos nós são referidas como medidas de **centralidade**. Porque existem várias maneiras de abordar a questão "Que nós são os mais importantes?", existem várias formas diferentes de calcular a centralidade. Aqui, o leitor aprenderá sobre as três medidas de centralidade mais comuns: o grau, a centralidade de intermediação, e a centralidade adjacente (*eigenvector centrality*).
+
+O **grau** é a forma mais simples e comum de encontrar nós importantes. O grau dum nó é a soma das suas *edges*. Se um nó tem três linhas a estenderem-se a outros nós, o seu grau é de três. Cinco *edges*, o seu grau é de cinco. É extremamente simples. Como cada uma dessas *edges* terá sempre um nó na outra extremidade, o leitor pode pensar no grau como o número de pessoas às quais qualquer pessoa está diretamente conectada. Os nós com os graus mais elevados numa rede social são as pessoas que conhecem mais pessoas. Estes nós são geralmente referidos como ***hubs***, e calcular o grau é a forma mais rápida de identificar os *hubs*.
+
+Calcular a centralidade para cada nó no NetworkX não é exatamente tão simples como as métricas de toda a rede acima, mas continua a envolver comandos unilineares. Todos os comandos de centralidade que o leitor aprenderá nesta secção produzem dicionários nos quais as chaves são os nós e os valores são as medidas de centralidade. Isto significa que eles estão prontos para serem adicionados de volta à sua rede como um atributo de nó, como o leitor fez na última secção. Comece por calcular o grau e adicione-o como um atributo à sua rede.
+
+```python
+degree_dict = dict(G.degree(G.nodes()))
+nx.set_node_attributes(G, degree_dict, 'degree')
+```
+
+O leitor acabou de executar o método `G.degree()` na lista completa de nós na sua rede (`G.nodes()`). Como o leitor o adicionou como um atributo, agora pode ver o grau de William Penn, bem como o resto da sua informação, se aceder ao seu nó diretamente:
+
+```python
+print(G.nodes['William Penn'])
+```
+
+Mas estes resultados são úteis para mais do que simplesmente adicionar atributos ao seu objeto Grafo. Como o leitor já está no Python, pode organizá-los e compará-los. O leitor pode usar a função incorporada `sorted()` para organizar um dicionário pelas suas chaves ou valores e encontrar o *top* vinte dos nós por grau. Para fazer isto, o leitor vai precisar de usar `itemgetter`, o qual nós importámos no início do tutorial. Usando `sorted` e `itemgetter`, pode organizar o dicionário de graus assim:
+
+```python
+sorted_degree = sorted(degree_dict.items(), key=itemgetter(1), reverse=True)
+```
+
+Aqui, há muitas coisas a acontecer nos bastidores, mas concentre-se só nas três variáveis de *input* que o leitor deu a `sorted()`. A primeira é o dicionário, `degree_dict.items()`, que quer organizar. A segunda é o critério de organização: neste caso, o item "1" é o segundo elemento do par, ou seja, o valor do seu dicionário. Finalmente, o leitor diz a `sorted()` para ir em `reverse` para que os nós de grau mais elevado apareçam primeiro na lista resultante. Assim que o leitor tiver criado esta lista organizada, pode iterar por ela e usar a *list slicing*[^6] para obter somente os primeiros 20 nós:
+
+```python
+print("Top 20 nodes by degree:")
+for d in sorted_degree[:20]:
+    print(d)
+```
+
+Como o leitor pode ver, o grau de Penn é 18, relativamente elevado para esta rede. Mas imprimir estas classificações ilustra as limitações do grau como uma medida de centralidade. O leitor provavelmente não precisava que o NetworkX lhe dissesse que William Penn, líder *quaker* e fundador da Pensilvânia, era importante. A maioria das redes sociais terá somente alguns *hubs* de grau muito elevado, com os restantes nós de grau baixo e semelhante[^14]. O grau pode informá-lo sobre os maiores *hubs*, mas não pode dizer-lhe muito sobre o resto dos nós. E, em muitos casos, esses *hubs* sobre os quais o grau o informa (como Penn ou a cofundadora do Quakerismo, Margaret Fell, com um grau de 13) não são especialmente surpreendentes. Neste caso, quase todos os *hubs* são fundadores da religião ou figuras políticas importantes.
+
+Felizmente, existem outras medidas de centralidade que lhe podem dizer mais do que apenas quais são os *hubs*. A [centralidade adjacente](https://perma.cc/VF28-JDCR) (em inglês) é um tipo de extensão do grau---analisa uma combinação das *edges* dum nó e das *edges* dos vizinhos desse nó. A centralidade adjacente preocupa-se em saber se um nó é um *hub*, mas também com a quantos *hubs* esse nó está conectado. É calculada como um valor de 0 a 1: quanto mais próximo de um, maior a centralidade. A centralidade adjacente é útil para compreender que nós podem fazer chegar informação a muitos outros nós rapidamente. Se o leitor conhece muitas pessoas bem-conectadas, poderia espalhar uma mensagem muito eficientemente. Se o leitor usou o Google, então está já mais ou menos familiarizado com a centralidade adjacente. O seu algoritmo PageRank usa uma extensão desta fórmula para decidir que páginas de internet são colocadas no topo da lista de resultados.
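+
+A propósito, o NetworkX também implementa uma variante do PageRank, que pode servir de comparação com as outras medidas de centralidade. Um esboço mínimo, assumindo o Grafo `G` e o `itemgetter` importado no início do tutorial:
+
+```python
+# Calcule o PageRank de cada nó; o resultado é um dicionário {nó: pontuação}
+pagerank_dict = nx.pagerank(G)
+
+# Organize e imprima os 5 nós com pontuação mais elevada
+for node in sorted(pagerank_dict.items(), key=itemgetter(1), reverse=True)[:5]:
+    print(node)
+```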
A [centralidade de intermediação](https://perma.cc/C55J-7XAJ) (em inglês) é um pouco diferente das outras duas medidas na medida em que não se preocupa com o número de *edges* que qualquer nó ou grupo de nós tem. A centralidade de intermediação observa todos os **caminhos mais curtos** que passam por um nó em particular (ver acima). Para fazer isto, tem que primeiro calcular todos os possíveis caminhos mais curtos na sua rede, por isso mantenha em mente que a centralidade de intermediação vai demorar mais tempo para calcular que as outras medidas de centralidade (mas não será um problema num *dataset* desta dimensão). A centralidade de intermediação, que também é expressa numa escala de 0 a 1, é particularmente boa a encontrar nós que conectam duas partes distintas duma rede. Se o leitor é a única coisa conectando dois aglomerados, toda a comunicação entre esses aglomerados tem que passar por si. Em contraste com um *hub*, este tipo de nó é regularmente referido como um ***broker***. A centralidade de intermediação não é a única maneira de encontrar *brokerage* (e outros métodos são mais sistemáticos), mas é uma forma rápida de lhe dar uma ideia de quais nós são importantes, não porque têm muitas conexões eles próprios, mas porque se situam *entre* grupos, dando à rede conectividade e coesão.
+
+Estas duas medidas de centralidade são ainda mais simples de executar que o grau---elas não precisam de receber uma lista de nós, só o grafo `G`. O leitor pode executá-las com estas funções:
+
+```python
+betweenness_dict = nx.betweenness_centrality(G) # Execute a centralidade de intermediação
+eigenvector_dict = nx.eigenvector_centrality(G) # Execute a centralidade adjacente
+
+# Adicione cada uma como um atributo de nó na sua rede
+nx.set_node_attributes(G, betweenness_dict, 'betweenness')
+nx.set_node_attributes(G, eigenvector_dict, 'eigenvector')
+```
+
+O leitor pode organizar a centralidade de intermediação (ou a adjacente) ao mudar os nomes das variáveis no código organizador acima, assim:
+
+```python
+sorted_betweenness = sorted(betweenness_dict.items(), key=itemgetter(1), reverse=True)
+
+print("Top 20 nodes by betweenness centrality:")
+for b in sorted_betweenness[:20]:
+    print(b)
+```
+
+O leitor notará que muitos, mas não todos, dos nós que têm graus elevados também têm uma centralidade de intermediação alta. De facto, a centralidade de intermediação evidencia duas mulheres, Elizabeth Leavens e Mary Penington, cuja importância tinha sido obscurecida pela métrica da centralidade de grau. Uma vantagem de fazer estes cálculos no Python é que o leitor pode rapidamente comparar dois conjuntos de cálculos. E se o leitor quiser saber quais dos nós com alta centralidade de intermediação têm graus baixos? Isto é o mesmo que dizer: quais nós de alta intermediação são inesperados? Pode usar uma combinação das listas organizadas acima:
+
+```python
+# Primeiro, obtenha uma lista do top 20 de nós por intermediação
+top_betweenness = sorted_betweenness[:20]
+
+# Depois, encontre e obtenha o grau de cada um
+for tb in top_betweenness: # Itere por top_betweenness
+    degree = degree_dict[tb[0]] # Use degree_dict para aceder ao grau dum nó, veja a nota de rodapé 8
+    print("Name:", tb[0], "| Betweenness Centrality:", tb[1], "| Degree:", degree)
+```
+
+O leitor pode confirmar a partir destes resultados que algumas pessoas, como Leavens e Penington, têm alta centralidade de intermediação, mas baixo grau. Isto pode significar que estas mulheres eram *brokers* importantes, conectando partes díspares do grafo.
O leitor também pode aprender coisas inesperadas sobre pessoas sobre as quais já se sabe algo---nesta lista, consegue ver que Penn tem um grau inferior ao do fundador *quaker* George Fox, mas uma centralidade de intermediação mais elevada. Isto é o mesmo que dizer que conhecer mais pessoas, por si só, não é tudo.
+
+Isto apenas arranha a superfície do que pode ser feito com métricas de rede no Python. O NetworkX oferece dezenas de funções e medidas para o leitor usar em várias combinações, e pode usar o Python para estender estas medidas de formas quase ilimitadas. Uma linguagem de programação como o Python ou o R dar-lhe-á a flexibilidade para explorar a sua rede computacionalmente de formas que outros *interfaces* não podem, ao permitir-lhe combinar e comparar os resultados estatísticos da sua rede com outros atributos dos seus dados (como as datas e ocupações que adicionou à rede no início deste tutorial!).
+
+## Noções Avançadas do NetworkX: Deteção de Comunidades com Modularidade
+
+Outra pergunta comum sobre o *dataset* duma rede é quais são os subgrupos e comunidades dentro da estrutura social mais ampla. A sua rede é uma família grande e feliz na qual todos se conhecem? Ou é uma coleção de subgrupos mais pequenos que estão conectados por um ou dois intermediários? O campo da deteção de comunidades em redes está desenhado para responder a estas questões. Existem várias formas de calcular comunidades, cliques, e aglomerados na sua rede, mas o método mais popular atualmente é a **modularidade**. A modularidade é uma medida de densidade relativa na sua rede: uma comunidade (chamada um **módulo** ou uma **classe modular**) tem densidade elevada entre os nós dentro do seu módulo, mas densidade baixa com os de fora. A modularidade dá-lhe uma pontuação geral de quão fracionada a sua rede é, e essa pontuação pode ser usada para **repartir** a rede e evidenciar as comunidades individuais[^15].
+
+Redes muito densas são geralmente mais difíceis de dividir em repartições sensatas. Felizmente, como o leitor descobriu anteriormente, esta rede não é assim tão densa. Não existem tantas conexões reais quanto as possíveis, e existem vários componentes completamente desconectados. Vale a pena repartir esta rede esparsa com modularidade e ver se os resultados fazem sentido histórico e analítico.
+
+A deteção e repartição de comunidades no NetworkX requer um pouco mais de configuração do que algumas das outras métricas. Existem algumas abordagens incorporadas para a deteção de comunidades (como o [*minimum cut*](https://perma.cc/K59Y-WZRX) (em inglês)), e a modularidade vem incluída no módulo `community` do NetworkX, que o leitor importou no início deste tutorial. O leitor pode consultar a documentação do NetworkX para todas as funções que esse módulo oferece, mas, para a maior parte dos propósitos da deteção de comunidades, quererá apenas `greedy_modularity_communities()`:
+
+```python
+communities = community.greedy_modularity_communities(G)
+```
+
+O método `greedy_modularity_communities()` tenta determinar o número apropriado de comunidades para o grafo, e agrupa todos os nós em subconjuntos baseados nestas comunidades. Ao contrário das funções de centralidade, o código acima não criará um dicionário. Ao invés, criará uma lista especial de objetos "*frozenset*" (semelhantes a listas).
Existe um conjunto para cada grupo, e os conjuntos contêm os nomes das pessoas em cada grupo. Para adicionar esta informação à sua rede da maneira agora familiar, o leitor tem que primeiro criar um dicionário que classifique cada pessoa com um valor numérico para o grupo ao qual pertence:
+
+```python
+modularity_dict = {} # Crie um dicionário vazio
+for i,c in enumerate(communities): # Itere pela lista de comunidades, registando o número de cada comunidade
+    for name in c: # Itere por cada pessoa numa comunidade
+        modularity_dict[name] = i # Crie uma entrada no dicionário para a pessoa, na qual o valor é o grupo ao qual pertence.
+
+# Agora, o leitor pode adicionar a informação de modularidade como fez com as outras métricas
+nx.set_node_attributes(G, modularity_dict, 'modularity')
+```
+
+Como sempre, o leitor pode combinar estas medidas com outras. Por exemplo, aqui está como encontrar os nós de centralidade adjacente mais elevada na classe modular 0 (a primeira):
+
+```python
+# Primeiro, obtenha uma lista apenas dos nós nessa classe
+class0 = [n for n in G.nodes() if G.nodes[n]['modularity'] == 0]
+
+# Depois, crie um dicionário das centralidades adjacentes desses nós
+class0_eigenvector = {n:G.nodes[n]['eigenvector'] for n in class0}
+
+# Depois, organize esse dicionário e obtenha os primeiros 5 resultados
+class0_sorted_by_eigenvector = sorted(class0_eigenvector.items(), key=itemgetter(1), reverse=True)
+
+print("Modularity Class 0 Sorted by Eigenvector Centrality:")
+for node in class0_sorted_by_eigenvector[:5]:
+    print("Name:", node[0], "| Eigenvector Centrality:", node[1])
+```
+
+Usar a centralidade adjacente como um *ranking* pode dar-lhe uma ideia das pessoas importantes nesta classe modular. O leitor notará que algumas destas pessoas, especialmente William Penn, William Bradford (*não* o fundador de Plymouth em que estará a pensar[^16]) e James Logan, passaram muito tempo na América. Além disso, Bradford e Tace Sowle eram ambos impressores *quakers* proeminentes. Com um pouco de pesquisa, nós podemos descobrir que existem razões tanto geográficas como ocupacionais para que este grupo de pessoas se juntasse. Isto é uma indicação de que a modularidade está a funcionar como esperado.
+
+Em redes mais pequenas como esta, uma tarefa comum é encontrar e listar todas as classes modulares e os seus membros[^17]. O leitor pode fazer isto ao percorrer a lista `communities`:
+
+```python
+for i,c in enumerate(communities): # Itere pela lista de comunidades
+    if len(c) > 2: # Exclua as classes modulares com 2 ou menos nós
+        print('Class '+str(i)+':', list(c)) # Obtenha as classes e os seus membros
+```
+
+Note no código acima que está a filtrar qualquer classe modular com dois ou menos nós, na linha `if len(c) > 2`. O leitor recordar-se-á, da visualização, de que existiam vários componentes pequenos da rede com apenas dois nós. A modularidade encontrará estes componentes e tratá-los-á como classes separadas (visto que eles não estão conectados a mais nada). Ao filtrá-los, o leitor obtém uma ideia melhor das classes modulares maiores dentro do principal componente da rede.
+
+Trabalhar só com o NetworkX levá-lo-á longe, e o leitor pode descobrir muito sobre as classes modulares apenas ao trabalhar diretamente com os dados. Mas quase sempre quererá visualizar os seus dados (e, talvez, expressar a modularidade como a cor dos nós). Na próxima secção, o leitor irá aprender como exportar os seus dados do NetworkX para uso noutros programas.
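+
+Antes de avançar, pode ainda querer a pontuação global de modularidade referida na nota de rodapé 15, que indica quão bem-sucedida foi a repartição. Um esboço mínimo, assumindo a partição `communities` calculada acima com o módulo `community` do NetworkX:
+
+```python
+# Calcule a pontuação global de modularidade da partição encontrada acima
+global_modularity = community.modularity(G, communities)
+print("Global modularity:", global_modularity)
+```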
+
+
+
+# Exportar Dados
+
+O NetworkX suporta um grande número de formatos de ficheiros para [exportação de dados](https://perma.cc/X65S-HRCF) (em inglês). Se o leitor quiser exportar uma lista de *edges* em texto simples para carregar no Palladio, existe um [*wrapper* conveniente](https://perma.cc/P9ES-57X3) (em inglês) para isso. Frequentemente, no *Six Degrees of Francis Bacon*, nós exportamos dados do NetworkX no [formato JSON especializado do D3](https://perma.cc/SF8Z-DWPW) (em inglês), para visualização no navegador de internet. O leitor poderia até [exportar](https://perma.cc/Y6QJ-5VM8) (em inglês) o seu grafo como um [*dataframe* do Pandas](https://perma.cc/87NA-KCK4) (em inglês) se existissem operações estatísticas mais avançadas que quisesse executar. Existem várias opções, e se o leitor tiver adicionado diligentemente todas as suas métricas de volta no seu objeto Grafo como atributos, todos os seus dados serão exportados duma só vez.
+
+A maior parte das opções de exportação funciona da mesma maneira, por isso, para este tutorial, o leitor aprenderá como exportar os seus dados para o formato GEXF do Gephi. Assim que tiver exportado o ficheiro, o leitor pode fazer o *upload* [diretamente para o Gephi](https://gephi.org/users/supported-graph-formats/) (em inglês) para a visualização.
+
+Exportar dados é, normalmente, um simples comando unilinear. Tudo o que é preciso é escolher um nome de ficheiro. Neste caso, usaremos `quaker_network.gexf`. Para exportar, digite:
+
+```python
+nx.write_gexf(G, 'quaker_network.gexf')
+```
+
+É só! Quando executar o seu *script* no Python, este colocará automaticamente o novo ficheiro GEXF no mesmo diretório que o seu ficheiro Python[^18].
+
+# Conclusões
+
+Agora, tendo realizado e revisto uma panóplia de métricas de rede no Python, o leitor tem as evidências a partir das quais se constroem argumentos e se retiram conclusões sobre esta rede de *quakers* na Grã-Bretanha moderna. O leitor sabe, por exemplo, que a rede tem uma **densidade** relativamente baixa, sugerindo associações ténues e/ou dados originais incompletos. O leitor sabe que a comunidade está organizada em torno de vários ***hubs*** desproporcionalmente grandes, entre eles fundadores da denominação, como Margaret Fell e George Fox, bem como líderes políticos e religiosos importantes, como William Penn. Mais útil ainda, o leitor ficou a saber de mulheres com graus relativamente baixos, como Elizabeth Leavens e Mary Penington, que (como resultado duma centralidade de intermediação elevada) podem ter agido como ***brokers***, conectando múltiplos grupos. Finalmente, o leitor aprendeu que a rede é feita dum grande **componente** e de muitos componentes muito pequenos. No interior desse grande componente, existem várias **comunidades** distintas, algumas das quais parecem organizadas em torno do tempo ou do local (como Penn e os seus associados estadunidenses). Por causa dos metadados que adicionou à sua rede, o leitor tem as ferramentas para explorar estas métricas em profundidade e para, potencialmente, explicar algumas das características estruturais que identificou.
+
+Cada uma destas descobertas é um convite para mais pesquisa, ao invés dum ponto final ou duma prova. A análise de redes é um conjunto de ferramentas para colocar questões específicas sobre a estrutura das relações num *dataset*, e o NetworkX providencia um interface relativamente simples a muitas das técnicas e métricas comuns.
As redes são uma maneira útil de estender a sua pesquisa a um grupo ao providenciar informações sobre a estrutura da comunidade, e nós esperamos que o leitor será inspirado por este tutorial para usar métricas para enriquecer a sua própria pesquisa e para explorar a flexibilidade da análise de redes para além da visualização. + +[^1]: **Nota de tradução**: Como o leitor poderá confirmar mais abaixo, os autores desta lição transformaram os dados aqui analisados num gráfico, sem explicar tal passo, visto que o artigo lida com a análise dos dados, e não com a sua visualização. Se desejar, pode ler também a lição aqui referida e voltar a esta para confirmar se o seu gráfico se assemelha ao dos quatro autores. Aconselhamos que o faça após ter concluído todos os passos aqui descritos. + +[^2]: Em muitos (mas não todos os) casos, `pip` ou `pip3` serão instalados automaticamente com o Python3. + +[^3]: **Nota de tradução**: Isto pode estender-se ao uso de comandos, na sua *shell*, nomeadamente aquando da instalação do pip e de pacotes (ver Preparação dos Dados e Instalação do NetworkX). + +[^4]: Algumas instalações só quererão que o leitor digite `pip` sem "3," mas no Python 3, `pip3` é a mais comum. Se um não funcionar, tente o outro! + +[^5]: **Nota de tradução**: É importante lembrar que existem variações entre as diferentes versões do NetworkX que podem resultar em erros ou outputs diferentes. Tal é o caso da 2.6, com a qual obtivemos uma mensagem de erro durante a avaliação da modularidade e uma resposta diferente com a função print(nx.info(G)) daquela apresentada com a 2.4. + +[^6]: Existem algumas técnicas *pythónicas* que este código usa. A primeira é a 'compreensão de lista' (*list comprehensions*), que incorpora *loops* (`for n in nodes`) para criar novas listas (em parêntesis retos), assim: `new_list = [item for item in old_list]`. A segunda é a *list slicing*, que permite-lhe subdividir ou "*slice*" ("cortar") a lista. A notação da *list slicing* `[1:]` toma tudo *exceto* o primeiro item na lista. O 1 informa o Python para começar com o segundo item nesta lista (no Python, o leitor começa a contar do 0), e os dois pontos dizem ao Python para tomar tudo até ao fim da lista. Como a primeira linha em ambas destas listas é a fila de cabeçalho de cada CSV, nós não queremos que esses cabeçalhos sejam incluídos nos nossos dados. + +[^7]: Dicionários são um tipo de dados incorporados no Python, construídos com pares de chave-valor. Pense numa chave como a palavra-chave num dicionário, e o valor como a sua definição. Chaves têm que ser únicas (só uma de cada por dicionário), mas os valores podem ser qualquer coisa. Dicionários são representados por chavetas, com chaves e valores separados por dois pontos: `{key1:value1, key2:value2, ...}`. Dicionários são uma das maneiras mais rápidas de armazenar valores que o leitor pode necessitar mais tarde. De facto, um objeto Grafo do NetworkX é, ele próprio, feito de dicionários aninhados. + +[^8]: Note que este código usa parêntesis retos de duas formas. Usa números em parêntesis retos para aceder índices específicos numa lista de nós (por exemplo, o ano de nascimento no `node[4]`), mas também para designar uma *chave* (sempre `node[0]`, o ID) a qualquer um dos nossos dicionários vazios: `dictionary[key] = value`. Conveniente! + +[^9]: Por uma questão de simplicidade, removemos quaisquer nós que *não estão conectados a quaisquer outros* do *dataset* antes de termos começado. 
Isto foi feito simplesmente para reduzir a desordem, mas também é muito comum de se ver muitos destes nós solteiros no seu *dataset* de rede comum. + +[^10]: Mas mantenha em mente que isto é a densidade de *toda* a rede, incluindo esses componentes não conectados a flutuar em órbita. Existem várias conexões possíveis entre e com eles. Se o leitor tivesse tomado a densidade somente do componente maior, poderia ter obtido um número diferente. O leitor poderia fazê-lo ao encontrar o componente mais largo como nós lhe mostramos na próxima secção sobre o **diâmetro**, e, depois, ao executar o mesmo método de densidade somente nesse componente. + +[^11]: Nós tomamos o comprimento da lista *menos um* porque nós queremos o número de *edges* (ou passos) entre os nós listados aqui, ao invés do número de nós. + +[^12]: A forma mais correta de fazer este tipo de comparação é criar *grafos aleatórios* de tamanho idêntico para ver se as métricas diferem da norma. O NetworkX oferece várias ferramentas para [gerar grafos aleatórios](https://perma.cc/7Z4U-KAY7) (em inglês). + +[^13]: Porque se chama transitividade? O leitor pode recordar-se da propriedade transitiva de Geometria das aulas de Matemática no Ensino Secundário: se A=B e B=C, o A deve ser igual a C. Semelhantemente, no fechamento triádico, se a pessoa A conhece a pessoa B e a pessoa B conhece a pessoa C, então a pessoa A provavelmente conhece a pessoa C: logo, transitividade. + +[^14]: Aqueles com experiência em Estatística notarão que grau em redes sociais segue tipicamente uma *lei de potência*, mas isto não é nem pouco usual, nem especialmente útil saber. + +[^15]: Embora não venhamos a cobri-lo neste tutorial, é geralmente boa ideia obter a clasificação modular global primeiro para determinar se o leitor aprenderá qualquer coisa ao repartir a sua rede de acordo com a modularidade. Para ver a classificação geral da modularidade, tome as comunidades que calculou com `communities = community.best_partition(G)` e execute `global_modularity = community.modularity(communities, G)`. E depois basta aplicar `print(global_modularity)`. + +[^16]: **Nota de tradução**: [Plymouth](https://perma.cc/2EKN-TJPW) foi a primeira colónia inglesa permanente na região da Nova Inglaterra, no nordeste dos Estados Unidos da América, tendo sido fundada em 1620 por vários colonos puritanos, entre os quais um tal [William Bradford](https://perma.cc/UA8V-J4CX). Este [outro](https://perma.cc/TW4C-QWUY) referido foi um importante impressor *quaker*. + +[^17]: Em redes grandes, as listas seriam provavelmente ilegivelmente longas, mas o leitor poderia obter uma ideia de todas as classes modulares duma só vez ao visualizar a rede e adicionar cor aos nós baseada na sua classe modular. + +[^18]: Cada formato de ficheiro que é exportável é também importável. Se o leitor tiver um ficheiro GEXF do Gephi que quer pôr no NetworkX, digitaria `G = nx.read_gexf('some_file.gexf')`. 
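+
+Na sequência da nota anterior, segue um esboço mínimo do ciclo completo de exportação e reimportação (assumindo o ficheiro `quaker_network.gexf` criado na secção Exportar Dados), para confirmar que nenhum nó ou *edge* se perdeu pelo caminho:
+
+```python
+import networkx as nx
+
+# Reimporte o grafo exportado acima
+G2 = nx.read_gexf('quaker_network.gexf')
+
+# Os totais devem corresponder aos do grafo original
+print(len(G2.nodes), len(G2.edges))
+```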
diff --git a/pt/licoes/extrair-paginas-ilustradas-com-python.md b/pt/licoes/extrair-paginas-ilustradas-com-python.md index 1436033065..61a24e9cef 100644 --- a/pt/licoes/extrair-paginas-ilustradas-com-python.md +++ b/pt/licoes/extrair-paginas-ilustradas-com-python.md @@ -1,483 +1,483 @@ ---- -title: Extrair Páginas Ilustradas de Bibliotecas Digitais com Python -slug: extrair-paginas-ilustradas-com-python -layout: lesson -date: 2019-01-14 -translation_date: 2023-05-03 -authors: -- Stephen Krewson -reviewers: -- Catherine DeRose -- Taylor Arnold -editors: -- Anandi Silva Knuppel -translator: -- João Domingues Pereira -translation-editor: -- Eric Brasil -translation-reviewer: -- Felipe Lamarca -- Salete Farias -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/447 -difficulty: 2 -activity: acquiring -topics: [api] -abstract: A aprendizagem de máquina e as extensões de API do HathiTrust e do Internet Archive estão a tornar mais fácil a extração de regiões de página com interesse visual de volumes digitalizados. Esta lição mostra como extrair eficientemente essas regiões e, ao fazê-lo, como fomentar novas questões sobre a pesquisa visual. -avatar_alt: Instrumento Científico de Medição -original: extracting-illustrated-pages -doi: 10.46430/phpt0040 ---- - -{% include toc.html %} - -# Visão Geral - -E se só quisesse ver as imagens num livro? Este é um pensamento que já ocorreu tanto a jovens crianças como a pesquisadores adultos. Se soubesse que o livro está disponível através duma biblioteca digital, seria útil fazer o *download* somente das páginas com imagens e ignorar o resto. - -Aqui estão as miniaturas de página dum volume do HathiTrust com o identificador exclusivo `osu.32435078698222`. Após o processo descrito nesta lição, apenas as páginas com imagens (31 no total) foram baixadas como JPEGs para uma pasta. - -{% include figure.html filename="file-explorer-example.png" alt="Imagem com a apresentação das páginas de um livro que contêm imagens" caption="Visualização dum volume para o qual só as páginas com imagens foram baixadas." %} - -Para ver quantas páginas *não ilustradas* foram filtradas, compare com o [conjunto total de miniaturas](https://babel.hathitrust.org/cgi/pt?id=osu.32435078698222;view=thumb;seq=1) para todas as 148 páginas nesta edição revisada de 1845 do livro infantil *bestseller* de Samuel Griswold Goodrich, *The Tales of Peter Parley About America* (1827). - -{% include figure.html filename="parley-full-thumbnails.png" alt="Imagem com a visualização de todas as miniaturas das páginas de um livro" caption="Visualização das miniaturas do HathiTrust para todas as páginas." %} - -Esta lição mostra como completar estas etapas de filtragem e de *download* para volumes de texto em domínio público detidos pelo HathiTrust (HT) e pelo Internet Archive (IA), duas das maiores bibliotecas digitais no mundo. Será do interesse de qualquer um que deseje criar coleções de imagens com o fim de aprender sobre a História da Ilustração e o *layout* (*mise en page*) dos livros. As abordagens visuais à bibliografia digital estão a tornar-se populares, seguindo os esforços pioneiros do [EBBA](https://perma.cc/3QYS-XNSF) e do [Aida](https://perma.cc/SH49-K56K). 
Projetos recentemente concluídos ou financiados exploram maneiras de [identificar notas de rodapé](https://web.archive.org/web/20190526050917/http://culturalanalytics.org/2018/12/detecting-footnotes-in-32-million-pages-of-ecco/) e de [rastrear notas de margem de página](https://perma.cc/QB4J-55GU), para dar só dois [exemplos](https://perma.cc/9RC2-PJBL). - -A minha própria pesquisa tenta responder a questões empíricas sobre alterações na frequência e modo de ilustração em textos médicos e educacionais do século dezanove. Isto envolve agregar múltiplas imagens por livro e tentar estimar que processo de impressão foi usado para fazer tais imagens. Um caso de uso mais direcionado para a extração de páginas ilustradas pode ser a catalogação de ilustrações ao longo de [diferentes edições](https://perma.cc/2FCU-YW6D) do mesmo livro. Trabalhos futuros poderão investigar com sucesso as características visuais e o *significado* das imagens extraídas: a sua cor, o seu tamanho, o seu tema, o seu género, o número de figuras e assim por diante. - -Como obter informação *localizada* sobre regiões visuais de interesse está para além do âmbito desta lição, visto que o processo envolve uma quantidade significativa de aprendizagem de máquina. No entanto, a classificação sim/não de páginas com (ou sem) imagens é um primeiro passo prático para reduzir o enorme volume de *todas* as páginas para cada livro numa coleção visada, tornando viável a localização de ilustrações. Para dar um ponto de referência, os textos médicos do século dezanove contêm (em média) ilustrações em 1-3% das suas páginas. Se estiver a tentar estudar a ilustração no interior dum *corpus* duma biblioteca digital sobre o qual não tem qualquer informação preexistente, é, consequentemente, razoável assumir que 90+% das páginas nesse *corpus* NÃO estarão ilustradas. - -O HT e o IA permitem que a questão com imagens/sem imagens seja respondida indiretamente através da análise dos dados gerados pelo *software* *optical character recognition* (OCR) ou reconhecimento ótico de caracteres, em português (o OCR é aplicado após um volume físico ser digitalizado com o objetivo de gerar uma transcrição do texto muitas vezes desordenada). Aproveitar o resultado do *output* do OCR para encontrar páginas ilustradas foi proposto primeiramente por Kalev Leetaru numa [colaboração de 2014](https://perma.cc/3J79-4QA6) com o Internet Archive e o Flickr. Esta lição transfere a abordagem de Leetaru para o HathiTrust e tira proveito de bibliotecas de processamento de XML mais rápidas no Python, bem como da gama recentemente ampliada de formatos de ficheiro de imagem do IA. - -Uma vez que o HT e o IA expõem a sua informação derivada do OCR de maneiras ligeiramente diferentes, eu irei adiar a apresentação dos detalhes das "características visuais" de cada biblioteca para as suas secções respetivas. - -# Objetivos - -No final da lição, o leitor será capaz de: - -- Configurar a versão "mínima" da distribuição Anaconda do Python (Miniconda) e criar um ambiente; -- Salvar e iterar sobre uma lista de IDs de volumes do HT ou do IA gerados por uma pesquisa; -- Acessar aos *application programming interfaces* (APIs) ou interfaces de programação de aplicações, em português, de dados do HT e do IA através das bibliotecas do Python; -- Encontrar características visuais ao nível da página; -- Fazer o *download* dos JPEGs de páginas programaticamente. - -O grande objetivo é fortalecer as competências de coleta e exploração de dados ao criar um *corpus* de ilustração histórica. 
Combinar dados de imagem com os metadados dum volume permite a formulação de questões de pesquisa promissoras sobre a mudança visual ao longo do tempo. - -# Requisitos - -Os requisitos de *software* desta lição são mínimos: o acesso a uma máquina executando um sistema operacional padrão e um navegador de internet. O Miniconda está disponível em duas versões de 32 e de 64 *bits* para Windows, macOS e Linux. O Python 3 é a versão estável atual da linguagem e será suportado indefinidamente[^1]. - -Este tutorial assume um conhecimento básico da linha de comando e da linguagem de programação Python. O leitor deve compreender as convenções para comentários e comandos num tutorial baseado num *shell*. Eu recomendo a [*Introduction to the Bash Command Line*](/en/lessons/intro-to-bash), de Ian Milligan e James Baker, para aprender ou para rever as suas competências com a linha de comando. - -# Configuração - -## Dependências - -Os leitores mais experientes podem querer simplesmente instalar as dependências e executar os *notebooks* nos seus ambientes de escolha. Mais informações sobre a minha própria configuração do Miniconda (e algumas diferenças entre o Windows e o *nix) são providenciadas. - -> **Nota de tradução**: Para instalar as dependências, altere o seu diretório de trabalho para a pasta onde se encontra instalado o Python executando o comando `cd` e, depois, digite o comando `pip install` ou `pip3 install` acompanhado pelas seguintes linhas: - -- `hathitrust-api` ou `hathitrust_api` ([Documentos de Instalação](https://github.com/rlmv/hathitrust-api)); -- `internetarchive` ([Documentos de Instalação](https://archive.org/services/docs/api/internetarchive/)); -- `jupyter` ([Documentos de Instalação](https://jupyter.org/install)); -- `requests` ([Documentos de Instalação](https://requests.readthedocs.io/en/latest/user/install/#install)) [o criador recomenda a instalação do`pipenv`; para a instalação do `pip`, veja [PyPI](https://pypi.org/project/requests/)]. - -## Ficheiros da Lição - -Faça o *download* desta [pasta comprimida](/assets/extracting-illustrated-pages/lesson-files.zip) que contém dois *Jupyter notebooks*, um para cada uma das bibliotecas digitais. A pasta também contém um ficheiro de metadados JSON de amostra descrevendo uma coleção do HathiTrust. Descomprima e confirme que os seguintes ficheiros estão presentes: `554050894-1535834127.json`, `hathitrust.ipynb` e `internetarchive.ipynb`. - -
    -Todos os comandos subsequentes assumem que o seu diretório de trabalho atual é a pasta que contém os ficheiros da lição. -
    - -### Destino do *Download* - -Aqui está o diretório predefinido que será criado assim que todas as células em ambos os *notebooks* tiverem sido executadas (como providenciado). Depois de obter uma lista de quais páginas num volume contêm imagens, as funções de *download* do HT e do IA solicitam essas páginas como JPEGs (nomeadas pelo número de página) e arquivam-nas em subdiretórios (nomeados pelo ID do item). É claro que o leitor pode usar diferentes listas de volumes ou mudar o destino `out_dir` para algo que não `items`. - -``` -items/ -├── hathitrust -│ ├── hvd.32044021161005 -│ │ ├── 103.jpg -│ │ └── ... -│ └── osu.32435078698222 -│ ├── 100.jpg -│ ├── ... -└── internetarchive - └── talespeterparle00goodgoog - ├── 103.jpg - └── ... - -5 diretórios, 113 ficheiros -``` - -As funções de *download* são lentas; se executar os *notebooks* novamente, com o diretório `items` similar ao que se apresenta em cima, qualquer item que já tenha a sua própria subpasta será ignorado. - -## Anaconda (Opcional) - -A Anaconda é a principal distribuição científica do Python. O seu gerenciador de pacotes `conda` permite-lhe instalar bibliotecas como a `numpy` e a `tensorflow` com facilidade. A versão "Miniconda" não é acompanhada por quaisquer pacotes supérfluos pré-instalados, o que incentiva o leitor a manter o seu ambiente de base limpo e a instalar apenas o que necessita para um projeto dentro dum ambiente nomeado. - -Faça o *download* e instale o [Miniconda](https://conda.io/miniconda.html). Escolha a versão estável mais recente do Python 3. Se tudo correr bem, o leitor conseguirá executar `which conda` (no Linux/macOS) ou `where conda` (no Windows) no seu *shell* e ver a localização do programa executável no *output*. - -A Anaconda tem uma [*cheat sheet*](http://web.archive.org/web/20190115051900/https://conda.io/docs/_downloads/conda-cheatsheet.pdf) ou folha de dicas, em português, útil para comandos de uso frequente. - -### Criar um Ambiente - -Os ambientes, entre outras coisas, ajudam a controlar a complexidade associada ao uso de múltiplos gerenciadores de pacotes em conjunto. Nem todas as bibliotecas do Python podem ser instaladas através do `conda`. Em alguns casos, nós recorreremos ao gestor de pacote padrão do Python, o `pip` (ou alterações planejadas, como o `pipenv`). No entanto, quando o fizermos, nós usaremos uma versão do `pip` instalada através do `conda`. Isto mantém todos os pacotes que nós precisamos para o projeto no mesmo espaço virtual. - -```bash -# O seu ambiente atual é precedido por um asterisco -# (será a "base" num novo shell) -conda env list - -# Pacotes instalados no ambiente atual -conda list -``` - -Agora nós criamos um ambiente nomeado, configuramo-lo para usar Python 3, e ativamo-lo. - -```bash -# Note a sinalização "--name", que toma um argumento de string (e.g. "extract-pages") -# e a sintaxe para especificar a versão do Python -conda create --name extract-pages python=3 - -# Indique o novo ambiente (no Linux/macOS) -source activate extract-pages -``` - -```bash -# O comando do Windows para ativar o ambiente é ligeiramente diferente -conda activate extract-pages -``` - -Para sair dum ambiente, execute `source deactivate` no Linux/macOS ou `deactivate` no Windows. Mas certifique-se que permanece no ambiente `extract-pages` durante o decorrer da lição! - -### Instalar os Pacotes do Conda - -Nós podemos usar o `conda` para instalar os nossos primeiros pacotes. 
Todos os outros pacotes necessários (gzip, JSON, os, sys e time) fazem parte da [biblioteca padrão do Python](https://docs.python.org/3/library/). Note como nós precisamos de especificar um canal em alguns casos. O leitor pode pesquisar por pacotes no [Anaconda Cloud](https://anaconda.org/). - - -```bash -# Para garantir que nós temos uma versão local do pip (veja a discussão em baixo) -conda install pip - -conda install jupyter - -conda install --channel anaconda requests -``` - -O Jupyter tem muitas dependências (outros pacotes dos quais depende), por isso esta etapa pode exigir alguns minutos. Recorde-se que quando o `conda` lhe pergunta se deseja continuar com a instalação por via da questão `Proceed ([y]/n)?`, o leitor deve digitar um `y` ou um `yes` e, depois, pressionar *Enter* para aceitar a instalação do pacote. - -
    -Nos bastidores, o conda está a trabalhar para certificar-se que todos os pacotes e dependências necessários serão instalados numa maneira compatível. -
    - -### Instalar Pacotes do Pip - -Se estiver a usar um ambiente `conda`, é melhor usar a versão local do `pip`. Confirme que os seguintes comandos dão como resultado do *output* um programa cujo caminho absoluto contém algo como `/Miniconda/envs/extract-pages/Scripts/pip`. - -```bash -which pip -``` - -```bash -# O equivalente do Windows ao "which" -where pip -``` - -Se vir duas versões do `pip` no *output* em cima, certifique-se de digitar o caminho absoluto para a versão do ambiente *local* ao instalar as bibliotecas *wrapper* da API. - -```bash -pip install hathitrust-api -pip install internetarchive -``` - -```bash -# Exemplo do Windows usando o caminho absoluto para o executável do pip local -C:\Users\stephen-krewson\Miniconda\envs\extract-pages\Scripts\pip.exe install hathitrust-api internetarchive -# Substitua "stephen-krewson" pelo seu nome de utilizador -``` - -## *Jupyter Notebooks* - -O [*Text Mining in Python Through the HTRC Feature Reader*](/en/lessons/text-mining-with-extracted-features#start-a-notebook), de Peter Organisciak e Boris Capitanu, explica os benefícios dos *notebooks* para o desenvolvimento e a exploração de dados. Também contém informação útil sobre como executar eficazmente as células. Visto que nós instalámos a versão minimalista da Anaconda, nós precisamos de iniciar o Jupyter a partir da linha de comandos. No seu *shell* (a partir do interior da pasta contendo os ficheiros da lição) execute `jupyter notebook`. - -Isto executará o servidor do *notebook* no seu *shell* e iniciará o seu navegador de internet predefinido com a página inicial do Jupyter[^2]. A página inicial mostra todos os ficheiros no diretório de trabalho atual. - -{% include figure.html filename="jupyter-home.png" alt="Imagem com a apresentação da estrutura de ficheiros da página inicial do Jupyter" caption="A página inicial do Jupyter mostrando os ficheiros da lição." %} - -
    -No seu shell, certifique-se que usou o comando cd para ir até ao diretório descomprimido lesson-files. -
    - -Clique nos *notebooks* `hathitrust.ipynb` e `internetarchive.ipynb` para abri-los em novas abas do navegador de internet. A partir daqui, nós não precisamos de executar qualquer comando no *shell*. Os *notebooks* permitem-nos executar o código Python e ter acesso total ao sistema de pastas do computador. Quando o leitor tiver terminado, pode parar o servidor do *notebook* carregando em "*Quit*" na página inicial do Jupyter ou executando `ctrl+c` no *shell*. - -# HathiTrust - -## Acesso à API - -O leitor precisa efetuar um registro no HathiTrust antes de usar o API de dados. Dirija-se ao [portal de registro](https://babel.hathitrust.org/cgi/kgs/request) e preencha o seu nome, a sua organização e o seu e-mail para requerer chaves de acesso. O leitor deverá receber uma resposta no e-mail dentro de cerca dum minuto (**nota de tradução**: verifique também a caixa de *spam*). Clique no link, que o trará a uma página temporária com ambas as chaves exibidas. - -No *notebook* `hathitrust.ipynb`, examine a primeira célula (mostrada em baixo). Preencha as suas chaves da API como indicado. Depois, execute a célula clicando em "*Run*" na barra de navegação do *notebook*. - -```python -# Importe o wrapper da API de dados do HT -from hathitrust_api import DataAPI - -# Substitua as strings com as suas credenciais do HT (deixando as aspas) -ht_access_key = "YOUR_ACCESS_KEY_HERE" -ht_secret_key = "YOUR_SECRET_KEY_HERE" - -# Instancie o objeto de conexão da API de dados -data_api = DataAPI(ht_access_key, ht_secret_key) -``` - -
    -Cuidado! Não exponha as suas chaves de acesso através dum repositório público no GitHub (ou outro host de controle de versões). Elas serão pesquisáveis por qualquer outra pessoa. Uma boa prática para um projeto Python é a de armazenar as suas chaves de acesso como variáveis de ambiente ou salvá-las num ficheiro que não é versionado. -
    - -## Criar uma Lista de Volumes - -O HT permite a qualquer um fazer uma coleção de itens—o leitor nem sequer tem que estar na sua conta! No entanto, o leitor deveria registrar uma conta se quiser salvar a sua lista de volumes. Siga as [instruções](https://babel.hathitrust.org/cgi/mb?colltype=updated) para fazer algumas pesquisas no texto completo e para, depois, adicionar resultados escolhidos a uma coleção. Atualmente, o HathiTrust não tem uma API de pesquisa pública para adquirir volumes programaticamente; o leitor precisa de pesquisar através da sua *interface* da internet. - -Ao atualizar uma coleção, o HT mantém o rastro dos metadados associados para cada item nela. Eu incluí nos ficheiros da lição os metadados para uma lição de amostra no formato JSON. Se quisesse usar o ficheiro da sua própria coleção do HT, o leitor navegaria até à página das suas coleções e colocaria o cursor do *mouse* sobre o link dos metadados à esquerda para revelar a opção para fazer o *download* como JSON, como observado na seguinte captura de tela. - -{% include figure.html filename="download-ht-json.png" alt="Imagem de uma página web do site HathiTrust com instruções para download de metadados de ficheiros JSON" caption="Captura de tela de como fazer o *download* dos metadados de coleções no formato JSON." %} - -Assim que o leitor tiver feito o *download* do ficheiro JSON, basta movê-lo para o diretório onde colocou os *Jupyter notebooks*. Substitua o nome do ficheiro JSON no *notebook* do HT com o nome do ficheiro da sua coleção. - -O *notebook* mostra como usar *list comprehension* para obter todas as *strings* `htitem_id` dentro do objeto `gathers` que contem todas as informações da coleção. - -```python -# O leitor pode especificar o ficheiro de metadados da sua coleção aqui -metadata_path = "554050894-1535834127.json" - -with open(metadata_path, "r") as fp: - data = json.load(fp) - -# Uma lista de todas as IDs exclusivas na coleção -vol_ids = [item['htitem_id'] for item in data['gathers']] -``` - -
    -Os tutoriais normalmente mostram-lhe como processar um item de exemplo (muitas vezes de tamanho ou complexidade trivial). Isto é pedagogicamente conveniente, mas significa que o leitor está menos equipado para aplicar esse código a múltiplos itens—de longe o caso de uso mais comum. Nos notebooks, o leitor verá como encapsular transformações aplicadas a um item em funções que podem ser usadas num loop sobre uma coleção de itens. -
    - -## Característica Visual: IMAGE_ON_PAGE - -Dada uma lista de volumes, nós queremos explorar que características visuais eles têm ao nível da página. A [documentação mais recente](https://perma.cc/Y6UU-G9HZ) (2015) para o API de dados descreve um objeto metadados chamado `htd:pfeat` nas páginas 9-10. `htd:pfeat` é a abreviação para "HathiTrust Data API: Page Features". - -> * `htd:pfeat`­ - the page feature key (if available): -> - CHAPTER_START -> - COPYRIGHT -> - FIRST_CONTENT_CHAPTER_START -> - FRONT_COVER -> - INDEX -> - REFERENCES -> - TABLE_OF_CONTENTS -> - TITLE - -O que o *wrapper* `hathitrust-api` faz é disponibilizar os metadados completos para um volume do HT como um objeto Python. Dado o identificador dum volume, nós podemos pedir os seus metadados e, depois, fazer o *drill down* através da *sequência* de páginas até à informação ao nível da página. A *lista* `htd:pfeat` está associada com cada página num volume e, em teoria, contém todas as características que se aplicam a essa página. Na prática, existem mais algumas *tags* de características do que as oito listadas em cima. Aquela com a qual nós iremos trabalhar chama-se `IMAGE_ON_PAGE` e é mais abstratamente visual que *tags* estruturais como `CHAPTER_START`. - -Tom Burton-West, um bibliotecário pesquisador na biblioteca da *University of Michigan*, trabalha em estreita colaboração com o HathiTrust e o HTRC, o Centro de Pesquisa do HathiTrust. O Tom disse-me por e-mail que o HathiTrust recebe a informação `htd:pfeat` via o Google, com o qual trabalham proximamente desde a fundação do HT, em 2008. Um contacto no Google deu permissão ao Tom para partilhar o seguinte: - -> Estas *tags* são derivadas duma combinação de Heurística, de aprendizagem de máquina e de anotação humana. - -Um exemplo heurístico pode ser o facto do primeiro elemento na sequência de páginas do volume ser quase sempre a `FRONT_COVER`. A aprendizagem de máquina pode ser usada para treinar modelos a discriminar, digamos, entre dados de imagem que são mais típicos das linhas de prosa numa escrita ocidental ou das linhas numa gravura. A anotação humana é a atribuição manual de etiquetas a imagens. A habilidade de ver as ilustrações dum volume nos bancos de dados do EEBO e do ECCO é um exemplo de anotação humana. - -O uso da "aprendizagem de máquina" pelo Google parece um pouco misterioso. Até o Google publicitar os seus métodos, é impossível saber todos os detalhes. No entanto, é provável que as *tags* `IMAGE_ON_PAGE` tenham sido propostas pela primeira vez após a deteção de blocos de "Imagens" nos ficheiros de _output_ do OCR (um processo discutido em baixo, na secção do Internet Archive). Mais filtragem pode, então, ser aplicada. - -## Passo a Passo Para o Código - -### Encontrar as imagens - -Nós vimos como criar uma lista de volumes e observámos que a API de dados pode ser usada para obter objetos metadados contendo características experimentais ao nível da página. A função essencial no *notebook* do HT tem a assinatura digital `ht_picture_download(item_id, out_dir=None)`. Dado um identificador exclusivo e um diretório de destino opcional, esta função irá, em primeiro lugar, obter os metadados do volume a partir da API e convertê-los num formato JSON. Depois, percorre a sequência de páginas e verifica se a *tag* `IMAGE_ON_PAGE` está na lista `htd:pfeat` (se a mesma existir). 
- -```python -# Metadados da API no formato JSON (diferente dos metadados da coleção do HT) -meta = json.loads(data_api.getmeta(item_id, json=True)) - -# A sequência dá-nos cada página do item digitalizado em ordem, com qualquer -# informação adicional que lhe pode estar disponível -sequence = meta['htd:seqmap'][0]['htd:seq'] - -# A lista de páginas com imagens (vazio para a iniciação) -img_pages = [] - -# O bloco try/except lida com situações onde nenhuma "pfeats" existe OU -# os números da sequência não são numéricos -for page in sequence: - try: - if 'IMAGE_ON_PAGE' in page['htd:pfeat']: - img_pages.append(int(page['pseq'])) - except (KeyError, TypeError) as e: - continue -``` - -Note que nós precisamos de fazer o *drill down* por vários níveis até ao objeto do nível de topo para obter o objeto `htd:seq`, sobre o qual nós podemos iterar. - -As duas exceções que eu quero evitar são o `KeyError`, que ocorre quando a página não tem qualquer característica ao nível da página a si associada, e o `TypeError`, que ocorre quando o campo `pseq` para a página é, por alguma razão, não numérico e, portanto, não pode ser destinado a um `int`. Se algo correr mal com uma página, nós simplesmente executamos `continue` para passar à próxima. O plano é obter todos os dados bons que conseguirmos. Não é limpar inconsistências ou falhas nos metadados do item. - -### Fazer o *Download* das Imagens - -Assim que `img_pages` contém a lista completa de páginas com a *tag* `IMAGE_ON_PAGE`, nós podemos fazer o download dessas páginas. Note que, se nenhum `out_dir` for fornecido a `ht_picture_download()`, então a função simplesmente retorna a lista `img_pages` e NÃO faz o *download* do quer que seja. - -A chamada da API `getpageimage()` retorna um JPEG por predefinição. Nós simplesmente colocamos os bytes do JPEG num ficheiro na forma normal. Dentro da subpasta do volume (ela própria dentro do `out_dir`), as páginas serão nomeadas `1.jpg` para a página 1 e assim sucessivamente. - -Uma coisa a considerar é a nossa taxa de uso da API. Nós não queremos abusar do nosso acesso ao fazer centenas de pedidos por minuto. Para estar a salvo, especialmente se pretendermos executar grandes trabalhos, nós esperamos dois segundos antes de fazer cada pedido de página. Isto pode ser frustrante a curto prazo, mas ajuda a evitar o sufocamento ou a suspenção da API. - - -```python -for i, page in enumerate(img_pages): - try: - # Uma simples mensagem de estado - print("[{}] Downloading page {} ({}/{})".format(item_id, page, i+1, total_pages)) - - img = data_api.getpageimage(item_id, page) - - # N.B.: O loop só é executado se "out_dir" não for "None" - img_out = os.path.join(out_dir, str(page) + ".jpg") - - # Escreva a imagem - with open(img_out, 'wb') as fp: - fp.write(img) - - # Para evitar exceder o uso da API permitido - time.sleep(2) - - except Exception as e: - print("[{}] Error downloading page {}: {}".format(item_id, page,e)) -``` - -# Internet Archive - -## Acesso à API - -Nós conectamos à biblioteca API do Python usando uma conta no Archive.org com e-mail e palavra-chave ao invés das chaves de acesso do API. Isto é discutido no [Guia Quickstart](https://archive.org/services/docs/api/internetarchive/quickstart.html). Se não tiver uma conta, [registre-se](https://archive.org/account/login.createaccount.php) para obter o seu "Virtual Library Card". - -Na primeira célula do *notebook* `internetarchive.ipynb`, introduza as suas credenciais como indicado. Execute a célula para autenticar-se perante a API. 
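-
-Um esboço do aspeto provável dessa primeira célula (os nomes `ia_email` e `ia_password` seguem a nota de tradução em baixo):
-
-```python
-# Importe a biblioteca do Internet Archive
-import internetarchive as ia
-
-# Substitua as strings pelas suas credenciais do Archive.org (deixando as aspas)
-ia_email = "YOUR_EMAIL_HERE"
-ia_password = "YOUR_PASSWORD_HERE"
-
-# Autentique-se perante a API (veja a nota de tradução a seguir sobre este comando)
-ia.configure(ia_email, ia_password)
-```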
-
-> **Nota de tradução**: O comando `ia.configure(ia_email, ia_password)` é atualmente desnecessário e pode gerar um erro extenso, em cuja mensagem final consta: `InvalidURL: Invalid URL 'https:///services/xauthn/': No host supplied`. Sugerimos que o mesmo não seja executado no ficheiro IPYNB.
-
-## Criar uma Lista de Volumes
-
-A biblioteca IA do Python permite-lhe submeter *query strings* e receber uma lista de pares chave-valor correspondentes, na qual a palavra "*identifier*", ou identificador, em português, é a chave e o verdadeiro identificador é o valor. A sintaxe para uma *query* é explicada na [página de Advanced Search](https://archive.org/advancedsearch.php) para o IA. O leitor pode especificar parâmetros ao usar uma palavra-chave como "*date*" ou "*mediatype*" seguida de dois pontos e do valor que quer atribuir a esse parâmetro. Por exemplo, eu só quero resultados que são *textos* (em oposição a vídeos, *etc.*). Certifique-se que os parâmetros e as opções que está a tentar usar são suportados pela funcionalidade de pesquisa do IA. Caso contrário, a sua pesquisa pode falhar ou devolver resultados estranhos e não saberá porquê.
-
-No *notebook*, eu gero uma lista de IDs do IA com o seguinte código:
-
-```python
-# Uma pesquisa de amostra (deve gerar dois resultados)
-query = "peter parley date:[1825 TO 1830] mediatype:texts"
-vol_ids = [result['identifier'] for result in ia.search_items(query)]
-```
-
-## Característica Visual: Blocos de Imagens
-
-O Internet Archive não apresenta quaisquer características ao nível da página. Ao invés, disponibiliza um certo número de ficheiros brutos do processo de digitalização aos utilizadores. O mais importante destes para os nossos propósitos é o ficheiro XML Abbyy. Abbyy é uma empresa russa cujo *software* FineReader domina o mercado do OCR.
-
-Todas as versões recentes do FineReader produzem um [documento XML](https://perma.cc/83EK-LXP2) que associa diferentes "blocos" com cada página no documento digitalizado. O tipo de bloco mais comum é `Text`, mas também existem blocos `Picture` ou "Imagem", em português. Aqui está um bloco de exemplo tirado dum ficheiro de XML Abbyy do IA. Os cantos superior esquerdo ("t" e "l") e inferior direito ("b" e "r") são suficientes para identificar a região de bloco retangular.
-
-```xml
-<!-- As coordenadas são meramente ilustrativas -->
-<block blockType="Picture" l="586" t="2428" r="768" b="2612">
-    <region><rect l="586" t="2428" r="768" b="2612"></rect></region>
-</block>
-```
-
-O equivalente no IA a ver as *tags* `IMAGE_ON_PAGE` no HT é a análise do ficheiro XML Abbyy e a iteração sobre cada página. Se existir pelo menos um bloco `Picture` nessa página, a página é sinalizada como possivelmente contendo uma imagem.
-
-Enquanto a característica `IMAGE_ON_PAGE` do HT não contém informação sobre a *localização* dessa imagem, os blocos `Picture` no ficheiro XML estão associados a uma região retangular na página. No entanto, porque o FineReader se especializa no reconhecimento de letras de conjuntos de caracteres ocidentais, é muito menos preciso a identificar regiões de imagem. O projeto de Leetaru (veja *Visão Geral*) usou as coordenadas da região para cortar imagens, mas nesta lição nós iremos simplesmente fazer o *download* da página inteira.
-
-Parte da diversão intelectual desta lição é usar um *dataset* (*tags* de bloco do OCR) por vezes confuso para um propósito largamente não intencional: identificar imagens e não palavras. A certa altura, tornar-se-á computacionalmente viável executar modelos de aprendizagem aprofundada em todas as imagens de página em bruto dum volume e escolher o(s) tipo(s) de imagem(/ns) desejada(s).
Mas, como a maior parte das páginas na maioria dos volumes não são ilustradas, esta é uma tarefa dispendiosa. Por agora, faz mais sentido aproveitar os dados existentes que nós detemos do processo de ingestão do OCR. - -Para mais informações sobre como o próprio OCR funciona e interage com o processo de digitalização, por favor, veja a lição do *PH* de Mila Oiva, [OCR With Tesseract and ScanTailor](/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor) (atenção que esta lição já não é actualizada). Erros podem surgir por causa de distorções, artefactos e muitos outros problemas. Estes erros acabam por afetar a fiabilidade e a precisão dos blocos "Picture". Em muitos casos, o Abbyy estimará que páginas em branco ou descoloridas são, na realidade, imagens. Estas *tags* de bloco incorretas, ainda que indesejadas, podem ser combatidas com o uso de redes neurais convolucionais retreinadas. Pense nas páginas com imagens cujo download foi feito nesta lição como um primeiro passo num processo mais longo para obter um *dataset* limpo e útil de ilustrações históricas. - -## Passo a Passo do Código - -### Encontrar as Imagens - -Tal como com o HT, a função principal para o IA é `ia_picture_download(item_id, out_dir=None)`. - -Visto que envolve o I/O dum ficheiro, o processo para obter a lista `img_pages` é mais complicado do que o do HT. Usando a utilidade `ia` (que é instalada com a biblioteca) da linha de comando, o leitor pode obter uma ideia dos ficheiros de metadados disponíveis para um volume. Com muitas poucas exceções, um ficheiro com o formato "Abbyy GZ" deveria estar disponível para volumes com o tipo de *media* `text` no Internet Archive. - -Estes ficheiros, mesmo quando comprimidos, podem facilmente ter centenas de megabytes de tamanho! Se existir um ficheiro Abbyy para o volume, nós obtemos o seu nome e depois fazemos o *download*. A chamada `ia.download()` usa alguns parâmetros úteis para ignorar a solicitação se o ficheiro já existe e, se não, para fazer o seu *download* sem criar um diretório aninhado. Para salvar espaço, nós eliminamos o ficheiro Abbyy depois de o analisar. - -```python -# Use o cliente da linha de comandos para ver os formatos de metadados disponíveis: -# `ia metadata formats VOLUME_ID` - -# Para esta lição, só o ficheiro Abbyy é necessário -returned_files = list(ia.get_files(item_id, formats=["Abbyy GZ"])) - -# Certifique-se de que algo é devolvido -if len(returned_files) > 0: - abbyy_file = returned_files[0].name -else: - print("[{}] Could not get Abbyy file".format(item_id)) - return None - -# Faça o download do ficheiro Abbyy para o CWD -ia.download(item_id, formats=["Abbyy GZ"], ignore_existing=True, \ - destdir=os.getcwd(), no_directory=True) -``` - -Assim que nós tivermos o ficheiro, nós precisamos de analisar o XML usando a biblioteca padrão do Python. Nós tomamos vantagem do facto de que nós podemos abrir o ficheiro comprimido diretamente com a biblioteca `gzip`. Os ficheiros Abbyy são indexadas a partir do zero, por isso a primeira página na sequência digitalizada tem o índice de 0. No entanto, nós temos que filtrar 0 porque não pode ser exigido do IA. A exclusão do índice 0 por parte do IA não está documentada em qualquer lugar; em vez disso, eu descobri através de tentativa e erro. Se o leitor ver uma mensagem de erro de explicação difícil, tente rastrear a origem e não tenha medo em pedir ajuda, seja a alguém com experiência relevante, seja a alguém da própria organização. 
- -```python -# Colecione as páginas com pelo menos um bloco de imagem -img_pages = [] - -with gzip.open(abbyy_file) as fp: - tree = ET.parse(fp) - document = tree.getroot() - for i, page in enumerate(document): - for block in page: - try: - if block.attrib['blockType'] == 'Picture': - img_pages.append(i) - break - except KeyError: - continue - -# 0 não é uma página válida para a realização de solicitações GET ao IA, mas às vezes -# está no ficheiro Abbyy comprimido -img_pages = [page for page in img_pages if page > 0] - -# Acompanhe o progresso do download -total_pages = len(img_pages) - -# Os ficheiros do OCR são pesados, por isso elimine assim que tivermos a lista de páginas -os.remove(abbyy_file) -``` - -### Fazer o *Download* das Imagens - -O *wrapper* do IA incorporado no Python não providencia uma função de download de páginas únicas—apenas em massa. Isto significa que nós usaremos a RESTful API do IA para obter páginas específicas. Primeiro, nós construímos um URL para cada página de que nós precisamos. Depois, nós usamos a biblioteca `requests` para enviar uma solicitação `GET` de HTTP e, se tudo correr bem (*i.e.* o código 200 é enviado na resposta), nós escrevemos o conteúdo da resposta num ficheiro JPEG. - -O IA tem estado a trabalhar numa [versão *alpha*](https://perma.cc/F6HJ-YGM7) duma API para o corte e redimensionamento de imagens que obedeça às exigências do International Image Interoperability Framework ([IIIF](https://perma.cc/7ABF-GGJM)). O IIIF representa uma profunda melhoria face ao antigo método para *downloads* de páginas únicas que requeriam a realização do *download* de ficheiros JP2, um formato de ficheiro largamente não suportado. Agora, é extremamente simples obter um só JPEG duma página: - -```python -# Veja: https://iiif.archivelab.org/iiif/documentation -urls = ["https://iiif.archivelab.org/iiif/{}${}/full/full/0/default.jpg".format(item_id, page) - for page in img_pages] - -# Sem download de página direto a partir da biblioteca do Python, construa uma solicitação GET -for i, page, url in zip(range(1,total_pages), img_pages, urls): - - rsp = requests.get(url, allow_redirects=True) - - if rsp.status_code == 200: - print("[{}] Downloading page {} ({}/{})".format(item_id, \ - page, i+1, total_pages)) - - with open(os.path.join(out_dir, str(page) + ".jpg"), "wb") as fp: - fp.write(rsp.content) -``` - -# Próximos Passos - -Assim que o leitor tiver entendido as principais funções e o código de *unpacking* dos dados nos *notebooks*, sinta-se livre para executar as células em sequência ou carregar em "*Run All*" e ver as páginas ilustradas a entrar nas pastas. O leitor é encorajado a adaptar estes *scripts* e funções para as suas próprias questões de pesquisa. - -[^1]: **Nota de tradução**: Aconselhamos o leitor a adicionar o Python ao PATH, processo que pode ser feito na ocasião da sua instalação. Isto irá suavizar a incorporação das dependências (veja *Dependências*). - -[^2]: **Nota de tradução**: Inicialmente, aparece uma página de transição, a qual deverá remeter rapidamente para o Jupyter. Caso tal não aconteça, basta seguir as instruções nesta página. 
+--- +title: Extrair Páginas Ilustradas de Bibliotecas Digitais com Python +slug: extrair-paginas-ilustradas-com-python +layout: lesson +date: 2019-01-14 +translation_date: 2023-05-03 +authors: +- Stephen Krewson +reviewers: +- Catherine DeRose +- Taylor Arnold +editors: +- Anandi Silva Knuppel +translator: +- João Domingues Pereira +translation-editor: +- Eric Brasil +translation-reviewer: +- Felipe Lamarca +- Salete Farias +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/447 +difficulty: 2 +activity: acquiring +topics: [api] +abstract: A aprendizagem de máquina e as extensões de API do HathiTrust e do Internet Archive estão a tornar mais fácil a extração de regiões de página com interesse visual de volumes digitalizados. Esta lição mostra como extrair eficientemente essas regiões e, ao fazê-lo, como fomentar novas questões sobre a pesquisa visual. +avatar_alt: Instrumento Científico de Medição +original: extracting-illustrated-pages +doi: 10.46430/phpt0040 +--- + +{% include toc.html %} + +# Visão Geral + +E se só quisesse ver as imagens num livro? Este é um pensamento que já ocorreu tanto a jovens crianças como a pesquisadores adultos. Se soubesse que o livro está disponível através duma biblioteca digital, seria útil fazer o *download* somente das páginas com imagens e ignorar o resto. + +Aqui estão as miniaturas de página dum volume do HathiTrust com o identificador exclusivo `osu.32435078698222`. Após o processo descrito nesta lição, apenas as páginas com imagens (31 no total) foram baixadas como JPEGs para uma pasta. + +{% include figure.html filename="file-explorer-example.png" alt="Imagem com a apresentação das páginas de um livro que contêm imagens" caption="Visualização dum volume para o qual só as páginas com imagens foram baixadas." %} + +Para ver quantas páginas *não ilustradas* foram filtradas, compare com o [conjunto total de miniaturas](https://babel.hathitrust.org/cgi/pt?id=osu.32435078698222;view=thumb;seq=1) para todas as 148 páginas nesta edição revisada de 1845 do livro infantil *bestseller* de Samuel Griswold Goodrich, *The Tales of Peter Parley About America* (1827). + +{% include figure.html filename="parley-full-thumbnails.png" alt="Imagem com a visualização de todas as miniaturas das páginas de um livro" caption="Visualização das miniaturas do HathiTrust para todas as páginas." %} + +Esta lição mostra como completar estas etapas de filtragem e de *download* para volumes de texto em domínio público detidos pelo HathiTrust (HT) e pelo Internet Archive (IA), duas das maiores bibliotecas digitais no mundo. Será do interesse de qualquer um que deseje criar coleções de imagens com o fim de aprender sobre a História da Ilustração e o *layout* (*mise en page*) dos livros. As abordagens visuais à bibliografia digital estão a tornar-se populares, seguindo os esforços pioneiros do [EBBA](https://perma.cc/3QYS-XNSF) e do [Aida](https://perma.cc/SH49-K56K). Projetos recentemente concluídos ou financiados exploram maneiras de [identificar notas de rodapé](https://web.archive.org/web/20190526050917/https://culturalanalytics.org/2018/12/detecting-footnotes-in-32-million-pages-of-ecco/) e de [rastrear notas de margem de página](https://perma.cc/QB4J-55GU), para dar só dois [exemplos](https://perma.cc/9RC2-PJBL). + +A minha própria pesquisa tenta responder a questões empíricas sobre alterações na frequência e modo de ilustração em textos médicos e educacionais do século dezanove. 
Isto envolve agregar múltiplas imagens por livro e tentar estimar que processo de impressão foi usado para fazer tais imagens. Um caso de uso mais direcionado para a extração de páginas ilustradas pode ser a catalogação de ilustrações ao longo de [diferentes edições](https://perma.cc/2FCU-YW6D) do mesmo livro. Trabalhos futuros poderão investigar com sucesso as características visuais e o *significado* das imagens extraídas: a sua cor, o seu tamanho, o seu tema, o seu género, o número de figuras e assim por diante.
+
+Como obter informação *localizada* sobre regiões visuais de interesse está para além do âmbito desta lição, visto que o processo envolve uma quantidade significativa de aprendizagem de máquina. No entanto, a classificação sim/não de páginas com (ou sem) imagens é um primeiro passo prático para reduzir o enorme volume de *todas* as páginas para cada livro numa coleção visada, tornando viável a localização de ilustrações. Para dar um ponto de referência, os textos médicos do século dezanove contêm (em média) ilustrações em 1-3% das suas páginas. Se estiver a tentar estudar a ilustração no interior dum *corpus* duma biblioteca digital sobre o qual não tem qualquer informação preexistente, é, consequentemente, razoável assumir que 90+% das páginas nesse *corpus* NÃO estarão ilustradas.
+
+O HT e o IA permitem que a questão com imagens/sem imagens seja respondida indiretamente através da análise dos dados gerados pelo *software* *optical character recognition* (OCR) ou reconhecimento ótico de caracteres, em português (o OCR é aplicado após um volume físico ser digitalizado com o objetivo de gerar uma transcrição do texto muitas vezes desordenada). Aproveitar o *output* do OCR para encontrar páginas ilustradas foi proposto primeiramente por Kalev Leetaru numa [colaboração de 2014](https://perma.cc/3J79-4QA6) com o Internet Archive e o Flickr. Esta lição transfere a abordagem de Leetaru para o HathiTrust e tira proveito de bibliotecas de processamento de XML mais rápidas no Python, bem como da gama recentemente ampliada de formatos de ficheiro de imagem do IA.
+
+Uma vez que o HT e o IA expõem a sua informação derivada do OCR de maneiras ligeiramente diferentes, eu irei adiar a apresentação dos detalhes das "características visuais" de cada biblioteca para as suas secções respetivas.
+
+# Objetivos
+
+No final da lição, o leitor será capaz de:
+
+- Configurar a versão "mínima" da distribuição Anaconda do Python (Miniconda) e criar um ambiente;
+- Salvar e iterar sobre uma lista de IDs de volumes do HT ou do IA gerados por uma pesquisa;
+- Aceder às *application programming interfaces* (APIs) ou interfaces de programação de aplicações, em português, de dados do HT e do IA através das bibliotecas do Python;
+- Encontrar características visuais ao nível da página;
+- Fazer o *download* dos JPEGs de páginas programaticamente.
+
+O grande objetivo é fortalecer as competências de coleta e exploração de dados ao criar um *corpus* de ilustração histórica. Combinar dados de imagem com os metadados dum volume permite a formulação de questões de pesquisa promissoras sobre a mudança visual ao longo do tempo.
+
+# Requisitos
+
+Os requisitos de *software* desta lição são mínimos: o acesso a uma máquina executando um sistema operacional padrão e um navegador de internet. O Miniconda está disponível em versões de 32 e de 64 *bits* para Windows, macOS e Linux. O Python 3 é a versão estável atual da linguagem e será suportado indefinidamente[^1].
+
+Este tutorial assume um conhecimento básico da linha de comando e da linguagem de programação Python. O leitor deve compreender as convenções para comentários e comandos num tutorial baseado num *shell*. Eu recomendo a [*Introduction to the Bash Command Line*](/en/lessons/intro-to-bash), de Ian Milligan e James Baker, para aprender ou para rever as suas competências com a linha de comando.
+
+# Configuração
+
+## Dependências
+
+Os leitores mais experientes podem querer simplesmente instalar as dependências e executar os *notebooks* nos seus ambientes de escolha. Mais informações sobre a minha própria configuração do Miniconda (e algumas diferenças entre o Windows e o *nix) são providenciadas.
+
+> **Nota de tradução**: Para instalar as dependências, altere o seu diretório de trabalho para a pasta onde se encontra instalado o Python executando o comando `cd` e, depois, digite o comando `pip install` ou `pip3 install` acompanhado por cada uma das seguintes linhas:
+
+- `hathitrust-api` ou `hathitrust_api` ([Documentos de Instalação](https://github.com/rlmv/hathitrust-api));
+- `internetarchive` ([Documentos de Instalação](https://archive.org/services/docs/api/internetarchive/));
+- `jupyter` ([Documentos de Instalação](https://jupyter.org/install));
+- `requests` ([Documentos de Instalação](https://requests.readthedocs.io/en/latest/user/install/#install)) [o criador recomenda a instalação do `pipenv`; para a instalação com o `pip`, veja [PyPI](https://pypi.org/project/requests/)].
+
+## Ficheiros da Lição
+
+Faça o *download* desta [pasta comprimida](/assets/extracting-illustrated-pages/lesson-files.zip) que contém dois *Jupyter notebooks*, um para cada uma das bibliotecas digitais. A pasta também contém um ficheiro de metadados JSON de amostra descrevendo uma coleção do HathiTrust. Descomprima e confirme que os seguintes ficheiros estão presentes: `554050894-1535834127.json`, `hathitrust.ipynb` e `internetarchive.ipynb`.
+
+
    +Todos os comandos subsequentes assumem que o seu diretório de trabalho atual é a pasta que contém os ficheiros da lição. +
    + +### Destino do *Download* + +Aqui está o diretório predefinido que será criado assim que todas as células em ambos os *notebooks* tiverem sido executadas (como providenciado). Depois de obter uma lista de quais páginas num volume contêm imagens, as funções de *download* do HT e do IA solicitam essas páginas como JPEGs (nomeadas pelo número de página) e arquivam-nas em subdiretórios (nomeados pelo ID do item). É claro que o leitor pode usar diferentes listas de volumes ou mudar o destino `out_dir` para algo que não `items`. + +``` +items/ +├── hathitrust +│ ├── hvd.32044021161005 +│ │ ├── 103.jpg +│ │ └── ... +│ └── osu.32435078698222 +│ ├── 100.jpg +│ ├── ... +└── internetarchive + └── talespeterparle00goodgoog + ├── 103.jpg + └── ... + +5 diretórios, 113 ficheiros +``` + +As funções de *download* são lentas; se executar os *notebooks* novamente, com o diretório `items` similar ao que se apresenta em cima, qualquer item que já tenha a sua própria subpasta será ignorado. + +## Anaconda (Opcional) + +A Anaconda é a principal distribuição científica do Python. O seu gerenciador de pacotes `conda` permite-lhe instalar bibliotecas como a `numpy` e a `tensorflow` com facilidade. A versão "Miniconda" não é acompanhada por quaisquer pacotes supérfluos pré-instalados, o que incentiva o leitor a manter o seu ambiente de base limpo e a instalar apenas o que necessita para um projeto dentro dum ambiente nomeado. + +Faça o *download* e instale o [Miniconda](https://conda.io/miniconda.html). Escolha a versão estável mais recente do Python 3. Se tudo correr bem, o leitor conseguirá executar `which conda` (no Linux/macOS) ou `where conda` (no Windows) no seu *shell* e ver a localização do programa executável no *output*. + +A Anaconda tem uma [*cheat sheet*](https://web.archive.org/web/20190115051900/https://conda.io/docs/_downloads/conda-cheatsheet.pdf) ou folha de dicas, em português, útil para comandos de uso frequente. + +### Criar um Ambiente + +Os ambientes, entre outras coisas, ajudam a controlar a complexidade associada ao uso de múltiplos gerenciadores de pacotes em conjunto. Nem todas as bibliotecas do Python podem ser instaladas através do `conda`. Em alguns casos, nós recorreremos ao gestor de pacote padrão do Python, o `pip` (ou alterações planejadas, como o `pipenv`). No entanto, quando o fizermos, nós usaremos uma versão do `pip` instalada através do `conda`. Isto mantém todos os pacotes que nós precisamos para o projeto no mesmo espaço virtual. + +```bash +# O seu ambiente atual é precedido por um asterisco +# (será a "base" num novo shell) +conda env list + +# Pacotes instalados no ambiente atual +conda list +``` + +Agora nós criamos um ambiente nomeado, configuramo-lo para usar Python 3, e ativamo-lo. + +```bash +# Note a sinalização "--name", que toma um argumento de string (e.g. "extract-pages") +# e a sintaxe para especificar a versão do Python +conda create --name extract-pages python=3 + +# Indique o novo ambiente (no Linux/macOS) +source activate extract-pages +``` + +```bash +# O comando do Windows para ativar o ambiente é ligeiramente diferente +conda activate extract-pages +``` + +Para sair dum ambiente, execute `source deactivate` no Linux/macOS ou `deactivate` no Windows. Mas certifique-se que permanece no ambiente `extract-pages` durante o decorrer da lição! + +### Instalar os Pacotes do Conda + +Nós podemos usar o `conda` para instalar os nossos primeiros pacotes. 
Todos os outros pacotes necessários (gzip, JSON, os, sys e time) fazem parte da [biblioteca padrão do Python](https://docs.python.org/3/library/). Note como nós precisamos de especificar um canal em alguns casos. O leitor pode pesquisar por pacotes no [Anaconda Cloud](https://anaconda.org/). + + +```bash +# Para garantir que nós temos uma versão local do pip (veja a discussão em baixo) +conda install pip + +conda install jupyter + +conda install --channel anaconda requests +``` + +O Jupyter tem muitas dependências (outros pacotes dos quais depende), por isso esta etapa pode exigir alguns minutos. Recorde-se que quando o `conda` lhe pergunta se deseja continuar com a instalação por via da questão `Proceed ([y]/n)?`, o leitor deve digitar um `y` ou um `yes` e, depois, pressionar *Enter* para aceitar a instalação do pacote. + +
+Nos bastidores, o conda está a trabalhar para se certificar de que todos os pacotes e dependências necessários serão instalados de maneira compatível.
+
+
+### Instalar Pacotes do Pip
+
+Se estiver a usar um ambiente `conda`, é melhor usar a versão local do `pip`. Confirme que os seguintes comandos devolvem no *output* um programa cujo caminho absoluto contém algo como `/Miniconda/envs/extract-pages/Scripts/pip`.
+
+```bash
+which pip
+```
+
+```bash
+# O equivalente do Windows ao "which"
+where pip
+```
+
+Se vir duas versões do `pip` no *output* em cima, certifique-se de digitar o caminho absoluto para a versão do ambiente *local* ao instalar as bibliotecas *wrapper* da API.
+
+```bash
+pip install hathitrust-api
+pip install internetarchive
+```
+
+```bash
+# Exemplo do Windows usando o caminho absoluto para o executável do pip local;
+# substitua "stephen-krewson" pelo seu nome de utilizador
+C:\Users\stephen-krewson\Miniconda\envs\extract-pages\Scripts\pip.exe install hathitrust-api internetarchive
+```
+
+## *Jupyter Notebooks*
+
+O [*Text Mining in Python Through the HTRC Feature Reader*](/en/lessons/text-mining-with-extracted-features#start-a-notebook), de Peter Organisciak e Boris Capitanu, explica os benefícios dos *notebooks* para o desenvolvimento e a exploração de dados. Também contém informação útil sobre como executar eficazmente as células. Visto que nós instalámos a versão minimalista da Anaconda, nós precisamos de iniciar o Jupyter a partir da linha de comandos. No seu *shell* (a partir do interior da pasta contendo os ficheiros da lição), execute `jupyter notebook`.
+
+Isto executará o servidor do *notebook* no seu *shell* e iniciará o seu navegador de internet predefinido com a página inicial do Jupyter[^2]. A página inicial mostra todos os ficheiros no diretório de trabalho atual.
+
+{% include figure.html filename="jupyter-home.png" alt="Imagem com a apresentação da estrutura de ficheiros da página inicial do Jupyter" caption="A página inicial do Jupyter mostrando os ficheiros da lição." %}
+
+No seu *shell*, certifique-se de que usou o comando `cd` para ir até ao diretório descomprimido `lesson-files`.
+
+
+Clique nos *notebooks* `hathitrust.ipynb` e `internetarchive.ipynb` para abri-los em novas abas do navegador de internet. A partir daqui, nós não precisamos de executar qualquer comando no *shell*. Os *notebooks* permitem-nos executar o código Python e ter acesso total ao sistema de pastas do computador. Quando o leitor tiver terminado, pode parar o servidor do *notebook* carregando em "*Quit*" na página inicial do Jupyter ou executando `ctrl+c` no *shell*.
+
+# HathiTrust
+
+## Acesso à API
+
+O leitor precisa de efetuar um registro no HathiTrust antes de usar a API de dados. Dirija-se ao [portal de registro](https://babel.hathitrust.org/cgi/kgs/request) e preencha o seu nome, a sua organização e o seu e-mail para requerer chaves de acesso. O leitor deverá receber uma resposta no e-mail dentro de cerca dum minuto (**nota de tradução**: verifique também a caixa de *spam*). Clique no link, que o levará a uma página temporária com ambas as chaves exibidas.
+
+No *notebook* `hathitrust.ipynb`, examine a primeira célula (mostrada em baixo). Preencha as suas chaves da API como indicado. Depois, execute a célula clicando em "*Run*" na barra de navegação do *notebook*.
+
+```python
+# Importe o wrapper da API de dados do HT
+from hathitrust_api import DataAPI
+
+# Substitua as strings pelas suas credenciais do HT (mantendo as aspas)
+ht_access_key = "YOUR_ACCESS_KEY_HERE"
+ht_secret_key = "YOUR_SECRET_KEY_HERE"
+
+# Instancie o objeto de conexão da API de dados
+data_api = DataAPI(ht_access_key, ht_secret_key)
+```
+
    +Cuidado! Não exponha as suas chaves de acesso através dum repositório público no GitHub (ou outro host de controle de versões). Elas serão pesquisáveis por qualquer outra pessoa. Uma boa prática para um projeto Python é a de armazenar as suas chaves de acesso como variáveis de ambiente ou salvá-las num ficheiro que não é versionado. +
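+
+A título de exemplo, eis um esboço mínimo de como carregar as chaves a partir de variáveis de ambiente, em vez de as escrever diretamente no *notebook*. Os nomes `HT_ACCESS_KEY` e `HT_SECRET_KEY` são hipotéticos; o leitor pode escolher outros.
+
+```python
+import os
+
+# Esboço: lê as credenciais de variáveis de ambiente (hipotéticas),
+# definidas previamente no shell, e.g. export HT_ACCESS_KEY="..."
+ht_access_key = os.environ.get("HT_ACCESS_KEY")
+ht_secret_key = os.environ.get("HT_SECRET_KEY")
+
+if ht_access_key is None or ht_secret_key is None:
+    raise RuntimeError("Defina HT_ACCESS_KEY e HT_SECRET_KEY no ambiente.")
+```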
+
+## Criar uma Lista de Volumes
+
+O HT permite a qualquer um fazer uma coleção de itens; o leitor nem sequer precisa de ter sessão iniciada! No entanto, o leitor deveria registrar uma conta se quiser salvar a sua lista de volumes. Siga as [instruções](https://babel.hathitrust.org/cgi/mb?colltype=updated) para fazer algumas pesquisas no texto completo e para, depois, adicionar resultados escolhidos a uma coleção. Atualmente, o HathiTrust não tem uma API de pesquisa pública para adquirir volumes programaticamente; o leitor precisa de pesquisar através da sua *interface* da internet.
+
+Ao atualizar uma coleção, o HT mantém o rastro dos metadados associados para cada item nela. Eu incluí nos ficheiros da lição os metadados para uma coleção de amostra no formato JSON. Se quisesse usar o ficheiro da sua própria coleção do HT, o leitor navegaria até à página das suas coleções e colocaria o cursor do *mouse* sobre o link dos metadados à esquerda para revelar a opção para fazer o *download* como JSON, como observado na seguinte captura de tela.
+
+{% include figure.html filename="download-ht-json.png" alt="Imagem de uma página web do site HathiTrust com instruções para download de metadados de ficheiros JSON" caption="Captura de tela de como fazer o *download* dos metadados de coleções no formato JSON." %}
+
+Assim que o leitor tiver feito o *download* do ficheiro JSON, basta movê-lo para o diretório onde colocou os *Jupyter notebooks*. Substitua o nome do ficheiro JSON no *notebook* do HT pelo nome do ficheiro da sua coleção.
+
+O *notebook* mostra como usar *list comprehension* para obter todas as *strings* `htitem_id` dentro do objeto `gathers`, que contém todas as informações da coleção.
+
+```python
+# O leitor pode especificar o ficheiro de metadados da sua coleção aqui
+metadata_path = "554050894-1535834127.json"
+
+with open(metadata_path, "r") as fp:
+    data = json.load(fp)
+
+# Uma lista de todas as IDs exclusivas na coleção
+vol_ids = [item['htitem_id'] for item in data['gathers']]
+```
+
+Os tutoriais normalmente mostram-lhe como processar um item de exemplo (muitas vezes de tamanho ou complexidade trivial). Isto é pedagogicamente conveniente, mas significa que o leitor está menos equipado para aplicar esse código a múltiplos itens, de longe o caso de uso mais comum. Nos *notebooks*, o leitor verá como encapsular transformações aplicadas a um item em funções que podem ser usadas num *loop* sobre uma coleção de itens, como no esboço a seguir.
+
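+
+A título de ilustração, eis um esboço mínimo desse padrão, pressupondo a lista `vol_ids` criada acima e a função `ht_picture_download()`, definida no *notebook* do HT e discutida em baixo:
+
+```python
+# Esboço: aplica a mesma função de download a cada volume da coleção;
+# cada item ganha a sua própria subpasta dentro de "items/hathitrust"
+for vol_id in vol_ids:
+    ht_picture_download(vol_id, out_dir="items/hathitrust")
+```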
+
+## Característica Visual: IMAGE_ON_PAGE
+
+Dada uma lista de volumes, nós queremos explorar que características visuais eles têm ao nível da página. A [documentação mais recente](https://perma.cc/Y6UU-G9HZ) (2015) para a API de dados descreve um objeto de metadados chamado `htd:pfeat` nas páginas 9-10. `htd:pfeat` é a abreviação para "HathiTrust Data API: Page Features".
+
+> * `htd:pfeat` - the page feature key (if available):
+>    - CHAPTER_START
+>    - COPYRIGHT
+>    - FIRST_CONTENT_CHAPTER_START
+>    - FRONT_COVER
+>    - INDEX
+>    - REFERENCES
+>    - TABLE_OF_CONTENTS
+>    - TITLE
+
+O que o *wrapper* `hathitrust-api` faz é disponibilizar os metadados completos para um volume do HT como um objeto Python. Dado o identificador dum volume, nós podemos pedir os seus metadados e, depois, fazer o *drill down* através da *sequência* de páginas até à informação ao nível da página. A *lista* `htd:pfeat` está associada com cada página num volume e, em teoria, contém todas as características que se aplicam a essa página. Na prática, existem mais algumas *tags* de características do que as oito listadas em cima. Aquela com a qual nós iremos trabalhar chama-se `IMAGE_ON_PAGE` e é mais abstratamente visual do que *tags* estruturais como `CHAPTER_START`.
+
+Tom Burton-West, um bibliotecário pesquisador na biblioteca da *University of Michigan*, trabalha em estreita colaboração com o HathiTrust e o HTRC, o Centro de Pesquisa do HathiTrust. O Tom disse-me por e-mail que o HathiTrust recebe a informação `htd:pfeat` via o Google, com o qual trabalha proximamente desde a fundação do HT, em 2008. Um contacto no Google deu permissão ao Tom para partilhar o seguinte:
+
+> Estas *tags* são derivadas duma combinação de heurísticas, de aprendizagem de máquina e de anotação humana.
+
+Um exemplo de heurística pode ser o facto de o primeiro elemento na sequência de páginas do volume ser quase sempre a `FRONT_COVER`. A aprendizagem de máquina pode ser usada para treinar modelos a discriminar, digamos, entre dados de imagem que são mais típicos das linhas de prosa numa escrita ocidental ou das linhas numa gravura. A anotação humana é a atribuição manual de etiquetas a imagens. A habilidade de ver as ilustrações dum volume nos bancos de dados do EEBO e do ECCO é um exemplo de anotação humana.
+
+O uso da "aprendizagem de máquina" pelo Google parece um pouco misterioso. Até o Google publicitar os seus métodos, é impossível saber todos os detalhes. No entanto, é provável que as *tags* `IMAGE_ON_PAGE` tenham sido propostas pela primeira vez após a deteção de blocos de "Imagens" nos ficheiros de *output* do OCR (um processo discutido em baixo, na secção do Internet Archive). Mais filtragem pode, então, ser aplicada.
+
+## Passo a Passo Para o Código
+
+### Encontrar as imagens
+
+Nós vimos como criar uma lista de volumes e observámos que a API de dados pode ser usada para obter objetos de metadados contendo características experimentais ao nível da página. A função essencial no *notebook* do HT tem a assinatura `ht_picture_download(item_id, out_dir=None)`. Dado um identificador exclusivo e um diretório de destino opcional, esta função irá, em primeiro lugar, obter os metadados do volume a partir da API e convertê-los num formato JSON. Depois, percorre a sequência de páginas e verifica se a *tag* `IMAGE_ON_PAGE` está na lista `htd:pfeat` (se a mesma existir).
+
+```python
+# Metadados da API no formato JSON (diferente dos metadados da coleção do HT)
+meta = json.loads(data_api.getmeta(item_id, json=True))
+
+# A sequência dá-nos cada página do item digitalizado em ordem, com qualquer
+# informação adicional que possa estar disponível
+sequence = meta['htd:seqmap'][0]['htd:seq']
+
+# A lista de páginas com imagens (vazia, no início)
+img_pages = []
+
+# O bloco try/except lida com situações onde nenhuma "pfeats" existe OU
+# os números da sequência não são numéricos
+for page in sequence:
+    try:
+        if 'IMAGE_ON_PAGE' in page['htd:pfeat']:
+            img_pages.append(int(page['pseq']))
+    except (KeyError, TypeError) as e:
+        continue
+```
+
+Note que nós precisamos de fazer o *drill down* por vários níveis dentro do objeto de nível de topo para obter o objeto `htd:seq`, sobre o qual nós podemos iterar.
+
+As duas exceções que eu quero evitar são o `KeyError`, que ocorre quando a página não tem qualquer característica ao nível da página a si associada, e o `TypeError`, que ocorre quando o campo `pseq` para a página é, por alguma razão, não numérico e, portanto, não pode ser convertido num `int`. Se algo correr mal com uma página, nós simplesmente executamos `continue` para passar à próxima. A ideia é obter todos os dados bons que conseguirmos, não limpar as inconsistências ou falhas nos metadados do item.
+
+### Fazer o *Download* das Imagens
+
+Assim que `img_pages` contém a lista completa de páginas com a *tag* `IMAGE_ON_PAGE`, nós podemos fazer o *download* dessas páginas. Note que, se nenhum `out_dir` for fornecido a `ht_picture_download()`, então a função simplesmente retorna a lista `img_pages` e NÃO faz o *download* do que quer que seja.
+
+A chamada da API `getpageimage()` retorna um JPEG por predefinição. Nós simplesmente colocamos os bytes do JPEG num ficheiro na forma normal. Dentro da subpasta do volume (ela própria dentro do `out_dir`), as páginas serão nomeadas `1.jpg` para a página 1 e assim sucessivamente.
+
+Uma coisa a considerar é a nossa taxa de uso da API. Nós não queremos abusar do nosso acesso ao fazer centenas de pedidos por minuto. Para estarmos a salvo, especialmente se pretendermos executar grandes trabalhos, nós esperamos dois segundos antes de fazer cada pedido de página. Isto pode ser frustrante a curto prazo, mas ajuda a evitar o sufocamento ou a suspensão da API.
+
+
+```python
+for i, page in enumerate(img_pages):
+    try:
+        # Uma simples mensagem de estado
+        print("[{}] Downloading page {} ({}/{})".format(item_id, page, i+1, total_pages))
+
+        img = data_api.getpageimage(item_id, page)
+
+        # N.B.: O loop só é executado se "out_dir" não for "None"
+        img_out = os.path.join(out_dir, str(page) + ".jpg")
+
+        # Escreva a imagem
+        with open(img_out, 'wb') as fp:
+            fp.write(img)
+
+        # Para evitar exceder o uso da API permitido
+        time.sleep(2)
+
+    except Exception as e:
+        print("[{}] Error downloading page {}: {}".format(item_id, page, e))
+```
+
+# Internet Archive
+
+## Acesso à API
+
+Nós ligamo-nos à biblioteca Python da API usando o e-mail e a palavra-passe duma conta no Archive.org, ao invés de chaves de acesso da API. Isto é discutido no [Guia Quickstart](https://archive.org/services/docs/api/internetarchive/quickstart.html). Se não tiver uma conta, [registre-se](https://archive.org/account/login.createaccount.php) para obter o seu "Virtual Library Card".
+
+Na primeira célula do *notebook* `internetarchive.ipynb`, introduza as suas credenciais como indicado. Execute a célula para autenticar-se perante a API.
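+
+A título de ilustração, a primeira célula terá um aspeto semelhante ao esboço seguinte (os nomes `ia_email` e `ia_password` são os referidos na nota de tradução em baixo; os valores são fictícios):
+
+```python
+import internetarchive as ia
+
+# Substitua as strings pelas credenciais da sua conta no Archive.org
+ia_email = "utilizador@exemplo.com"
+ia_password = "A_SUA_PALAVRA_PASSE_AQUI"
+
+# Autentica perante a API (mas veja a nota de tradução em baixo)
+ia.configure(ia_email, ia_password)
+```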
+
+> **Nota de tradução**: O comando `ia.configure(ia_email, ia_password)` é atualmente desnecessário e pode gerar um erro extenso, em cuja mensagem final consta: `InvalidURL: Invalid URL 'https:///services/xauthn/': No host supplied`. Sugerimos que o mesmo não seja executado no ficheiro IPYNB.
+
+## Criar uma Lista de Volumes
+
+A biblioteca Python do IA permite-lhe submeter *query strings* e receber uma lista de pares chave-valor correspondentes, na qual a palavra "*identifier*", ou identificador, em português, é a chave e o verdadeiro identificador é o valor. A sintaxe para uma *query* é explicada na [página de Advanced Search](https://archive.org/advancedsearch.php) do IA. O leitor pode especificar parâmetros ao usar uma palavra-chave como "*date*" ou "*mediatype*" seguida de dois pontos e do valor que quer atribuir a esse parâmetro. Por exemplo, eu só quero resultados que sejam *textos* (em oposição a vídeos, *etc.*). Certifique-se de que os parâmetros e as opções que está a tentar usar são suportados pela funcionalidade de pesquisa do IA. Caso contrário, pode perder resultados ou obter resultados estranhos e não saber porquê.
+
+No *notebook*, eu gero uma lista de IDs do IA com o seguinte código:
+
+```python
+# Uma pesquisa de amostra (deve gerar dois resultados)
+query = "peter parley date:[1825 TO 1830] mediatype:texts"
+vol_ids = [result['identifier'] for result in ia.search_items(query)]
+```
+
+## Característica Visual: Blocos de Imagens
+
+O Internet Archive não apresenta quaisquer características ao nível da página. Ao invés, disponibiliza um certo número de ficheiros brutos do processo de digitalização aos utilizadores. O mais importante destes para os nossos propósitos é o ficheiro XML Abbyy. Abbyy é uma empresa russa cujo *software* FineReader domina o mercado do OCR.
+
+Todas as versões recentes do FineReader produzem um [documento XML](https://perma.cc/83EK-LXP2) que associa diferentes "blocos" com cada página no documento digitalizado. O tipo de bloco mais comum é `Text`, mas também existem blocos `Picture` ou "Imagem", em português. Aqui está um bloco de exemplo tirado dum ficheiro de XML Abbyy do IA (os valores numéricos servem apenas de ilustração). Os cantos superior esquerdo ("t" e "l") e inferior direito ("b" e "r") são suficientes para identificar a região de bloco retangular.
+
+```xml
+<block blockType="Picture" l="586" t="1428" r="768" b="1612">
+  <region><rect l="586" t="1428" r="768" b="1612"></rect></region>
+</block>
+```
+
+O equivalente no IA a ver as *tags* `IMAGE_ON_PAGE` no HT é a análise do ficheiro XML Abbyy e a iteração sobre cada página. Se existir pelo menos um bloco `Picture` nessa página, a página é sinalizada como possivelmente contendo uma imagem.
+
+Enquanto a característica `IMAGE_ON_PAGE` do HT não contém informação sobre a *localização* dessa imagem, os blocos `Picture` no ficheiro XML estão associados a uma região retangular na página. No entanto, porque o FineReader se especializa no reconhecimento de letras de conjuntos de caracteres ocidentais, é muito menos preciso a identificar regiões de imagem. O projeto de Leetaru (veja *Visão Geral*) usou as coordenadas da região para cortar imagens, mas nesta lição nós iremos simplesmente fazer o *download* da página inteira.
+
+Parte da diversão intelectual desta lição é usar um *dataset* (*tags* de bloco do OCR) por vezes confuso para um propósito largamente não intencional: identificar imagens e não palavras. A certa altura, tornar-se-á computacionalmente viável executar modelos de aprendizagem profunda em todas as páginas dum volume e escolher o(s) tipo(s) de imagem desejado(s). Mas, como a maior parte das páginas na maioria dos volumes não são ilustradas, esta é uma tarefa dispendiosa. Por agora, faz mais sentido aproveitar os dados que nós já detemos do processo de ingestão do OCR.
+
+Para mais informações sobre como o próprio OCR funciona e interage com o processo de digitalização, por favor, veja a lição do *PH* de Mila Oiva, [OCR With Tesseract and ScanTailor](/en/lessons/retired/OCR-with-Tesseract-and-ScanTailor) (atenção: esta lição já não é atualizada). Erros podem surgir por causa de distorções, artefactos e muitos outros problemas. Estes erros acabam por afetar a fiabilidade e a precisão dos blocos "Picture". Em muitos casos, o Abbyy estimará que páginas em branco ou descoloridas são, na realidade, imagens. Estas *tags* de bloco incorretas, ainda que indesejadas, podem ser combatidas com o uso de redes neurais convolucionais retreinadas. Pense nas páginas com imagens cujo *download* foi feito nesta lição como um primeiro passo num processo mais longo para obter um *dataset* limpo e útil de ilustrações históricas.
+
+## Passo a Passo do Código
+
+### Encontrar as Imagens
+
+Tal como com o HT, a função principal para o IA é `ia_picture_download(item_id, out_dir=None)`.
+
+Visto que envolve o I/O dum ficheiro, o processo para obter a lista `img_pages` é mais complicado do que o do HT. Usando o utilitário de linha de comandos `ia` (que é instalado com a biblioteca), o leitor pode obter uma ideia dos ficheiros de metadados disponíveis para um volume. Com muito poucas exceções, um ficheiro com o formato "Abbyy GZ" deverá estar disponível para volumes com o tipo de *media* `text` no Internet Archive.
+
+Estes ficheiros, mesmo quando comprimidos, podem facilmente ter centenas de megabytes de tamanho! Se existir um ficheiro Abbyy para o volume, nós obtemos o seu nome e depois fazemos o *download*. A chamada `ia.download()` usa alguns parâmetros úteis para ignorar a solicitação se o ficheiro já existe e, se não, para fazer o seu *download* sem criar um diretório aninhado. Para poupar espaço, nós eliminamos o ficheiro Abbyy depois de o analisar.
+
+```python
+# Use o cliente da linha de comandos para ver os formatos de metadados disponíveis:
+# `ia metadata formats VOLUME_ID`
+
+# Para esta lição, só o ficheiro Abbyy é necessário
+returned_files = list(ia.get_files(item_id, formats=["Abbyy GZ"]))
+
+# Certifique-se de que algo é devolvido
+if len(returned_files) > 0:
+    abbyy_file = returned_files[0].name
+else:
+    print("[{}] Could not get Abbyy file".format(item_id))
+    return None
+
+# Faça o download do ficheiro Abbyy para o CWD
+ia.download(item_id, formats=["Abbyy GZ"], ignore_existing=True, \
+    destdir=os.getcwd(), no_directory=True)
+```
+
+Assim que nós tivermos o ficheiro, nós precisamos de analisar o XML usando a biblioteca padrão do Python. Nós tiramos partido do facto de podermos abrir o ficheiro comprimido diretamente com a biblioteca `gzip`. Os ficheiros Abbyy são indexados a partir do zero, por isso a primeira página na sequência digitalizada tem o índice 0. No entanto, nós temos que filtrar o 0, porque a página 0 não pode ser solicitada ao IA. A exclusão do índice 0 por parte do IA não está documentada em lado nenhum; eu descobri-a por tentativa e erro. Se o leitor vir uma mensagem de erro difícil de explicar, tente rastrear a sua origem e não tenha medo de pedir ajuda, seja a alguém com experiência relevante, seja a alguém da própria organização.
+
+```python
+# Pressupõe os imports feitos no início do notebook, por exemplo:
+# import gzip, os
+# import xml.etree.ElementTree as ET
+
+# Colecione as páginas com pelo menos um bloco de imagem
+img_pages = []
+
+with gzip.open(abbyy_file) as fp:
+    tree = ET.parse(fp)
+    document = tree.getroot()
+    for i, page in enumerate(document):
+        for block in page:
+            try:
+                if block.attrib['blockType'] == 'Picture':
+                    img_pages.append(i)
+                    break
+            except KeyError:
+                continue
+
+# 0 não é uma página válida para a realização de solicitações GET ao IA, mas às vezes
+# está no ficheiro Abbyy comprimido
+img_pages = [page for page in img_pages if page > 0]
+
+# Acompanhe o progresso do download
+total_pages = len(img_pages)
+
+# Os ficheiros do OCR são pesados, por isso elimine assim que tivermos a lista de páginas
+os.remove(abbyy_file)
+```
+
+### Fazer o *Download* das Imagens
+
+O *wrapper* Python do IA não providencia uma função de *download* de páginas únicas, apenas em massa. Isto significa que nós usaremos a RESTful API do IA para obter páginas específicas. Primeiro, nós construímos um URL para cada página de que nós precisamos. Depois, nós usamos a biblioteca `requests` para enviar uma solicitação `GET` de HTTP e, se tudo correr bem (*i.e.* o código 200 é enviado na resposta), nós escrevemos o conteúdo da resposta num ficheiro JPEG.
+
+O IA tem estado a trabalhar numa [versão *alpha*](https://perma.cc/F6HJ-YGM7) duma API para o corte e redimensionamento de imagens que obedeça às exigências do International Image Interoperability Framework ([IIIF](https://perma.cc/7ABF-GGJM)). O IIIF representa uma profunda melhoria face ao antigo método para *downloads* de páginas únicas, que requeria o *download* de ficheiros JP2, um formato de ficheiro largamente não suportado. Agora, é extremamente simples obter um só JPEG duma página:
+
+```python
+# Veja: https://iiif.archivelab.org/iiif/documentation
+urls = ["https://iiif.archivelab.org/iiif/{}${}/full/full/0/default.jpg".format(item_id, page)
+        for page in img_pages]
+
+# Sem download de página direto a partir da biblioteca do Python, construa uma solicitação GET
+for i, (page, url) in enumerate(zip(img_pages, urls)):
+
+    rsp = requests.get(url, allow_redirects=True)
+
+    if rsp.status_code == 200:
+        print("[{}] Downloading page {} ({}/{})".format(item_id, \
+            page, i+1, total_pages))
+
+        with open(os.path.join(out_dir, str(page) + ".jpg"), "wb") as fp:
+            fp.write(rsp.content)
+```
+
+# Próximos Passos
+
+Assim que o leitor tiver entendido as principais funções e o código de *unpacking* dos dados nos *notebooks*, sinta-se livre para executar as células em sequência ou carregar em "*Run All*" e ver as páginas ilustradas a entrar nas pastas. O leitor é encorajado a adaptar estes *scripts* e funções para as suas próprias questões de pesquisa.
+
+[^1]: **Nota de tradução**: Aconselhamos o leitor a adicionar o Python ao PATH, processo que pode ser feito na ocasião da sua instalação. Isto irá facilitar a instalação das dependências (veja *Dependências*).
+
+[^2]: **Nota de tradução**: Inicialmente, aparece uma página de transição, a qual deverá remeter rapidamente para o Jupyter. Caso tal não aconteça, basta seguir as instruções nesta página.
diff --git a/pt/licoes/extrair-palavras-chave.md b/pt/licoes/extrair-palavras-chave.md index 8d7a0cb4d5..b6555e5e90 100644 --- a/pt/licoes/extrair-palavras-chave.md +++ b/pt/licoes/extrair-palavras-chave.md @@ -48,7 +48,7 @@ Se tem uma cópia de um texto armazenada no seu computador, é relativamente fá Esta lição é útil para qualquer um que trabalhe com fontes históricas armazenadas no seu próprio computador e que estejam transcritas em formatos mutáveis de texto eletrónico. É particularmente útil para pessoas interessadas em delimitar subgrupos de documentos que contenham uma ou mais de um grande número de palavras-chave. Isto pode ser útil para identificar um subconjunto relevante para leitura atenta ou para extrair e estruturar as palavras-chave num formato que possa ser usado noutra ferramenta digital como, por exemplo, dados de entrada para um exercício de mapeamento. -O presente tutorial mostrará aos usuários como extrair todas as menções a condados ingleses e gauleses de uma série de 6,692 minibiografias de indivíduos que ingressaram na Universidade de Oxford durante o reinado de Jaime I de Inglaterra (1603-1625). Estes registos foram transcritos pela [British History Online](http://www.british-history.ac.uk/alumni-oxon/1500-1714) (em inglês), através da versão impressa de *Alumni Oxonienses, 1500-1714*. Estas biografias contêm informação sobre cada aluno, incluindo a data dos seus estudos e a faculdade ou faculdades que frequentaram. Muitas vezes incluem informações adicionais, quando conhecidas, como a data de nascimento e morte, o nome e ocupação do pai, a sua naturalidade, e o percurso profissional posterior. As biografias são fontes ricas, das quais provêm informações relativamente comparáveis sobre um grande número de indivíduos semelhantes (homens ricos que frequentaram Oxford). Os 6,692 registos foram pré-processados pelo autor e salvos num [ficheiro CSV](https://perma.cc/MLL8-8BG4) (em inglês), com uma entrada por linha. +O presente tutorial mostrará aos usuários como extrair todas as menções a condados ingleses e gauleses de uma série de 6,692 minibiografias de indivíduos que ingressaram na Universidade de Oxford durante o reinado de Jaime I de Inglaterra (1603-1625). Estes registos foram transcritos pela [British History Online](https://www.british-history.ac.uk/alumni-oxon/1500-1714) (em inglês), através da versão impressa de *Alumni Oxonienses, 1500-1714*. Estas biografias contêm informação sobre cada aluno, incluindo a data dos seus estudos e a faculdade ou faculdades que frequentaram. Muitas vezes incluem informações adicionais, quando conhecidas, como a data de nascimento e morte, o nome e ocupação do pai, a sua naturalidade, e o percurso profissional posterior. As biografias são fontes ricas, das quais provêm informações relativamente comparáveis sobre um grande número de indivíduos semelhantes (homens ricos que frequentaram Oxford). Os 6,692 registos foram pré-processados pelo autor e salvos num [ficheiro CSV](https://perma.cc/MLL8-8BG4) (em inglês), com uma entrada por linha. Neste tutorial, o ["dataset"](https://perma.cc/V2B9-WVAK) envolve palavras-chave geográficas. Uma vez extraídas, os nomes de localidades podem ser georreferenciados para o seu local no globo e, depois, mapeados, recorrendo ao mapeamento digital. Isto torna possível determinar quais as faculdades que atraíam estudantes de determinadas partes do país, e se estes padrões se alteraram ao longo do tempo. 
Para um tutorial prático sobre como aplicar este próximo passo, veja a [lição de Fred Gibbs](https://perma.cc/64YX-2E2V) mencionada no final desta lição. Os leitores também podem estar interessados em ler [Georreferenciamento com o QGIS 3.20](/pt/licoes/georreferenciamento-qgis), também disponível no *Programming Historian*. diff --git a/pt/licoes/geocodificando-qgis.md b/pt/licoes/geocodificando-qgis.md index 94f2014cb4..e447da16c5 100644 --- a/pt/licoes/geocodificando-qgis.md +++ b/pt/licoes/geocodificando-qgis.md @@ -83,7 +83,7 @@ Também precisará de utilizar uma base de dados relacional, como Microsoft Acce Atenção: O LibreOffice requer uma instalação completa de Java para utilizar o aplicativo Base. Isto é facil de realizar fazendo o download e instalando o Java 8 Development Kit no seu sistema operacional pelo Oracle. O Java 8 Runtime Environment NÃO funciona com o LibreOffice no macOS 10.11.
    -O tutorial irá mapear os dados extraídos do [*Alumni Oxonienses*](http://www.british-history.ac.uk/alumni-oxon/1500-1714) (em inglês), da lição do *Programming Historian*, [Using Gazetteers to Extract Sets of Keywords from Free-Flowing Texts](/en/lessons/extracting-keywords) (em inglês), utilizando mapas de condados históricos da Inglaterra e do País de Gales, mapas estes disponíveis publicamente. Completar esse tutorial primeiro ajudará a compreender os dados mapeados aqui. Esses dados são oferecidos tanto como um conjunto de dados completo, quanto como um arquivo à parte que reúne os nomes de ex-alunos de Oxford pelos condados de origem, criado a partir do primeiro arquivo utilizando uma PivotTable do Excel. +O tutorial irá mapear os dados extraídos do [*Alumni Oxonienses*](https://www.british-history.ac.uk/alumni-oxon/1500-1714) (em inglês), da lição do *Programming Historian*, [Using Gazetteers to Extract Sets of Keywords from Free-Flowing Texts](/en/lessons/extracting-keywords) (em inglês), utilizando mapas de condados históricos da Inglaterra e do País de Gales, mapas estes disponíveis publicamente. Completar esse tutorial primeiro ajudará a compreender os dados mapeados aqui. Esses dados são oferecidos tanto como um conjunto de dados completo, quanto como um arquivo à parte que reúne os nomes de ex-alunos de Oxford pelos condados de origem, criado a partir do primeiro arquivo utilizando uma PivotTable do Excel. ## Os dados @@ -126,7 +126,7 @@ Atenção: O QGIS é bastante sensível ao corrigir arquivos CSV (Comma Separate Existe uma diferença importante entre Sistemas de Coordenadas Geográficas, que meramente definem as unidades de medida e o datum, e Sistemas de Coordenadas Projetadas, que também definem a maneira com a qual o globo é “achatado” sobre um mapa. O [OSGB](https://perma.cc/6U2D-V8SZ) (em inglês) está disponível em ambas as variantes do QGIS, então escolha a versão “projetada” que lhe dará um mapa no qual o Reino Unido apareça da maneira esperada. Para mais detalhes sobre projeções em SIG, veja o [tutorial Working with Projections in QGIS.](https://perma.cc/U47A-7CGG) (em inglês). -* Faça download de um shapefile contendo polígonos dos condados históricos da Inglaterra e do País de Gales em [http://www.county-borders.co.uk](http://www.county-borders.co.uk/) (em inglês) (selecione o arquivo `Definition A: SHP OSGB36 Simplified`, que é uma versão das fronteiras entre os condados da Grã-Bretanha, pré-1843, projetada sobre o OSGB, sem porções destacadas dos condados). Extraia o conteúdo do arquivo ZIP para a mesma pasta do seu projeto +* Faça download de um shapefile contendo polígonos dos condados históricos da Inglaterra e do País de Gales em [https://www.county-borders.co.uk](https://www.county-borders.co.uk/) (em inglês) (selecione o arquivo `Definition A: SHP OSGB36 Simplified`, que é uma versão das fronteiras entre os condados da Grã-Bretanha, pré-1843, projetada sobre o OSGB, sem porções destacadas dos condados). Extraia o conteúdo do arquivo ZIP para a mesma pasta do seu projeto * Clique no botão _Adicionar Camada Vetorial_ (remete a uma linha de gráfico), na barra de ferramentas Administrar Camadas, e então em _Explorar_ para selecionar e adicionar o shapefile `UKDefinitionA.shp` da pasta extraída. 
@@ -165,7 +165,7 @@ Ao alterar qualquer uma destas configurações contidas na página estilo gradua A geocodificação é uma técnica para além da simplesmente unir tabelas, pois cada linha individual dos seus dados mantém-se visível e passível de análise dentro do próprio software SIG, como pontos individuais no mapa (como na tabela 2). A princípio, o objetivo é atribuir a cada dado um par de coordenadas. A maior parte dos dados históricos não podem ser geocodificados automaticamente por meio de ferramentas online ou plugins do QGIS. Portanto, o processo de geocodificação deve ser realizado manualmente para combinar cada linha de dados a uma localização. Isso é uma tarefa operacional simples, unindo (combinando) os seus dados com um gazetteer (uma lista de lugares com suas coordenadas). Vários dicionários geográficos estão disponíveis, mas apenas alguns são pertinentes em relação a dados históricos. Por exemplo, para a Inglaterra: -- [Association of British Counties Gazetteer](http://www.gazetteer.org.uk/index.php) (em inglês) (dados disponíveis para compra) +- [Association of British Counties Gazetteer](https://www.gazetteer.org.uk/index.php) (em inglês) (dados disponíveis para compra) - [The Historical Gazetteer of England's Place Names](https://placenames.org.uk/) (em inglês) permite geocodificar as localizações individuais apenas online. Infelizmente, a API para acessar esses dados para geocodificação automática, conhecida como DEEP, parte do Unlock, já não está disponível (final de 2016). Uma melhor interface de navegação está disponível para aqueles com logins ingleses de Ensino Superior, em [Survey of English Place-Names](https://epns.nottingham.ac.uk/browse) (em inglês). Caso não tenha nenhum gazetteer pertinente para a sua área ou período de estudo, é possível facilmente criar o seu próprio através de um mapa vetorial, criando uma camada de pontos contendo a informação necessária dentro do QGIS (talvez ao mesclar as informações de camadas pré-existentes) e exportando o resultando com coordenadas XY. Para determinadas partes do mundo não existem nem dicionários geográficos históricos, nem mapas vetoriais adequados para certos períodos históricos. Nesses casos, terá que se aventurar a criar seu próprio vetor e a sua camada de pontos; consulte o tutorial [Criar novas camadas vetoriais com o QGIS 2.0](/pt/licoes/camadas-vetoriais-qgis). @@ -175,7 +175,7 @@ Caso não tenha nenhum gazetteer pertinente para a sua área ou período de estu Uma vez completa a primeira parte, pode-se avançar e seguir os passos abaixo no mesmo projeto. Caso contrário, ou caso deseje criar um novo projeto em branco, siga as instruções da primeira seção para: * Criar um novo arquivo de projeto no QGIS, e configurar o Sistema de Referência Coordenado para `OSGB 1936/the British National Grid` com a ID de autoridade `ESPG:27700` como um sistema de projeção de coordenadas usando **Projeto** > **Propriedades** > **SRC**; -* Faça o download de um shapefile contendo polígonos dos condados históricos da Inglaterra e do País de Gales em [http://www.county-borders.co.uk/](http://www.county-borders.co.uk/) (em inglês) (selecione a definição A e o OSGB). +* Faça o download de um shapefile contendo polígonos dos condados históricos da Inglaterra e do País de Gales em [https://www.county-borders.co.uk/](https://www.county-borders.co.uk/) (em inglês) (selecione a definição A e o OSGB). 
No seu projeto pré-existente, pode então começar a adicionar mais camadas para criar o gazetteer: diff --git a/pt/licoes/georreferenciamento-qgis.md b/pt/licoes/georreferenciamento-qgis.md index 9935e8822f..0508a43cc2 100644 --- a/pt/licoes/georreferenciamento-qgis.md +++ b/pt/licoes/georreferenciamento-qgis.md @@ -1,209 +1,209 @@ ---- -title: Georreferenciamento com o QGIS 3.20 -layout: lesson -collection: lessons -slug: georreferenciamento-qgis -original: georeferencing-qgis -date: 2013-12-13 -translation_date: 2023-05-01 -authors: -- Jim Clifford -- Josh MacFadyen -- Daniel Macfarlane -reviewers: -- Finn Arne Jørgensen -- Peter Webster -- Abby Schreiber -editors: -- Adam Crymble -translator: -- Ângela Pité -translation-editor: -- Joana Vieira Paulino -translation-reviewer: -- Luis Ferla -- Ana Sofia Ribeiro -difficulty: 2 -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/434 -activity: transforming -topics: [mapping, data-visualization] -abstract: "Nesta lição aprenderá como georreferenciar mapas históricos para que possam ser adicionados a um SIG como uma camada raster." -avatar_alt: Mapa de uma cidade no topo de uma montanha -doi: 10.46430/phpt0039 ---- - -{% include toc.html %} - - -> Nota de tradução 1: Embora a lição original em inglês se refira à versão 2.0 do Quantum GIS (QGIS), na presente tradução da lição foi tomada a opção de usar uma versão mais recente do QGIS - a 3.20 - tendo-se efetuado as modificações necessárias para adaptar a lição a esta versão do software. -Tenha em atenção que, nos links que remetem para outras lições sobre o QGIS, a versão utilizada nestas será diferente da utilizada nesta tradução. - -> Nota de tradução 2: Na tradução desta lição usou-se a versão em pt-pt podendo-se, no entanto, optar também pela versão em pt-br do QGIS. - - -Objetivos da lição ------------- - -Nesta lição aprenderá como georreferenciar mapas históricos para que possam ser adicionados a um SIG como uma camada raster. O georreferenciamento é importante para quem queira digitalizar com precisão dados presentes num mapa em suporte papel e, visto que os historiadores trabalham sobretudo no domínio do documento em papel, o georreferenciamento é uma das ferramentas que mais frequentemente utilizamos. Esta técnica utiliza uma série de pontos de controlo para proporcionar a um objeto bidimensional, como um mapa em suporte papel, as coordenadas geográficas reais de que necessita para se alinhar com as características tridimensionais da terra no software SIG (em [Introdução ao Google Maps e Google Earth](/en/lessons/googlemaps-googleearth) (em inglês) vimos uma 'sobreposição', que é uma versão mais simplificada de georreferenciamento do Google Earth). - -O georreferenciamento de um mapa histórico requer um conhecimento tanto da geografia como da história do local que se está a estudar, de modo a garantir exatidão. As paisagens construídas e naturais mudaram ao longo do tempo e é importante confirmar se a localização dos seus pontos de controlo - quer sejam casas, intersecções ou mesmo cidades - tem permanecido constante. Introduzir pontos de controlo num SIG é fácil, mas nos bastidores o georreferenciamento usa processos complexos de transformação e compressão. Estes são utilizados para corrigir as distorções e imprecisões encontradas em muitos mapas históricos e ‘esticar’ os mapas para que se ajustem às coordenadas geográficas. 
Em cartografia isto é conhecido como [*rubber-sheeting*](https://perma.cc/4554-EWZB) (em inglês) - uma correção geométrica - pois trata o mapa como se fosse feito de borracha (*rubber*, em inglês) e os pontos de controlo como se fossem tachas 'fixando' o documento histórico a uma superfície tridimensional como o globo. - -## Começando - -Antes de começar a georreferenciar no QGIS é necessário ativar os Plugins apropriados (Módulos na versão do software em pt-pt). Na barra de ferramentas vá a Módulos (Plugins) -> Gerir e instalar módulos (plugins). - -{% include figure.html filename="tr-pt-georeferencing-qgis-1.png" alt="Imagem com detalhe do menu para gerir e instalar módulos" caption="Figura 1" %} - -Irá abrir uma janela intitulada "Módulos" (Plugins). Desça até *Georeferencer* GDAL, marque a caixa ao lado e clique "OK". - -{% include figure.html filename="tr-pt-georeferencing-qgis-2.png" alt="Imagem com lista dos módulos disponíveis" caption="Figura 2" %} - -- Neste ponto é preciso encerrar e reabrir o QGIS. Para o propósito deste exemplo, e para manter as coisas tão simples quanto possível, não reinicie o seu projeto existente e, em vez disso, inicie um novo projeto. -- Configure corretamente o [Sistema de Referência de Coordenadas (SRC) - *Coordenate Reference System (CRS)*](https://perma.cc/58HF-WURV) (em inglês). (Veja [Instalação do QGIS 2.0 e adição de camadas](/en/lessons/qgis-layers) (em inglês) para se relembrar. Tenha em mente que a versão do QGIS dessa lição será diferente da utilizada nesta tradução.) -- Guarde este novo projeto (no menu "Ficheiro", selecione "Guardar") e nomeie-o 'georreferenciamento'. -- Adicione a camada 'coastine_polygon'. (Veja [Instalação do QGIS 2.0 e adição de camadas](/en/lessons/qgis-layers) (em inglês) para relembrar. Tenha em atenção que a versão do QGIS dessa lição será diferente da utilizada nesta tradução.) - -## Abrir as Camadas SIG necessárias - -Para o estudo de caso da Ilha do Príncipe Eduardo (*Prince Edward Island* (PEI), em inglês) - utilizaremos os limites da cidade como pontos de controlo, pois estes foram estabelecidos em 1764 por Samuel Holland, para além de estarem identificados na maioria dos mapas da PEI e terem mudado pouco desde a sua criação. - -*Faça o download de 'lot_township_polygon':* - -Este é o *shapefile* que contém a camada vetorial atual que iremos usar para georreferenciar o mapa histórico. Note que, em 1764, não foram dados nomes aos municípios, mas um número de lote, pelo que normalmente são referidos na PEI como "Lotes" (*lots*, em inglês). Daí o nome do ficheiro 'lot_township_polygon'. - -- Navegue para o link abaixo no seu navegador de internet e faça o download do ficheiro 'lot_township_polygon': - -[http://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP](http://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP) - -- Depois de fazer o download do ficheiro coloque-o numa pasta que possa encontrar mais tarde e descompacte o ficheiro. (Lembre-se de manter todos os ficheiros juntos, uma vez que todos são necessários para abrir a camada no seu SIG). - -{% include figure.html filename="geo310.png" alt="Imagem da página com informação SIG no website Prince Edward Island" caption="Figura 3" %} - -*Adicione 'lot_township_polygon' ao QGIS:* - -- Em "Camada" no menu superior escolha "Adicionar" e "Adicionar Camada Vetorial" (alternativamente, o mesmo ícone que vê ao lado de "Adicionar Camada Vetorial" também pode ser selecionado a partir da barra de ferramentas). 
-- Clique em "Procurar". Navegue até ao seu ficheiro descompactado e selecione o ficheiro intitulado 'lot_township_polygon.shp'. -- Clique em "Abrir". - -{% include figure.html filename="geo41.png" alt="Imagem do ícone de menu Adicionar Camada Vetorial" caption="Figura 4" %} - -Para mais informações sobre como adicionar e visualizar camadas veja [Instalação do QGIS 2.0 e adição de camadas](/en/lessons/qgis-layers) (em inglês). Tenha em atenção que a versão do QGIS dessa lição será diferente da utilizada nesta tradução. - -{% include figure.html filename="tr-pt-georeferencing-qgis-5.png" alt="Imagem da área de trabalho do QGIS com os shapefiles incluídos" caption="Figura 5" %} - -## Abrir a ferramenta *Georeferencer* / Georreferenciador - -*Georeferencer* está agora disponível em "Raster" no menu superior - selecione-a. A ferramenta irá agora ter o título de "Georreferenciador". - -{% include figure.html filename="tr-pt-georeferencing-qgis-6.png" alt="Imagem com as opções do menu Raster" caption="Figura 6" %} - -*Adicione o seu mapa histórico:* - -- Na janela que surgirá clique no botão "Abrir Raster" no canto superior esquerdo (que é idêntico ao botão de "Adicionar camada raster"). - -{% include figure.html filename="geo71.png" alt="Imagem do ícone de menu Adicionar camada raster" caption="Figura 7" %} - -- Procure o ficheiro intitulado 'PEI_LakeMap1863.jpg' no seu computador e selecione "Abrir". [O download do ficheiro pode ser realizado aqui](https://geospatialhistorian.files.wordpress.com/2013/02/pei_lakemap1863.jpg), sendo que a sua localização original era no antigo repositório de mapas online *[Island Imagined](https://islandimagined.ca/islandora/object/imagined:208687)* (em inglês). -- Deverá, em seguida, definir o sistema de coordenadas desta camada. Na caixa "Filtro" procure por '2291′, e depois na caixa abaixo selecione 'NAD83 (CSRS98)/Príncipe Eduardo ...'. - -O resultado será o seguinte: - -{% include figure.html filename="tr-pt-georeferencing-qgis-8.png" alt="Imagem com visualização do ficheiro raster incluído" caption="Figura 8" %} - -*Adicionar pontos de controlo:* - -Planeie previamente as localizações que vai utilizar como pontos de controlo antes dos passos que se seguem. É muito mais fácil explorar primeiro todo o mapa histórico, e obter assim uma boa ideia dos melhores pontos a utilizar para os ter em conta mais tarde. - -Algumas sugestões para escolher os pontos de controlo: - -- **Quantos** pontos precisa? Normalmente quantos mais pontos atribuir, mais preciso será o seu mapa georreferenciado. Dois pontos de controlo indicarão ao SIG para escalar e rodar o mapa em relação a esses dois pontos, mas para se conseguir verdadeiramente executar um *rubbersheet* do mapa histórico é necessário adicionar mais pontos. -- **Onde** deve colocar os pontos de controlo? Escolha áreas tão próximas quanto possível dos quatro cantos do seu mapa para que essas áreas nas extremidades não sejam omitidas no *rubbersheeting*. -- Selecione pontos de controlo adicionais perto da sua área de interesse. Tudo entre os quatro pontos de controlo dos cantos deve ser georreferenciado de forma uniforme, mas se estiver preocupado com a precisão de um lugar em particular certifique-se de que seleciona pontos de controlo adicionais nessa área. -- Escolha o meio de cruzamentos e estradas, porque as margens das estradas mudaram ligeiramente ao longo do tempo à medida que as melhorias nestas iam sendo efetuadas. -- Verifique se os seus pontos de controlo não mudaram de localização ao longo do tempo. 
As estradas foram frequentemente redirecionadas, e mesmo casas e outros edifícios podem ter sido deslocados, especialmente nas [regiões atlânticas do Canadá](https://perma.cc/H8DK-KBXC) (em inglês). - -*Adicione o seu primeiro ponto de controlo:* - -**Primeiro**, navegue até a localização do seu primeiro ponto de controlo no **mapa histórico**. - -- Clique na lupa de zoom na barra de ferramentas da janela ou utilize a roda do mouse para fazer zoom. - -{% include figure.html filename="tr-pt-georeferencing-qgis-9.png" alt="Imagem com opções zoom no menu de ferramentas" caption="Figura 9" %} - -- Amplie para um ponto que possa reconhecer, tanto no seu mapa impresso como no seu SIG. - -- Clique em "Adicionar Ponto" na barra de ferramentas. - -{% include figure.html filename="tr-pt-georeferencing-qgis-10.png" alt="Imagem com opções de pontos de controlo no menu de ferramentas" caption="Figura 10" %} - -- Clique no local no mapa impresso que pode localizar no seu SIG (ou seja, o ponto de controlo). Uma janela abrirá para introduzir as coordenadas X e Y que correspondam ao ponto indicado ou, então, selecionar um ponto correspondente "A partir da tela do mapa". Clique nessa segunda opção. - -{% include figure.html filename="tr-pt-georeferencing-qgis-11.png" alt="Imagem com visualização do mapa e com janela de menu para introdução de coordenadas" caption="Figura 11" %} - -- A janela do "Georreferenciador" irá minimizar automaticamente. Clique no local do mapa no QGIS que coincida com o ponto de controlo. -- As coordenadas X e Y do ponto selecionado serão adicionadas imediatamente à janela "Introduza as coordenadas do mapa", assim como o SRC que lhes está associado. Se estiver satisfeito com o ponto selecionado clique em "OK" para criar o seu primeiro ponto de controlo. - -- Nesta fase identificámos um problema nos limites dos lotes. Planeámos utilizar a localização onde o limite sul do Lote 1 no extremo oeste da Província contém uma curva pronunciada perto do centro da massa terrestre. No entanto, nota-se que nem todas estas curvas pronunciadas nos limites dos lotes coincidem com o mapa histórico. É possível que os limites dos lotes tenham mudado um pouco nos 250 anos desde que foram estabelecidos, por isso é melhor escolher o ponto do qual se tem mais certezas. Neste caso a curva pronunciada entre o Lote 2 e o Lote 3 estava bem (veja a seta na imagem abaixo). Foi o limite dos Lotes 3 e 4 que mudou. A discrepância entre os limites dos lotes 1 e 2 mostra a necessidade de inserir mais pontos de controlo para executar corretamente um *rubbersheeting* neste mapa parcialmente distorcido de 1863, de forma a corresponder à camada da província no SIG. - -{% include figure.html filename="geo121.png" alt="Imagem com visualização da sobreposição dos mapas raster e vectorial" caption="Figura 12" %} - -*Adicione, pelo menos, mais um ponto de controlo:* - -- Regresse à janela do "Georreferenciador" e repita os passos em "*Adicione o seu primeiro ponto de controlo*" descritos acima, de modo a acrescentar mais pontos de controlo. -- Adicione um ponto perto do lado oposto do seu mapa impresso (quanto mais afastados estiverem os seus pontos de controlo, mais preciso é o processo de georreferenciamento) e outro perto de Charlottetown. -- Regresse à janela do "Georreferenciador". Deverá agora ver três pontos vermelhos no mapa impresso e três registos na tabela GCP (*Ground Control Points* - Pontos de Controlo no Terreno) na parte inferior da janela. 
- -{% include figure.html filename="tr-pt-georeferencing-qgis-13.png" alt="Imagem com visualização do mapa raster e respetivos pontos de controlo" caption="Figura 13" %} - -*Determine as configurações da transformação:* - -Antes de clicar em "Iniciar georreferenciamento" e começar o processo de georreferenciamento automático, especifique ao QGIS onde guardar o ficheiro (que será um ficheiro raster), como deve interpretar os seus pontos de controlo e como deve comprimir a imagem. - -- Clique no botão "Configuração da Transformação". - -{% include figure.html filename="geo141.png" alt="Imagem com ícone do botão Configuração da Transformação" caption="Figura 14" %} - -A maioria destas opções de configuração pode ser deixada como está predefinida. Neste exemplo foi usado: tipo de transformação "linear", método de reamostragem "vizinho mais próximo" e compressão "LZW". O SRC (Sistema de Referência de Coordenadas) de destino pode ficar o do projeto, mas pode também usar esta função para dar ao novo raster um sistema de referência diferente. - -- O seu novo ficheiro raster georreferenciado será guardado por predefinição na pasta do projeto. [Tif](https://perma.cc/WZ6W-J4YF) é o formato predefinido para rasters georreferenciados no QGIS. -- Tenha em mente que um ficheiro Tif vai ser muito mais pesado que o seu mapa original, mesmo com compressão LZW. Por isso, certifique-se de que tem espaço suficiente se estiver a utilizar, por exemplo, uma USB pen drive. (*Aviso*: o ficheiro Tif produzido a partir deste 6.8 Mb .jpg será **maior que 1GB** depois de georreferenciado). Uma forma de controlar o tamanho do ficheiro raster georreferenciado e manter uma resolução suficientemente alta para ter legibilidade é recortar apenas a área do mapa importante para o projeto. Poderá também procurar se está disponível uma versão de menor resolução da imagem do mapa histórico. - -- Não será necessário um [*world file*](https://perma.cc/A9RZ-J8VG) (em inglês), a menos que queira georreferenciar novamente a mesma imagem noutro SIG ou se alguém precisar de georreferenciar a imagem e não tiver acesso aos seus dados SIG, Sistema de Referência de Coordenadas, *etc.*,... -- É possível selecionar 'Use 0 para transparência quando necessário' de forma a eliminar espaços negros à volta das margens do mapa, mas não é essencial, e pode experimentar conforme precisar. -- Não será necessário definir a resolução de saída. -- Certifique-se de que "Carregar no QGIS quando concluído" está selecionado de modo a poupar um passo. Assim irá adicionar automaticamente o novo ficheiro ao seu SIG para que mais tarde não tenha de procurar o ficheiro Tif. Depois de configurada a transformação clique em "OK". - -{% include figure.html filename="tr-pt-georeferencing-qgis-15.png" alt="Imagem da janela de configurações da transformação" caption="Figura 15" %} - -## Georreferenciar! - -- Clique no botão "Iniciar georreferenciamento" na barra de ferramentas (ao lado de "Abrir Raster") - o que dá início ao processo de georreferenciamento. 
- -{% include figure.html filename="geo161.png" alt="Imagem do ícone do botão Iniciar georreferenciamento" caption="Figura 16" %} - -{% include figure.html filename="tr-pt-georeferencing-qgis-17.png" alt="Imagem de janela com barra de indicação de progresso do georreferenciamento" caption="Figura 17" %} - -{% include figure.html filename="tr-pt-georeferencing-qgis-18.png" alt="Imagem da área de trabalho do QGIS com o raster resultante do processo de georreferenciamento" caption="Figura 18" %} - -*Explore o seu mapa:* - -- Arraste a nova camada 'PEI_LakeMap1863_alterado' para o final do seu índice de camadas (ou seja, abaixo da camada 'lot_township_polygon'). - -{% include figure.html filename="tr-pt-georeferencing-qgis-19.png" alt="Imagem da área de trabalho do QGIS com o shapefile dos polígonos por cima do raster" caption="Figura 19" %} - -- Mude o preenchimento da camada 'lot_township_polygon' para "Sem preenchimento", selecionando a camada e depois em "Propriedades" escolher Simbologia -> Preenchimento Simples -> Estilo de Preenchimento -> Sem preenchimento. Clique em "OK". - -{% include figure.html filename="tr-pt-georeferencing-qgis-20.png" alt="Imagem com a janela das configurações de simbologia do shapefile" caption="Figura 20" %} - -- Agora deve conseguir ver a camada SIG atual com o mapa histórico no fundo. - -{% include figure.html filename="tr-pt-georeferencing-qgis-21.png" alt="Imagem da área de trabalho do QGIS com o shapefile dos polígonos transparentes por cima do raster" caption="Figura 21" %} - -Como já tem um mapa georreferenciado no seu SIG pode explorar a camada, ajustar a transparência, o contraste e o brilho e, novamente, [Criar novas camadas vetoriais com o QGIS 2.0](/pt/licoes/camadas-vetoriais-qgis) para digitalizar parte da informação histórica que foi criada. (Tenha em mente que a versão do QGIS da lição no link será diferente da utilizada nesta tradução.) -Por exemplo, este mapa georreferenciado da PEI mostra a localização de todas as habitações em 1863, incluindo o nome do chefe de família. Através da atribuição de pontos no mapa é possível introduzir as localizações das habitações e nomes dos proprietários e, a seguir, analisar ou partilhar essa nova camada geo-espacial como um *shapefile*. - -Ao digitalizar vetores de linhas, tais como estradas ou linhas costeiras, pode comparar a localização destes elementos com outros dados históricos ou simplesmente compará-los visualmente com a camada 'lot_township_polygon' neste SIG. - -Em processos mais avançados pode, inclusivamente, sobrepor esta imagem georreferenciada com um DEM (*Digital Elevation Model* - Modelo de Elevação Digital) para proporcionar-lhe um efeito de altura através de sombras (*hillshade*) ou um efeito 3D e, assim, realizar um '*fly-over*' e ter uma perspetiva aérea das habitações da PEI no século XIX. 
- -*Esta lição é parte do [Geospatial Historian](https://perma.cc/6AN6-N7LX).* +--- +title: Georreferenciamento com o QGIS 3.20 +layout: lesson +collection: lessons +slug: georreferenciamento-qgis +original: georeferencing-qgis +date: 2013-12-13 +translation_date: 2023-05-01 +authors: +- Jim Clifford +- Josh MacFadyen +- Daniel Macfarlane +reviewers: +- Finn Arne Jørgensen +- Peter Webster +- Abby Schreiber +editors: +- Adam Crymble +translator: +- Ângela Pité +translation-editor: +- Joana Vieira Paulino +translation-reviewer: +- Luis Ferla +- Ana Sofia Ribeiro +difficulty: 2 +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/434 +activity: transforming +topics: [mapping, data-visualization] +abstract: "Nesta lição aprenderá como georreferenciar mapas históricos para que possam ser adicionados a um SIG como uma camada raster." +avatar_alt: Mapa de uma cidade no topo de uma montanha +doi: 10.46430/phpt0039 +--- + +{% include toc.html %} + + +> Nota de tradução 1: Embora a lição original em inglês se refira à versão 2.0 do Quantum GIS (QGIS), na presente tradução da lição foi tomada a opção de usar uma versão mais recente do QGIS - a 3.20 - tendo-se efetuado as modificações necessárias para adaptar a lição a esta versão do software. +Tenha em atenção que, nos links que remetem para outras lições sobre o QGIS, a versão utilizada nestas será diferente da utilizada nesta tradução. + +> Nota de tradução 2: Na tradução desta lição usou-se a versão em pt-pt podendo-se, no entanto, optar também pela versão em pt-br do QGIS. + + +Objetivos da lição +------------ + +Nesta lição aprenderá como georreferenciar mapas históricos para que possam ser adicionados a um SIG como uma camada raster. O georreferenciamento é importante para quem queira digitalizar com precisão dados presentes num mapa em suporte papel e, visto que os historiadores trabalham sobretudo no domínio do documento em papel, o georreferenciamento é uma das ferramentas que mais frequentemente utilizamos. Esta técnica utiliza uma série de pontos de controlo para proporcionar a um objeto bidimensional, como um mapa em suporte papel, as coordenadas geográficas reais de que necessita para se alinhar com as características tridimensionais da terra no software SIG (em [Introdução ao Google Maps e Google Earth](/en/lessons/googlemaps-googleearth) (em inglês) vimos uma 'sobreposição', que é uma versão mais simplificada de georreferenciamento do Google Earth). + +O georreferenciamento de um mapa histórico requer um conhecimento tanto da geografia como da história do local que se está a estudar, de modo a garantir exatidão. As paisagens construídas e naturais mudaram ao longo do tempo e é importante confirmar se a localização dos seus pontos de controlo - quer sejam casas, intersecções ou mesmo cidades - tem permanecido constante. Introduzir pontos de controlo num SIG é fácil, mas nos bastidores o georreferenciamento usa processos complexos de transformação e compressão. Estes são utilizados para corrigir as distorções e imprecisões encontradas em muitos mapas históricos e ‘esticar’ os mapas para que se ajustem às coordenadas geográficas. Em cartografia isto é conhecido como [*rubber-sheeting*](https://perma.cc/4554-EWZB) (em inglês) - uma correção geométrica - pois trata o mapa como se fosse feito de borracha (*rubber*, em inglês) e os pontos de controlo como se fossem tachas 'fixando' o documento histórico a uma superfície tridimensional como o globo. 
+
+## Começando
+
+Antes de começar a georreferenciar no QGIS é necessário ativar os Plugins apropriados (Módulos na versão do software em pt-pt). Na barra de ferramentas vá a Módulos (Plugins) -> Gerir e instalar módulos (plugins).
+
+{% include figure.html filename="tr-pt-georeferencing-qgis-1.png" alt="Imagem com detalhe do menu para gerir e instalar módulos" caption="Figura 1" %}
+
+Irá abrir uma janela intitulada "Módulos" (Plugins). Desça até *Georeferencer* GDAL, marque a caixa ao lado e clique "OK".
+
+{% include figure.html filename="tr-pt-georeferencing-qgis-2.png" alt="Imagem com lista dos módulos disponíveis" caption="Figura 2" %}
+
+- Neste ponto é preciso encerrar e reabrir o QGIS. Para o propósito deste exemplo, e para manter as coisas tão simples quanto possível, não reinicie o seu projeto existente e, em vez disso, inicie um novo projeto.
+- Configure corretamente o [Sistema de Referência de Coordenadas (SRC) - *Coordinate Reference System (CRS)*](https://perma.cc/58HF-WURV) (em inglês). (Veja [Instalação do QGIS 2.0 e adição de camadas](/en/lessons/qgis-layers) (em inglês) para se relembrar. Tenha em mente que a versão do QGIS dessa lição será diferente da utilizada nesta tradução.)
+- Guarde este novo projeto (no menu "Ficheiro", selecione "Guardar") e nomeie-o 'georreferenciamento'.
+- Adicione a camada 'coastline_polygon'. (Veja [Instalação do QGIS 2.0 e adição de camadas](/en/lessons/qgis-layers) (em inglês) para relembrar. Tenha em atenção que a versão do QGIS dessa lição será diferente da utilizada nesta tradução.)
+
+## Abrir as Camadas SIG necessárias
+
+Para o estudo de caso da Ilha do Príncipe Eduardo (*Prince Edward Island* (PEI), em inglês) - utilizaremos os limites da cidade como pontos de controlo, pois estes foram estabelecidos em 1764 por Samuel Holland, para além de estarem identificados na maioria dos mapas da PEI e terem mudado pouco desde a sua criação.
+
+*Faça o download de 'lot_township_polygon':*
+
+Este é o *shapefile* que contém a camada vetorial atual que iremos usar para georreferenciar o mapa histórico. Note que, em 1764, não foram dados nomes aos municípios, mas um número de lote, pelo que normalmente são referidos na PEI como "Lotes" (*lots*, em inglês). Daí o nome do ficheiro 'lot_township_polygon'.
+
+- Navegue para o link abaixo no seu navegador de internet e faça o download do ficheiro 'lot_township_polygon':
+
+[https://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP](https://www.gov.pe.ca/gis/license_agreement.php3?name=lot_town&file_format=SHP)
+
+- Depois de fazer o download do ficheiro coloque-o numa pasta que possa encontrar mais tarde e descompacte o ficheiro. (Lembre-se de manter todos os ficheiros juntos, uma vez que todos são necessários para abrir a camada no seu SIG).
+
+{% include figure.html filename="geo310.png" alt="Imagem da página com informação SIG no website Prince Edward Island" caption="Figura 3" %}
+
+*Adicione 'lot_township_polygon' ao QGIS:*
+
+- Em "Camada" no menu superior escolha "Adicionar" e "Adicionar Camada Vetorial" (alternativamente, o mesmo ícone que vê ao lado de "Adicionar Camada Vetorial" também pode ser selecionado a partir da barra de ferramentas).
+- Clique em "Procurar". Navegue até ao seu ficheiro descompactado e selecione o ficheiro intitulado 'lot_township_polygon.shp'.
+- Clique em "Abrir".
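(Aparte opcional e hipotético) Se tiver o GDAL/OGR instalado com os respetivos *bindings* Python, pode confirmar fora do QGIS que o download ficou íntegro - o esboço assume que o shapefile descompactado está na pasta de trabalho:

```python
# Esboço ilustrativo: abrir o shapefile com o OGR e inspecioná-lo.
from osgeo import ogr

ds = ogr.Open("lot_township_polygon.shp")
camada = ds.GetLayer(0)
print(camada.GetFeatureCount(), "polígonos encontrados")
print(camada.GetSpatialRef().ExportToWkt()[:60], "...")  # início da definição do SRC
```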
+
+{% include figure.html filename="geo41.png" alt="Imagem do ícone de menu Adicionar Camada Vetorial" caption="Figura 4" %}
+
+Para mais informações sobre como adicionar e visualizar camadas veja [Instalação do QGIS 2.0 e adição de camadas](/en/lessons/qgis-layers) (em inglês). Tenha em atenção que a versão do QGIS dessa lição será diferente da utilizada nesta tradução.
+
+{% include figure.html filename="tr-pt-georeferencing-qgis-5.png" alt="Imagem da área de trabalho do QGIS com os shapefiles incluídos" caption="Figura 5" %}
+
+## Abrir a ferramenta *Georeferencer* / Georreferenciador
+
+*Georeferencer* está agora disponível em "Raster" no menu superior - selecione-a. A ferramenta irá agora ter o título de "Georreferenciador".
+
+{% include figure.html filename="tr-pt-georeferencing-qgis-6.png" alt="Imagem com as opções do menu Raster" caption="Figura 6" %}
+
+*Adicione o seu mapa histórico:*
+
+- Na janela que surgirá clique no botão "Abrir Raster" no canto superior esquerdo (que é idêntico ao botão de "Adicionar camada raster").
+
+{% include figure.html filename="geo71.png" alt="Imagem do ícone de menu Adicionar camada raster" caption="Figura 7" %}
+
+- Procure o ficheiro intitulado 'PEI_LakeMap1863.jpg' no seu computador e selecione "Abrir". [O download do ficheiro pode ser realizado aqui](https://geospatialhistorian.files.wordpress.com/2013/02/pei_lakemap1863.jpg), sendo que a sua localização original era no antigo repositório de mapas online *[Island Imagined](https://islandimagined.ca/islandora/object/imagined:208687)* (em inglês).
+- Deverá, em seguida, definir o sistema de coordenadas desta camada. Na caixa "Filtro" procure por '2291', e depois na caixa abaixo selecione 'NAD83 (CSRS98)/Príncipe Eduardo ...'.
+
+O resultado será o seguinte:
+
+{% include figure.html filename="tr-pt-georeferencing-qgis-8.png" alt="Imagem com visualização do ficheiro raster incluído" caption="Figura 8" %}
+
+*Adicionar pontos de controlo:*
+
+Planeie previamente as localizações que vai utilizar como pontos de controlo antes dos passos que se seguem. É muito mais fácil explorar primeiro todo o mapa histórico, e obter assim uma boa ideia dos melhores pontos a utilizar para os ter em conta mais tarde.
+
+Algumas sugestões para escolher os pontos de controlo:
+
+- **Quantos** pontos precisa? Normalmente quantos mais pontos atribuir, mais preciso será o seu mapa georreferenciado. Dois pontos de controlo indicarão ao SIG para escalar e rodar o mapa em relação a esses dois pontos, mas para se conseguir verdadeiramente executar um *rubbersheet* do mapa histórico é necessário adicionar mais pontos.
+- **Onde** deve colocar os pontos de controlo? Escolha áreas tão próximas quanto possível dos quatro cantos do seu mapa para que essas áreas nas extremidades não sejam omitidas no *rubbersheeting*.
+- Selecione pontos de controlo adicionais perto da sua área de interesse. Tudo entre os quatro pontos de controlo dos cantos deve ser georreferenciado de forma uniforme, mas se estiver preocupado com a precisão de um lugar em particular certifique-se de que seleciona pontos de controlo adicionais nessa área.
+- Escolha o meio de cruzamentos e estradas, porque as margens das estradas mudaram ligeiramente ao longo do tempo à medida que as melhorias nestas iam sendo efetuadas.
+- Verifique se os seus pontos de controlo não mudaram de localização ao longo do tempo.
As estradas foram frequentemente redirecionadas, e mesmo casas e outros edifícios podem ter sido deslocados, especialmente nas [regiões atlânticas do Canadá](https://perma.cc/H8DK-KBXC) (em inglês). + +*Adicione o seu primeiro ponto de controlo:* + +**Primeiro**, navegue até a localização do seu primeiro ponto de controlo no **mapa histórico**. + +- Clique na lupa de zoom na barra de ferramentas da janela ou utilize a roda do mouse para fazer zoom. + +{% include figure.html filename="tr-pt-georeferencing-qgis-9.png" alt="Imagem com opções zoom no menu de ferramentas" caption="Figura 9" %} + +- Amplie para um ponto que possa reconhecer, tanto no seu mapa impresso como no seu SIG. + +- Clique em "Adicionar Ponto" na barra de ferramentas. + +{% include figure.html filename="tr-pt-georeferencing-qgis-10.png" alt="Imagem com opções de pontos de controlo no menu de ferramentas" caption="Figura 10" %} + +- Clique no local no mapa impresso que pode localizar no seu SIG (ou seja, o ponto de controlo). Uma janela abrirá para introduzir as coordenadas X e Y que correspondam ao ponto indicado ou, então, selecionar um ponto correspondente "A partir da tela do mapa". Clique nessa segunda opção. + +{% include figure.html filename="tr-pt-georeferencing-qgis-11.png" alt="Imagem com visualização do mapa e com janela de menu para introdução de coordenadas" caption="Figura 11" %} + +- A janela do "Georreferenciador" irá minimizar automaticamente. Clique no local do mapa no QGIS que coincida com o ponto de controlo. +- As coordenadas X e Y do ponto selecionado serão adicionadas imediatamente à janela "Introduza as coordenadas do mapa", assim como o SRC que lhes está associado. Se estiver satisfeito com o ponto selecionado clique em "OK" para criar o seu primeiro ponto de controlo. + +- Nesta fase identificámos um problema nos limites dos lotes. Planeámos utilizar a localização onde o limite sul do Lote 1 no extremo oeste da Província contém uma curva pronunciada perto do centro da massa terrestre. No entanto, nota-se que nem todas estas curvas pronunciadas nos limites dos lotes coincidem com o mapa histórico. É possível que os limites dos lotes tenham mudado um pouco nos 250 anos desde que foram estabelecidos, por isso é melhor escolher o ponto do qual se tem mais certezas. Neste caso a curva pronunciada entre o Lote 2 e o Lote 3 estava bem (veja a seta na imagem abaixo). Foi o limite dos Lotes 3 e 4 que mudou. A discrepância entre os limites dos lotes 1 e 2 mostra a necessidade de inserir mais pontos de controlo para executar corretamente um *rubbersheeting* neste mapa parcialmente distorcido de 1863, de forma a corresponder à camada da província no SIG. + +{% include figure.html filename="geo121.png" alt="Imagem com visualização da sobreposição dos mapas raster e vectorial" caption="Figura 12" %} + +*Adicione, pelo menos, mais um ponto de controlo:* + +- Regresse à janela do "Georreferenciador" e repita os passos em "*Adicione o seu primeiro ponto de controlo*" descritos acima, de modo a acrescentar mais pontos de controlo. +- Adicione um ponto perto do lado oposto do seu mapa impresso (quanto mais afastados estiverem os seus pontos de controlo, mais preciso é o processo de georreferenciamento) e outro perto de Charlottetown. +- Regresse à janela do "Georreferenciador". Deverá agora ver três pontos vermelhos no mapa impresso e três registos na tabela GCP (*Ground Control Points* - Pontos de Controlo no Terreno) na parte inferior da janela. 
+ +{% include figure.html filename="tr-pt-georeferencing-qgis-13.png" alt="Imagem com visualização do mapa raster e respetivos pontos de controlo" caption="Figura 13" %} + +*Determine as configurações da transformação:* + +Antes de clicar em "Iniciar georreferenciamento" e começar o processo de georreferenciamento automático, especifique ao QGIS onde guardar o ficheiro (que será um ficheiro raster), como deve interpretar os seus pontos de controlo e como deve comprimir a imagem. + +- Clique no botão "Configuração da Transformação". + +{% include figure.html filename="geo141.png" alt="Imagem com ícone do botão Configuração da Transformação" caption="Figura 14" %} + +A maioria destas opções de configuração pode ser deixada como está predefinida. Neste exemplo foi usado: tipo de transformação "linear", método de reamostragem "vizinho mais próximo" e compressão "LZW". O SRC (Sistema de Referência de Coordenadas) de destino pode ficar o do projeto, mas pode também usar esta função para dar ao novo raster um sistema de referência diferente. + +- O seu novo ficheiro raster georreferenciado será guardado por predefinição na pasta do projeto. [Tif](https://perma.cc/WZ6W-J4YF) é o formato predefinido para rasters georreferenciados no QGIS. +- Tenha em mente que um ficheiro Tif vai ser muito mais pesado que o seu mapa original, mesmo com compressão LZW. Por isso, certifique-se de que tem espaço suficiente se estiver a utilizar, por exemplo, uma USB pen drive. (*Aviso*: o ficheiro Tif produzido a partir deste 6.8 Mb .jpg será **maior que 1GB** depois de georreferenciado). Uma forma de controlar o tamanho do ficheiro raster georreferenciado e manter uma resolução suficientemente alta para ter legibilidade é recortar apenas a área do mapa importante para o projeto. Poderá também procurar se está disponível uma versão de menor resolução da imagem do mapa histórico. + +- Não será necessário um [*world file*](https://perma.cc/A9RZ-J8VG) (em inglês), a menos que queira georreferenciar novamente a mesma imagem noutro SIG ou se alguém precisar de georreferenciar a imagem e não tiver acesso aos seus dados SIG, Sistema de Referência de Coordenadas, *etc.*,... +- É possível selecionar 'Use 0 para transparência quando necessário' de forma a eliminar espaços negros à volta das margens do mapa, mas não é essencial, e pode experimentar conforme precisar. +- Não será necessário definir a resolução de saída. +- Certifique-se de que "Carregar no QGIS quando concluído" está selecionado de modo a poupar um passo. Assim irá adicionar automaticamente o novo ficheiro ao seu SIG para que mais tarde não tenha de procurar o ficheiro Tif. Depois de configurada a transformação clique em "OK". + +{% include figure.html filename="tr-pt-georeferencing-qgis-15.png" alt="Imagem da janela de configurações da transformação" caption="Figura 15" %} + +## Georreferenciar! + +- Clique no botão "Iniciar georreferenciamento" na barra de ferramentas (ao lado de "Abrir Raster") - o que dá início ao processo de georreferenciamento. 
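Para referência, aquilo que o botão dispara pode ser reproduzido, em traços largos, com os *bindings* Python do GDAL. O esboço abaixo é meramente ilustrativo: os pontos de controlo são fictícios e apenas o SRC ('2291', usado acima) vem da lição:

```python
# Esboço ilustrativo do processo de georreferenciamento com o GDAL:
# 1) anexar os pontos de controlo à imagem original;
# 2) reamostrar por "vizinho mais próximo" e comprimir com LZW,
#    como nas configurações da transformação descritas acima.
from osgeo import gdal

gcps = [  # gdal.GCP(x, y, z, pixel, linha) - valores inventados
    gdal.GCP(355000.0, 5145000.0, 0.0, 150.0, 200.0),
    gdal.GCP(395000.0, 5146000.0, 0.0, 1800.0, 240.0),
    gdal.GCP(375000.0, 5115000.0, 0.0, 900.0, 1500.0),
]

gdal.Translate("PEI_com_gcps.tif", "PEI_LakeMap1863.jpg",
               GCPs=gcps, outputSRS="EPSG:2291")

gdal.Warp("PEI_LakeMap1863_georref.tif", "PEI_com_gcps.tif",
          dstSRS="EPSG:2291", resampleAlg="near",
          creationOptions=["COMPRESS=LZW"])
```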
+ +{% include figure.html filename="geo161.png" alt="Imagem do ícone do botão Iniciar georreferenciamento" caption="Figura 16" %} + +{% include figure.html filename="tr-pt-georeferencing-qgis-17.png" alt="Imagem de janela com barra de indicação de progresso do georreferenciamento" caption="Figura 17" %} + +{% include figure.html filename="tr-pt-georeferencing-qgis-18.png" alt="Imagem da área de trabalho do QGIS com o raster resultante do processo de georreferenciamento" caption="Figura 18" %} + +*Explore o seu mapa:* + +- Arraste a nova camada 'PEI_LakeMap1863_alterado' para o final do seu índice de camadas (ou seja, abaixo da camada 'lot_township_polygon'). + +{% include figure.html filename="tr-pt-georeferencing-qgis-19.png" alt="Imagem da área de trabalho do QGIS com o shapefile dos polígonos por cima do raster" caption="Figura 19" %} + +- Mude o preenchimento da camada 'lot_township_polygon' para "Sem preenchimento", selecionando a camada e depois em "Propriedades" escolher Simbologia -> Preenchimento Simples -> Estilo de Preenchimento -> Sem preenchimento. Clique em "OK". + +{% include figure.html filename="tr-pt-georeferencing-qgis-20.png" alt="Imagem com a janela das configurações de simbologia do shapefile" caption="Figura 20" %} + +- Agora deve conseguir ver a camada SIG atual com o mapa histórico no fundo. + +{% include figure.html filename="tr-pt-georeferencing-qgis-21.png" alt="Imagem da área de trabalho do QGIS com o shapefile dos polígonos transparentes por cima do raster" caption="Figura 21" %} + +Como já tem um mapa georreferenciado no seu SIG pode explorar a camada, ajustar a transparência, o contraste e o brilho e, novamente, [Criar novas camadas vetoriais com o QGIS 2.0](/pt/licoes/camadas-vetoriais-qgis) para digitalizar parte da informação histórica que foi criada. (Tenha em mente que a versão do QGIS da lição no link será diferente da utilizada nesta tradução.) +Por exemplo, este mapa georreferenciado da PEI mostra a localização de todas as habitações em 1863, incluindo o nome do chefe de família. Através da atribuição de pontos no mapa é possível introduzir as localizações das habitações e nomes dos proprietários e, a seguir, analisar ou partilhar essa nova camada geo-espacial como um *shapefile*. + +Ao digitalizar vetores de linhas, tais como estradas ou linhas costeiras, pode comparar a localização destes elementos com outros dados históricos ou simplesmente compará-los visualmente com a camada 'lot_township_polygon' neste SIG. + +Em processos mais avançados pode, inclusivamente, sobrepor esta imagem georreferenciada com um DEM (*Digital Elevation Model* - Modelo de Elevação Digital) para proporcionar-lhe um efeito de altura através de sombras (*hillshade*) ou um efeito 3D e, assim, realizar um '*fly-over*' e ter uma perspetiva aérea das habitações da PEI no século XIX. + +*Esta lição é parte do [Geospatial Historian](https://perma.cc/6AN6-N7LX).* diff --git a/pt/licoes/instalacao-mac.md b/pt/licoes/instalacao-mac.md index a7f7467add..5aeabdd582 100644 --- a/pt/licoes/instalacao-mac.md +++ b/pt/licoes/instalacao-mac.md @@ -1,129 +1,129 @@ ---- -title: Configurar um ambiente de desenvolvimento integrado para Python (Mac) -slug: instalacao-mac -layout: lesson -date: 2012-07-17 -tested_date: 2023-11-16 -translation_date: 2021-05-13 -authors: -- William J. Turkel -- Adam Crymble -reviewers: -- Jim Clifford -- Amanda Morton -editors: -- Miriam Posner -translator: -- Josir C. 
Gomes -translation-editor: -- Danielle Sanches -translation-reviewer: -- Bruno Martins -- Renato Rocha Souza -difficulty: 1 -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/323 -activity: transforming -topics: [get-ready, python] -abstract: "Esta lição irá auxiliar na configuração de um ambiente de desenvolvimento integrado para o Python num computador com o Sistema Operacional Mac." -python_warning: false -original: mac-installation -avatar_alt: Uma banda com três músicos -doi: 10.46430/phpt0005 ---- - -{% include toc.html %} - - - - - -## Faça um backup do seu computador - -É sempre importante garantir que você tenha backups regulares e recentes do seu computador. Este é um bom conselho que serve para a vida toda e não se limita à pratica específica de programação. Usuários do Mac podem recorrer ao [Time Machine][] para isso. - -## Instale o Python 3 - -Você ainda pode ter o Python 2 na sua máquina. Como essa versão do Python foi descontinuada no fim de 2019, é importante que você instale o Python 3. Faça o download da versão mais estável da linguagem de programação Python (Version 3.8 de Novembro de 2019) e instale o software a partir do [site do Python][]. - -## Crie um diretório - -Para que você se organize, o ideal é que você tenha um diretório (i.e., pasta) no seu computador onde você irá armazenar os seus programas em Python (por exemplo, `programming-historian`). Crie esse diretório em qualquer outra pasta do seu computador. - -## Instale um editor de texto - -Existem vários editores de texto que você pode utilizar para escrever, armazenar e executar comandos em Python. O Sublime Text é utilizado nessa lição. Se vosê preferir usar outro editor, existem muitas outras [opções de editores de texto][]. Alguns dos nossos usuários preferem um programa chamado [BBEdit][]. A escolha é sua. Pode descarregar uma cópia do Sublime Text a partir do [website do Sublime Text][]. - -#### Configurar no Sublime Text - -Deve agora configurar o editor para que seja possível executar programas em Python. - -A partir do menu, escolha `Tools -> Build System -> Python`. - -## Passo 2 – “Olá Mundo” em Python --------------------------------- - -É uma tradição para quem está começando a programar em uma nova linguagem que o primeiro programa a ser construído emita a frase "Olá Mundo". - -O Python é uma boa linguagem de programação para iniciantes porque ela é de alto-nível. -Isto quer dizer que é possível escrever pequenos programas que realizam muitas funcionalidades. -Quanto menor o programa, mais provável que ele caiba em apenas um ecrã, e mais fácil será manter o controle dele em sua mente. - -O Python é uma lingugagem 'interpretada'. Isto significa que existe um programa especial (conhecido como Interpretador) que sabe como seguir as instruções da linguagem. Uma forma de utilizar o interpretador é guardar todas as instruções a executar em um ficheiro para, em seguida, solicitar ao interpretador que ele interprete o conteúdo desse ficheiro. - -Um ficheiro que contém instruções de linguagem de programação é conhecido como um programa. O interpretador irá executar cada uma das instruções que você incluiu no seu programa e no final irá parar. Vamos experimentar como isto funciona. - -No seu editor de texto, crie um novo ficheiro, entre o seguinte programa de duas linhas, e salve-o na pasta `programming-historian`: - -`ola-mundo.py` - -``` python -# ola-mundo.py -print('Olá Mundo') -``` - -O comando “*Run Python*” permite que você execute o seu programa. 
Se você escolheu um outro editor, este deve ter uma funcionalidade semelhante. Se está a usar Sublime Text, clique em `Tools -> Build` (ou digite `⌘B`). Se está a usar o BBEdit, clique em “#!” e no botão *Run*. Se tudo correu bem, o ecrã deverá mostrar algo como apresentado de seguida: - -{% include figure.html filename="BBEdit-ola-mundo.png" caption="Olá Mundo em Python no Mac, com BBEdit" %} - -Ou, com Sublime Text: - -{% include figure.html filename="pt-tr-sublimetext-ola-mundo.png" caption="Olá Mundo em Python no Mac, com Sublime Text" %} - -## Interagindo com a linha de comandos do Python - -Uma outra forma de interagir com o interpretador é utilizar o que é denominado por linha de comandos. Você pode digitar um comando na linha de comandos e pressionar a tecla Enter, sendo-lhe apresentada a resposta ao seu comando. Usar a linha de comandos é um ótimo método para testar os comandos, por forma a certificar que eles realmente fazem o que você está imaginando. - -Abra o *Finder*, faça duplo-clique em `Applications -> Utilities -> Terminal` e, em seguida, digite “`python3`” - -Este comando irá abrir a linha de comandos do Python, indicando assim que você já pode executar comandos Python. De seguida, digite: - -``` python -print('Olá Mundo') -``` -e pressione Enter. O computador irá responder com: - -``` python -Olá Mundo -``` - -Quando quisermos representar uma interação na linha de comandos, utilizaremos o símbolo `->` para indicar a resposta para o nosso comando, tal como no exemplo abaixo: - -``` python -print('Olá Mundo') --> Olá Mundo -``` - -No seu ecrã, você verá algo como: - -{% include figure.html filename="ola-mundo-terminal.png" caption="Olá Mundo em Python no Terminal do Mac" %} - -Agora que você e o seu computador estão preparados, podemos seguir para tarefas mais interessantes. Se você está seguindo as lições do Python, a nossa sugestão é que tente a próxima lição ‘[Noções básicas de páginas web e HTML][]‘ - - [Time Machine]: http://support.apple.com/kb/ht1427 - [site do Python]: https://www.python.org/downloads/mac-osx/ - [Beautiful Soup]: http://www.crummy.com/software/BeautifulSoup/ - [opções de editores de texto]: https://wiki.python.org/moin/PythonEditors/ - [website do Sublime Text]: https://www.sublimetext.com/download - [BBEdit]: https://www.barebones.com/products/bbedit/ - [site do Komodo Edit]: https://www.activestate.com/products/komodo-ide/downloads/edit/ - [Noções básicas de páginas web e HTML]: nocoes-basicas-paginas-web-html +--- +title: Configurar um ambiente de desenvolvimento integrado para Python (Mac) +slug: instalacao-mac +layout: lesson +date: 2012-07-17 +tested_date: 2023-11-16 +translation_date: 2021-05-13 +authors: +- William J. Turkel +- Adam Crymble +reviewers: +- Jim Clifford +- Amanda Morton +editors: +- Miriam Posner +translator: +- Josir C. Gomes +translation-editor: +- Danielle Sanches +translation-reviewer: +- Bruno Martins +- Renato Rocha Souza +difficulty: 1 +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/323 +activity: transforming +topics: [get-ready, python] +abstract: "Esta lição irá auxiliar na configuração de um ambiente de desenvolvimento integrado para o Python num computador com o Sistema Operacional Mac." +python_warning: false +original: mac-installation +avatar_alt: Uma banda com três músicos +doi: 10.46430/phpt0005 +--- + +{% include toc.html %} + + + + + +## Faça um backup do seu computador + +É sempre importante garantir que você tenha backups regulares e recentes do seu computador. 
Este é um bom conselho que serve para a vida toda e não se limita à prática específica de programação. Usuários do Mac podem recorrer ao [Time Machine][] para isso.
+
+## Instale o Python 3
+
+Você ainda pode ter o Python 2 na sua máquina. Como essa versão do Python foi descontinuada no fim de 2019, é importante que você instale o Python 3. Faça o download da versão mais estável da linguagem de programação Python (versão 3.8, de Novembro de 2019) e instale o software a partir do [site do Python][].
+
+## Crie um diretório
+
+Para que você se organize, o ideal é que você tenha um diretório (i.e., pasta) no seu computador onde você irá armazenar os seus programas em Python (por exemplo, `programming-historian`). Crie esse diretório em qualquer outra pasta do seu computador.
+
+## Instale um editor de texto
+
+Existem vários editores de texto que você pode utilizar para escrever, armazenar e executar comandos em Python. O Sublime Text é utilizado nessa lição. Se você preferir usar outro editor, existem muitas outras [opções de editores de texto][]. Alguns dos nossos usuários preferem um programa chamado [BBEdit][]. A escolha é sua. Pode descarregar uma cópia do Sublime Text a partir do [website do Sublime Text][].
+
+#### Configurar no Sublime Text
+
+Deve agora configurar o editor para que seja possível executar programas em Python.
+
+A partir do menu, escolha `Tools -> Build System -> Python`.
+
+## Passo 2 – “Olá Mundo” em Python
+--------------------------------
+
+É uma tradição para quem está começando a programar em uma nova linguagem que o primeiro programa a ser construído emita a frase "Olá Mundo".
+
+O Python é uma boa linguagem de programação para iniciantes porque ela é de alto nível.
+Isto quer dizer que é possível escrever pequenos programas que realizam muitas funcionalidades.
+Quanto menor o programa, mais provável que ele caiba em apenas um ecrã, e mais fácil será manter o controle dele em sua mente.
+
+O Python é uma linguagem 'interpretada'. Isto significa que existe um programa especial (conhecido como Interpretador) que sabe como seguir as instruções da linguagem. Uma forma de utilizar o interpretador é guardar todas as instruções a executar em um ficheiro para, em seguida, solicitar ao interpretador que ele interprete o conteúdo desse ficheiro.
+
+Um ficheiro que contém instruções de linguagem de programação é conhecido como um programa. O interpretador irá executar cada uma das instruções que você incluiu no seu programa e no final irá parar. Vamos experimentar como isto funciona.
+
+No seu editor de texto, crie um novo ficheiro, insira o seguinte programa de duas linhas e salve-o na pasta `programming-historian`:
+
+`ola-mundo.py`
+
+``` python
+# ola-mundo.py
+print('Olá Mundo')
+```
+
+O comando “*Run Python*” permite que você execute o seu programa. Se você escolheu um outro editor, este deve ter uma funcionalidade semelhante. Se está a usar Sublime Text, clique em `Tools -> Build` (ou digite `⌘B`). Se está a usar o BBEdit, clique em “#!” e no botão *Run*. Se tudo correu bem, o ecrã deverá mostrar algo como apresentado de seguida:
+
+{% include figure.html filename="BBEdit-ola-mundo.png" caption="Olá Mundo em Python no Mac, com BBEdit" %}
+
+Ou, com Sublime Text:
+
+{% include figure.html filename="pt-tr-sublimetext-ola-mundo.png" caption="Olá Mundo em Python no Mac, com Sublime Text" %}
+
+## Interagindo com a linha de comandos do Python
+
+Uma outra forma de interagir com o interpretador é utilizar o que é denominado por linha de comandos.
Você pode digitar um comando na linha de comandos e pressionar a tecla Enter, sendo-lhe apresentada a resposta ao seu comando. Usar a linha de comandos é um ótimo método para testar os comandos, por forma a certificar que eles realmente fazem o que você está imaginando. + +Abra o *Finder*, faça duplo-clique em `Applications -> Utilities -> Terminal` e, em seguida, digite “`python3`” + +Este comando irá abrir a linha de comandos do Python, indicando assim que você já pode executar comandos Python. De seguida, digite: + +``` python +print('Olá Mundo') +``` +e pressione Enter. O computador irá responder com: + +``` python +Olá Mundo +``` + +Quando quisermos representar uma interação na linha de comandos, utilizaremos o símbolo `->` para indicar a resposta para o nosso comando, tal como no exemplo abaixo: + +``` python +print('Olá Mundo') +-> Olá Mundo +``` + +No seu ecrã, você verá algo como: + +{% include figure.html filename="ola-mundo-terminal.png" caption="Olá Mundo em Python no Terminal do Mac" %} + +Agora que você e o seu computador estão preparados, podemos seguir para tarefas mais interessantes. Se você está seguindo as lições do Python, a nossa sugestão é que tente a próxima lição ‘[Noções básicas de páginas web e HTML][]‘ + + [Time Machine]: https://support.apple.com/kb/ht1427 + [site do Python]: https://www.python.org/downloads/mac-osx/ + [Beautiful Soup]: https://www.crummy.com/software/BeautifulSoup/ + [opções de editores de texto]: https://wiki.python.org/moin/PythonEditors/ + [website do Sublime Text]: https://www.sublimetext.com/download + [BBEdit]: https://www.barebones.com/products/bbedit/ + [site do Komodo Edit]: https://www.activestate.com/products/komodo-ide/downloads/edit/ + [Noções básicas de páginas web e HTML]: nocoes-basicas-paginas-web-html diff --git a/pt/licoes/introducao-ao-markdown.md b/pt/licoes/introducao-ao-markdown.md index 3022a23da0..94a714b737 100644 --- a/pt/licoes/introducao-ao-markdown.md +++ b/pt/licoes/introducao-ao-markdown.md @@ -1,318 +1,318 @@ ---- -title: Introdução ao Markdown -slug: introducao-ao-markdown -layout: lesson -date: 2015-11-13 -translation_date: 2021-03-30 -authors: -- Sarah Simpkin -reviewers: -- John Fink -- Nancy Lemay -editors: -- Ian Milligan -translator: -- João Gilberto Neves Saraiva -translation-editor: -- Joana Vieira Paulino -translation-reviewer: -- Josir Cardoso Gomes -- Bruno Martins -difficulty: 1 -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/363 -activity: presenting -topics: [data-management] -abstract: "Nesta lição é apresentado o Markdown, uma sintaxe baseada em texto simples para formatação de documentos. É explicado porque ele é usado, como formatar ficheiros Markdown e como pré-visualizar documentos formatados em Markdown na web." -original: getting-started-with-markdown -avatar_alt: Letras ornamentadas num manual tipográfico -doi: 10.46430/phpt0008 ---- - -{% include toc.html %} - - - - -### Objetivos da lição -Nesta lição, é apresentado o Markdown, uma sintáxe baseada em texto simples para formatação de documentos. É explicado porque ele é usado, como formatar ficheiros Markdown e como visualizar documentos formatados em Markdown na web. - -Como as lições do *Programming Historian em português* são submetidas em ficheiros Markdown, incluí exemplos do *Programming Historian* sempre que possível. Espero que este guia seja útil para quem estiver pensando em criar uma lição para este site. - -## O que é Markdown? 
- -Criado em 2004 por [John Gruber](http://daringfireball.net/projects/markdown/ "Markdown on Daring Fireball"), Markdown se refere a: (1) um modo de formatação de ficheiros de texto, e também (2) uma [ferramenta Perl](https://pt.wikipedia.org/wiki/Perl) para converter ficheiros Markdown em HTML. Nesta lição, nosso foco será na primeira parte, aprender a escrever ficheiros utilizando a sintaxe Markdown. - -Ficheiros de texto simples têm muitas vantagens sobre outros formatos. Uma delas é que são legíveis em praticamente qualquer dispositivo. Eles também resistem ao tempo melhor do que outros tipos de ficheiro - se abrir um documento salvo num formato de um processador de texto legado (como docx), estará familiarizado com os desafios de compatibilidade envolvidos. - -Utilizando a sintaxe Markdown, você será capaz de produzir ficheiros que são legíveis como texto simples e também prontos para ser estilizados em outras plataformas. Vários sistemas de blogs, geradores de sites estáticos e sites como o [GitHub](http://github.com "GitHub") também suportam Markdown, e renderizam esses ficheiros em HTML para exibição na web. Além disso, ferramentas como o Pandoc podem converter ficheiros de Markdown para outros formatos e vice-versa. Para mais informações sobre o Pandoc, visite a lição (em inglês) [Sustainable authorship in plain text using Pandoc and Markdown](/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown), produzida por Dennis Tenen e Grant Wythoff. - -## Sintaxe Markdown -Ficheiros Markdown são salvos com a extensão `.md` e podem ser abertos num editor de texto como TextEdit, Notepad, Sublime Text ou Vim. Diversos websites e plataformas de publicação dispôem de editores web e/ou extensões para entrada de texto utilizando sintaxe Markdown. - -Neste tutorial, vamos praticar a sintaxe Markdown no navegador utilizando o [StackEdit](https://stackedit.io). Nele é possível inserir um texto formatado em Markdown na esquerda e ver imediatamente a versão renderizada dele à direita. - -Como todas as lições do *Programming Historian em português* são escritas em Markdown, é possível examinar esses ficheiros no StackEdit também. No [StackEdit editor](https://stackedit.io/app), clique no `#` no canto superior direito para abrir o menu. Escolha `Import/Export` e depois `Import Markdown`, então cole o conteúdo da URL a seguir na janela do lado esquerdo para exibir a lição "Preservar os seus dados de investigação" no editor: - -``` -https://raw.githubusercontent.com/programminghistorian/jekyll/gh-pages/pt/licoes/preservar-os-seus-dados-de-investigacao.md -``` -Note que enquanto o painel direito apresenta uma renderização mais elegante do texto, o ficheiro original à esquerda fica ainda bem legível. - -Agora, vamos apronfundar conhecimentos escrevendo nós mesmos com a sintaxe Markdown. Crie um novo documento no StackEdit clicando no ícone de pasta no canto superior esquerdo e escolha a opção `New file`. Você pode inserir um título para o documento na caixa de texto no topo da página. - -### Cabeçalhos -Quatro níveis de cabeçalho estão disponíveis no Markdown e são indicatos pelo número de `#` antes do texto do título. Copie os exemplos a seguir na caixa de texto à sua esquerda. 
- -``` -# Primeiro nível de cabeçalho -## Segundo nível de cabeçalho -### Terceiro nível de cabeçalho -#### Quarto nível de cabeçalho -``` - -O primeiro e segundo níveis de cabeçalho podem ser inseridos da seguinte forma: - -``` -Primeiro nível de cabeçalho -======= - -Segundo nível de cabeçalho ----------- -``` - -**Eles serão renderizados como:** - -# Primeiro nível de cabeçalho - -## Segundo nível de cabeçalho - -### Terceiro nível de cabeçalho - -#### Quarto nível de cabeçalho - - -Observe como a sintaxe do Markdown permanece compreensível mesmo na versão de texto simples. - - -### Parágrafos & Quebras de linha - -Escreva a frase a seguir na caixa de texto: - -``` -Bem-vindo ao Programming Historian em português. - -Hoje vamos aprender sobre a sintaxe Markdown. -Esta frase é separada da anterior por uma quebra de linha simples. -``` -**Isso é renderizado como** - -Bem-vindo ao Programming Historian em português. - -Hoje vamos aprender sobre a sintaxe Markdown. -Esta frase é separada da anterior por uma quebra de linha simples. - - -Os parágrafos devem ser separados por uma linha vazia. Deixe uma linha em branco entre `Markdown.` e `Esta` para ver como isso funciona. Em algumas implementações de Markdown, uma quebra de linha simples pode ser indicada com dois espaços vazios no fim de uma linha. Isso não é aplicado na formatação Markdown do [GitHub](https://docs.github.com/pt/github/writing-on-github/basic-writing-and-formatting-syntax) que o StackEdit utiliza como padrão. - - -### Acrescentando Ênfase - -O texto pode ser posto em itálico colocando a palavra entre os símbolos `*` ou `_`. Da mesma forma, o texto em negrito pode ser escrito colocando a palavra entre `**` ou `__`. - -Tente adicionar ênfase à frase usando estes métodos: - -``` -Estou **muito** animado com os tutoriais do _Programming Historian_. -``` - -**Isto é renderizado como:** - -Estou **muito** animado com os tutoriais do _Programming Historian_. - -### Criando Listas - -Markdown inclui suporte para listas ordenadas ou não. Tente digitar a lista a seguir na caixa de texto: - -``` -Lista de compras ----------- -* Frutas - * Maçãs - * Laranjas - * Uvas -* Laticínios - * Leite - * Queijo -``` -Identar o `*` permite criar itens alinhados. - -**Isso é renderizado como:** - -Lista de compras ----------- -* Frutas - * Maçãs - * Laranjas - * Uvas -* Laticínios - * Leite - * Queijo - -Listas ordenadas são escritas numerando cada linha. Mais uma vez, o objetivo do Markdown é produzir documentos que sejam legíveis como texto simples e que possam ser transformados noutros formatos. - -``` -Lista de afazeres ----------- -1. Terminar o tutorial de Markdown -2. Ir fazer compras -3. Preparar o almoço -``` - -**Isso é renderizado como:** - -Lista de afazeres ----------- -1. Terminar o tutorial de Markdown -2. Ir fazer compras -3. Preparar o almoço - -### Trechos de código -Representar trechos de código de maneira diferente do resto de um documento é uma boa prática pois melhora a legibilidade. Comumente, códigos são representandos em Markdown com texto monoespaçado. Uma vez que o Markdown não faz distinção entre fontes, codígos são representandos entre caractéres de crase como `` ` ``. Por exemplo, `` `
    ` ``. Blocos inteiros de código são escritos digitando três caracteres `` ` `` antes e depois de cada bloco. Na janela de visualização do StackEdit, isso será renderizado como uma caixa sombreada com texto em uma fonte monoespaçada. - -Digite o trecho a seguir na caixa de texto: - - ``` - - - Título do Website - - - - - ``` - -**Isso é renderizado como:** - -``` - - - Título do Website - - - - -``` - -Observe como o bloco de código é renderizado em uma fonte monoespaçada. - -### Blocos de citações - -Adicionar um `>` antes de qualquer parágrafo para renderizá-lo como um elemento de bloco de citação. - -Tente digitar o seguinte texto na caixa de texto: - -``` -> Olá, sou um parágrafo de texto encerrado em um bloco de citação. Observe como estou deslocado da margem esquerda. -``` - -**Isso é renderizado como:** - -> Olá, sou um parágrafo de texto encerrado em um bloco de citação. Observe como estou deslocado da margem esquerda. - -### Links - -Os links podem ser escritos em dois estilos. - -Os links embutidos são escritos colocando o texto do link entre colchetes primeiro e, em seguida, incluindo a URL e o texto alternativo opcional entre parêntesis curvos. - -`Para mais tutoriais, por favor visite o [Programming Historian em português](/pt/).` - -**Isso é renderizado como:** - -Para mais tutoriais, por favor visite o [Programming Historian em português](/pt/) - -Os links de referência são úteis para notas de rodapé e podem manter seu documento de texto simples mais organizado. Eles são escritos com um conjunto adicional de colchetes para estabelecer um rótulo de ID de link. - -`Um exemplo é o website do [Programming Historian em português][1].` - -Você deve então adicionar o URL a outra parte do documento: - -`[1]: http://programminghistorian.org/pt/ "The Programming Historian em português".` - -**Isso é renderizado como:** - -Um exemplo é o website do [_Programming Historian em português_][1] - -[1]: /pt/ "The Programming Historian em português" - - -### Imagens - -As imagens podem ser referenciadas usando `!` seguido por algum texto alternativo entre colchetes. Depois, a URL da imagem e um título opcional. Eles não serão exibidos em seu documento de texto simples, mas serão incorporados em uma página HTML renderizada. - -`![Wikipedia logo](https://upload.wikimedia.org/wikipedia/en/8/80/Wikipedia-logo-v2.svg "Wikipedia logo")` - -**Isso é renderizado como:** - -![Wikipedia logo](https://upload.wikimedia.org/wikipedia/en/8/80/Wikipedia-logo-v2.svg "Wikipedia logo") - -#### Linhas Horizontais - -Linhas horizontais são produzidas quando três ou mais `-`,` * `ou` _` são incluídos em sequência, independentemente do número de espaços entre eles. Todas as combinações a seguir renderizarão linhas horizontais: - -``` -___ -* * * -- - - - - - -``` - -**Isso é renderizado como:** - ---- -*** -- - - - - - - - -### Tabelas - -Originalmente o Markdown não inclui tabelas. No entanto, alguns sites e aplicativos usam variantes do Markdown que podem incluir tabelas e outros recursos especiais. É o caso da formatação utilizada no [GitHub](https://docs.github.com/pt/github/writing-on-github/organizing-information-with-tables) que é usada para renderizar arquivos `.md` a partir do GitHub. - -Para criar uma tabela dentro do GitHub, use barras `|` para separar colunas e hifens `-` entre seus cabeçalhos e o resto do conteúdo da tabela. Embora as barras sejam realmente necessárias entre as colunas, é possível usá-las em qualquer lado da tabela para obter uma aparência melhor. 
As células podem conter qualquer comprimento de conteúdo e não é necessário que as barras sejam alinhadas verticalmente umas com as outras.
-
-```
-| Título 1 | Título 2 | Título 3 |
-| --------- | --------- | --------- |
-| Linha 1, coluna 1 | Linha 1, coluna 2 | Linha 1, coluna 3|
-| Linha 2, coluna 1 | Linha 2, coluna 2 | Linha 2, coluna 3|
-| Linha 3, coluna 1 | Linha 3, coluna 2 | Linha 3, coluna 3|
-```
-
-**Isso é renderizado como:**
-
-| Título 1 | Título 2 | Título 3 |
-| --------- | --------- | --------- |
-| Linha 1, coluna 1 | Linha 1, coluna 2 | Linha 1, coluna 3|
-| Linha 2, coluna 1 | Linha 2, coluna 2 | Linha 2, coluna 3|
-| Linha 3, coluna 1 | Linha 3, coluna 2 | Linha 3, coluna 3|
-
-Para especificar o alinhamento de cada coluna, dois pontos `:` podem ser adicionados à linha do cabeçalho da seguinte forma:
-
-```
-| Alinhado à esquerda | Centralizado | Alinhado à direita |
-| :-------- | :-------: | --------: |
-| Maçãs | Vermelho | 5000 |
-| Bananas | Amarelo| 75 |
-```
-**Isso é renderizado como:**
-
-| Alinhado à esquerda | Centralizado | Alinhado à direita |
-| :-------- | :-------: | --------: |
-| Maçãs | Vermelho | 5000 |
-| Bananas | Amarelo| 75 |
-
-
-## Limitações do Markdown
-Embora o Markdown esteja se tornando cada vez mais popular, principalmente para estilizar documentos que podem ser visualizados na web, muitas pessoas e editores ainda esperam documentos tradicionais do Word, PDFs e outros formatos de arquivo. Isso pode ser atenuado parcialmente com ferramentas de conversão de linha de comandos, como o [Pandoc](https://pandoc.org/); no entanto, certos recursos do processador de texto, como o controle de alterações, ainda não são suportados. Visite a lição do Programming Historian (em inglês) de título [Sustainable authorship in plain text using Pandoc and Markdown](/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown) para obter mais informações sobre Pandoc.
-
-
-## Conclusão
-Markdown é uma ferramenta útil e um meio-termo entre arquivos de texto simples não estilizados e documentos legados de processadores de texto. Sua sintaxe simples é rápida de aprender e legível por si só e também quando renderizada em HTML e outros tipos de documentos. Por fim, escolher escrever seus próprios documentos em Markdown significa que eles serão utilizáveis e legíveis a longo prazo.
+---
+title: Introdução ao Markdown
+slug: introducao-ao-markdown
+layout: lesson
+date: 2015-11-13
+translation_date: 2021-03-30
+authors:
+- Sarah Simpkin
+reviewers:
+- John Fink
+- Nancy Lemay
+editors:
+- Ian Milligan
+translator:
+- João Gilberto Neves Saraiva
+translation-editor:
+- Joana Vieira Paulino
+translation-reviewer:
+- Josir Cardoso Gomes
+- Bruno Martins
+difficulty: 1
+review-ticket: https://github.com/programminghistorian/ph-submissions/issues/363
+activity: presenting
+topics: [data-management]
+abstract: "Nesta lição é apresentado o Markdown, uma sintaxe baseada em texto simples para formatação de documentos. É explicado porque ele é usado, como formatar ficheiros Markdown e como pré-visualizar documentos formatados em Markdown na web."
+original: getting-started-with-markdown
+avatar_alt: Letras ornamentadas num manual tipográfico
+doi: 10.46430/phpt0008
+---
+
+{% include toc.html %}
+
+
+
+### Objetivos da lição
+Nesta lição, é apresentado o Markdown, uma sintaxe baseada em texto simples para formatação de documentos. É explicado porque ele é usado, como formatar ficheiros Markdown e como visualizar documentos formatados em Markdown na web.
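Um aparte ilustrativo sobre essa renderização para a web: em Python, a conversão pode ser experimentada com o pacote de terceiros `markdown` (instalável com `pip install markdown`) - um esboço, não uma parte da lição original:

```python
# Esboço ilustrativo: converter um trecho de Markdown em HTML.
import markdown

texto = "# Um cabeçalho\n\nEstou **muito** animado."
print(markdown.markdown(texto))
# -> <h1>Um cabeçalho</h1>
# -> <p>Estou <strong>muito</strong> animado.</p>
```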
+
+Como as lições do *Programming Historian em português* são submetidas em ficheiros Markdown, incluí exemplos do *Programming Historian* sempre que possível. Espero que este guia seja útil para quem estiver pensando em criar uma lição para este site.
+
+## O que é Markdown?
+
+Criado em 2004 por [John Gruber](https://daringfireball.net/projects/markdown/ "Markdown on Daring Fireball"), Markdown se refere a: (1) um modo de formatação de ficheiros de texto, e também (2) uma [ferramenta Perl](https://pt.wikipedia.org/wiki/Perl) para converter ficheiros Markdown em HTML. Nesta lição, nosso foco será na primeira parte, aprender a escrever ficheiros utilizando a sintaxe Markdown.
+
+Ficheiros de texto simples têm muitas vantagens sobre outros formatos. Uma delas é que são legíveis em praticamente qualquer dispositivo. Eles também resistem ao tempo melhor do que outros tipos de ficheiro - se abrir um documento salvo num formato de um processador de texto legado (como docx), estará familiarizado com os desafios de compatibilidade envolvidos.
+
+Utilizando a sintaxe Markdown, você será capaz de produzir ficheiros que são legíveis como texto simples e também prontos para ser estilizados em outras plataformas. Vários sistemas de blogs, geradores de sites estáticos e sites como o [GitHub](https://github.com "GitHub") também suportam Markdown, e renderizam esses ficheiros em HTML para exibição na web. Além disso, ferramentas como o Pandoc podem converter ficheiros de Markdown para outros formatos e vice-versa. Para mais informações sobre o Pandoc, visite a lição (em inglês) [Sustainable authorship in plain text using Pandoc and Markdown](/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown), produzida por Dennis Tenen e Grant Wythoff.
+
+## Sintaxe Markdown
+Ficheiros Markdown são salvos com a extensão `.md` e podem ser abertos num editor de texto como TextEdit, Notepad, Sublime Text ou Vim. Diversos websites e plataformas de publicação dispõem de editores web e/ou extensões para entrada de texto utilizando sintaxe Markdown.
+
+Neste tutorial, vamos praticar a sintaxe Markdown no navegador utilizando o [StackEdit](https://stackedit.io). Nele é possível inserir um texto formatado em Markdown na esquerda e ver imediatamente a versão renderizada dele à direita.
+
+Como todas as lições do *Programming Historian em português* são escritas em Markdown, é possível examinar esses ficheiros no StackEdit também. No [StackEdit editor](https://stackedit.io/app), clique no `#` no canto superior direito para abrir o menu. Escolha `Import/Export` e depois `Import Markdown`, então cole o conteúdo da URL a seguir na janela do lado esquerdo para exibir a lição "Preservar os seus dados de investigação" no editor:
+
+```
+https://raw.githubusercontent.com/programminghistorian/jekyll/gh-pages/pt/licoes/preservar-os-seus-dados-de-investigacao.md
+```
+Note que enquanto o painel direito apresenta uma renderização mais elegante do texto, o ficheiro original à esquerda fica ainda bem legível.
+
+Agora, vamos aprofundar conhecimentos escrevendo nós mesmos com a sintaxe Markdown. Crie um novo documento no StackEdit clicando no ícone de pasta no canto superior esquerdo e escolha a opção `New file`. Você pode inserir um título para o documento na caixa de texto no topo da página.
+
+### Cabeçalhos
+Quatro níveis de cabeçalho estão disponíveis no Markdown e são indicados pelo número de `#` antes do texto do título. Copie os exemplos a seguir na caixa de texto à sua esquerda.
+
+```
+# Primeiro nível de cabeçalho
+## Segundo nível de cabeçalho
+### Terceiro nível de cabeçalho
+#### Quarto nível de cabeçalho
+```
+
+O primeiro e segundo níveis de cabeçalho podem ser inseridos da seguinte forma:
+
+```
+Primeiro nível de cabeçalho
+=======
+
+Segundo nível de cabeçalho
+----------
+```
+
+**Eles serão renderizados como:**
+
+# Primeiro nível de cabeçalho
+
+## Segundo nível de cabeçalho
+
+### Terceiro nível de cabeçalho
+
+#### Quarto nível de cabeçalho
+
+
+Observe como a sintaxe do Markdown permanece compreensível mesmo na versão de texto simples.
+
+
+### Parágrafos & Quebras de linha
+
+Escreva a frase a seguir na caixa de texto:
+
+```
+Bem-vindo ao Programming Historian em português.
+
+Hoje vamos aprender sobre a sintaxe Markdown.
+Esta frase é separada da anterior por uma quebra de linha simples.
+```
+**Isso é renderizado como**
+
+Bem-vindo ao Programming Historian em português.
+
+Hoje vamos aprender sobre a sintaxe Markdown.
+Esta frase é separada da anterior por uma quebra de linha simples.
+
+
+Os parágrafos devem ser separados por uma linha vazia. Deixe uma linha em branco entre `Markdown.` e `Esta` para ver como isso funciona. Em algumas implementações de Markdown, uma quebra de linha simples pode ser indicada com dois espaços vazios no fim de uma linha. Isso não é aplicado na formatação Markdown do [GitHub](https://docs.github.com/pt/github/writing-on-github/basic-writing-and-formatting-syntax) que o StackEdit utiliza como padrão.
+
+
+### Acrescentando Ênfase
+
+O texto pode ser posto em itálico colocando a palavra entre os símbolos `*` ou `_`. Da mesma forma, o texto em negrito pode ser escrito colocando a palavra entre `**` ou `__`.
+
+Tente adicionar ênfase à frase usando estes métodos:
+
+```
+Estou **muito** animado com os tutoriais do _Programming Historian_.
+```
+
+**Isto é renderizado como:**
+
+Estou **muito** animado com os tutoriais do _Programming Historian_.
+
+### Criando Listas
+
+Markdown inclui suporte para listas ordenadas ou não. Tente digitar a lista a seguir na caixa de texto:
+
+```
+Lista de compras
+----------
+* Frutas
+    * Maçãs
+    * Laranjas
+    * Uvas
+* Laticínios
+    * Leite
+    * Queijo
+```
+Indentar o `*` permite criar itens alinhados.
+
+**Isso é renderizado como:**
+
+Lista de compras
+----------
+* Frutas
+    * Maçãs
+    * Laranjas
+    * Uvas
+* Laticínios
+    * Leite
+    * Queijo
+
+Listas ordenadas são escritas numerando cada linha. Mais uma vez, o objetivo do Markdown é produzir documentos que sejam legíveis como texto simples e que possam ser transformados noutros formatos.
+
+```
+Lista de afazeres
+----------
+1. Terminar o tutorial de Markdown
+2. Ir fazer compras
+3. Preparar o almoço
+```
+
+**Isso é renderizado como:**
+
+Lista de afazeres
+----------
+1. Terminar o tutorial de Markdown
+2. Ir fazer compras
+3. Preparar o almoço
+
+### Trechos de código
+Representar trechos de código de maneira diferente do resto de um documento é uma boa prática pois melhora a legibilidade. Comumente, códigos são representados em Markdown com texto monoespaçado. Uma vez que o Markdown não faz distinção entre fontes, códigos são representados entre caracteres de crase como `` ` ``. Por exemplo, `` `<br/>` ``.
Blocos inteiros de código são escritos digitando três caracteres `` ` `` antes e depois de cada bloco. Na janela de visualização do StackEdit, isso será renderizado como uma caixa sombreada com texto em uma fonte monoespaçada.
+
+Digite o trecho a seguir na caixa de texto:
+
+    ```
+    <html>
+    <head>
+    <title>Título do Website</title>
+    </head>
+    <body>
+    </body>
+    </html>
+    ```
+
+**Isso é renderizado como:**
+
+```
+<html>
+<head>
+<title>Título do Website</title>
+</head>
+<body>
+</body>
+</html>
+```
+
+Observe como o bloco de código é renderizado em uma fonte monoespaçada.
+
+### Blocos de citações
+
+Adicione um `>` antes de qualquer parágrafo para renderizá-lo como um elemento de bloco de citação.
+
+Tente digitar o seguinte texto na caixa de texto:
+
+```
+> Olá, sou um parágrafo de texto encerrado em um bloco de citação. Observe como estou deslocado da margem esquerda.
+```
+
+**Isso é renderizado como:**
+
+> Olá, sou um parágrafo de texto encerrado em um bloco de citação. Observe como estou deslocado da margem esquerda.
+
+### Links
+
+Os links podem ser escritos em dois estilos.
+
+Os links embutidos são escritos colocando o texto do link entre colchetes primeiro e, em seguida, incluindo a URL e o texto alternativo opcional entre parêntesis curvos.
+
+`Para mais tutoriais, por favor visite o [Programming Historian em português](/pt/).`
+
+**Isso é renderizado como:**
+
+Para mais tutoriais, por favor visite o [Programming Historian em português](/pt/)
+
+Os links de referência são úteis para notas de rodapé e podem manter seu documento de texto simples mais organizado. Eles são escritos com um conjunto adicional de colchetes para estabelecer um rótulo de ID de link.
+
+`Um exemplo é o website do [Programming Historian em português][1].`
+
+Você deve então adicionar o URL a outra parte do documento:
+
+`[1]: http://programminghistorian.org/pt/ "The Programming Historian em português".`
+
+**Isso é renderizado como:**
+
+Um exemplo é o website do [_Programming Historian em português_][1]
+
+[1]: /pt/ "The Programming Historian em português"
+
+
+### Imagens
+
+As imagens podem ser referenciadas usando `!` seguido por algum texto alternativo entre colchetes. Depois, a URL da imagem e um título opcional. Eles não serão exibidos em seu documento de texto simples, mas serão incorporados em uma página HTML renderizada.
+
+`![Wikipedia logo](https://upload.wikimedia.org/wikipedia/en/8/80/Wikipedia-logo-v2.svg "Wikipedia logo")`
+
+**Isso é renderizado como:**
+
+![Wikipedia logo](https://upload.wikimedia.org/wikipedia/en/8/80/Wikipedia-logo-v2.svg "Wikipedia logo")
+
+#### Linhas Horizontais
+
+Linhas horizontais são produzidas quando três ou mais `-`, `*` ou `_` são incluídos em sequência, independentemente do número de espaços entre eles. Todas as combinações a seguir renderizarão linhas horizontais:
+
+```
+___
+* * *
+- - - - - -
+```
+
+**Isso é renderizado como:**
+
+---
+***
+- - - - - - -
+
+### Tabelas
+
+Originalmente o Markdown não inclui tabelas. No entanto, alguns sites e aplicativos usam variantes do Markdown que podem incluir tabelas e outros recursos especiais. É o caso da formatação utilizada no [GitHub](https://docs.github.com/pt/github/writing-on-github/organizing-information-with-tables) que é usada para renderizar arquivos `.md` a partir do GitHub.
+
+Para criar uma tabela dentro do GitHub, use barras `|` para separar colunas e hifens `-` entre seus cabeçalhos e o resto do conteúdo da tabela. Embora as barras sejam realmente necessárias entre as colunas, é possível usá-las em qualquer lado da tabela para obter uma aparência melhor.
As células podem conter qualquer comprimento de conteúdo e não é necessário que as barras sejam alinhadas verticalmente umas com as outras. + +``` +| Título 1 | Título 2 | Título 3 | +| --------- | --------- | --------- | +| Linha 1, coluna 1 | Linha 1, coluna 2 | Linha 1, coluna 3| +| Linha 2, coluna 1 | Linha 2, coluna 2 | Linha 2, coluna 3| +| Linha 3, coluna 1 | Linha 3, coluna 2 | Linha 3, coluna 3| +``` + +**Isso é renderizado como:** + +| Título 1 | Título 2 | Título 3 | +| --------- | --------- | --------- | +| Linha 1, coluna 1 | Linha 1, coluna 2 | Linha 1, coluna 3| +| Linha 2, coluna 1 | Linha 2, coluna 2 | Linha 2, coluna 3| +| Linha 3, coluna 1 | Linha 3, coluna 2 | Linha 3, coluna 3| + +Para especificar o alinhamento de cada coluna, dois pontos `:` podem ser adicionados à linha do cabeçalho da seguinte forma: + +``` +| Alinhado à esquerda | Centralizado | Alinhado à direita | +| :-------- | :-------: | --------: | +| Maçãs | Vermelho | 5000 | +| Bananas | Amarelo| 75 | +``` +**Isso é renderizado como:** + +| Alinhado à esquerda | Centralizado | Alinhado à direita | +| :-------- | :-------: | --------: | +| Maçãs | Vermelho | 5000 | +| Bananas | Amarelo| 75 | + + +## Limitações do Markdown +Embora o Markdown esteja se tornando cada vez mais popular, principalmente para estilizar documentos que podem ser visualizados na web, muitas pessoas e editores ainda esperam documentos tradicionais do Word, PDFs e outros formatos de arquivo. Isso pode ser atenuado parcialmente com ferramentas de conversão de linha de comandos, como o [Pandoc](https://pandoc.org/); no entanto, certos recursos do processador de texto, como o controle de alterações, ainda não são suportados. Visite a lição do Programming Historian (em inglês) de título [Sustainable authorship in plain text using Pandoc and Markdown](/en/lessons/sustainable-authorship-in-plain-text-using-pandoc-and-markdown) para obter mais informações sobre Pandoc. + + +## Conclusão +Markdown é uma ferramenta útil e um meio-termo entre arquivos de texto simples não estilizados e documentos legados de processadores de texto. Sua sintaxe simples é rápida de aprender e legível por si só e também quando renderizada em HTML e outros tipos de documentos. Por fim, escolher escrever seus próprios documentos em Markdown significa que eles serão utilizáveis e legíveis a longo prazo. diff --git a/pt/licoes/introducao-dados-abertos-conectados.md b/pt/licoes/introducao-dados-abertos-conectados.md index d1b9122faa..daf44e2a2d 100644 --- a/pt/licoes/introducao-dados-abertos-conectados.md +++ b/pt/licoes/introducao-dados-abertos-conectados.md @@ -1,420 +1,420 @@ ---- -title: Introdução aos Dados Abertos Conectados -layout: lesson -collection: lessons -slug: introducao-dados-abertos-conectados -original: intro-to-linked-data -date: 2013-08-05 -translation_date: 2022-11-21 -authors: -- Jonathan Blaney -reviewers: -- Terhi Nurmikko-Fuller -- Matthew Lincoln -editors: -- Adam Crymble -translator: -- Francisco Nabais -translation-editor: -- Joana Vieira Paulino -translation-reviewer: -- Bruno Almeida -- Daniel Bonatto Seco -lesson-testers: David Valentine -tested_date: 2025-02-28 -difficulty: 1 -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/428 -activity: acquiring -topics: [lod] -abstract: "Este tutorial apresenta os principais conceitos de dados abertos conectados (*Linked Open Data*), incluindo URIs, ontologias, formatos RDF e uma breve introdução à linguagem de consulta de gráficos SPARQL." 
-avatar_alt: Um homem velho com uma mulher em cada braço -doi: 10.46430/phpt0033 ---- - -{% include toc.html %} - -Nota de Tradução: Alguns termos, por aparecerem constantemente e facilitarem a interpretação das imagens, apenas foram propositadamente traduzidos uma vez e serão colocados entre parênteses. Alertamos também para a existência de alguns exemplos que não foram propositadamente traduzidos para facilitar a sua introdução nos programas apresentados. - - -Introdução e Âmbito da lição ------------------------------ - -Esta lição oferece uma breve e concisa introdução aos [dados abertos conectados](https://pt.wikipedia.org/wiki/Linked_data#The_Linking_Open_Data_Project) (*Linked Open Data* ou LOD). Não é necessário conhecimento prévio para realizar este tutorial. Os leitores deverão obter uma compreensão clara dos conceitos por detrás dos dados abertos conectados, como são utilizados e como são criados. O tutorial está dividido em cinco partes, além de leituras adicionais: - -1. Dados abertos conectados: o que são? -2. O papel do [Identificador Uniforme de Recurso](https://pt.wikipedia.org/wiki/URI) (*Uniform Resource Identifier* ou URI) -3. Como o LOD organiza o conhecimento: [ontologias](https://pt.wikipedia.org/wiki/Ontologia_(ci%C3%AAncia_da_computa%C3%A7%C3%A3o)) -4. A [Estrutura de Descrição de Recursos](https://pt.wikipedia.org/wiki/Resource_Description_Framework) (*Resource Description Framework* ou RDF) e formatos de dados -5. Consulta de dados abertos conectados com [SPARQL](https://pt.wikipedia.org/wiki/SPARQL) -6. Outras leituras e recursos - -A conclusão deste tutorial poderá levar algumas horas e poderá ser útil reler algumas secções para solidificar a sua compreensão. Os termos técnicos foram ligados à sua página correspondente na Wikipedia e encoraja-se a que faça uma pausa e leia sobre termos que considere desafiadores. Depois de ter aprendido alguns dos princípios-chave do LOD, a melhor maneira de melhorar e solidificar esse conhecimento é praticar. Este tutorial fornece oportunidades para fazê-lo. No final da lição, deverá compreender os princípios básicos de LOD, incluindo termos e conceitos-chave. - -Se precisar aprender a como explorar LOD usando a linguagem de consulta [SPARQL](https://pt.wikipedia.org/wiki/SPARQL), recomenda-se a lição de Matthew Lincoln ['*Using SPARQL to access Linked Open Data*'](/en/lessons/retired/graph-databases-and-SPARQL) (em inglês) (Nota: a lição deste link encontra-se desatualizada e já não é mantida pelo _Programming Historian_. Por favor veja a nota inicial dessa página sobre a razão dessa lição ter sido retirada), que segue praticamente a visão geral fornecida nesta lição. - -Para proporcionar aos leitores uma base sólida dos princípios básicos de LOD, este tutorial não oferecerá uma cobertura abrangente de todos os seus conceitos. Estes **não** serão o foco desta lição: - -1. [Web Semântica](https://pt.wikipedia.org/wiki/Web_sem%C3%A2ntica) e [raciocínio semântico](https://en.wikipedia.org/wiki/Semantic_reasoner) (em inglês) de [datasets](https://pt.wikipedia.org/wiki/Conjunto_de_dados). Um raciocinador semântico deduziria que Jorge VI é o irmão ou meio-irmão de Eduardo VIII, dado que: a) Eduardo VIII é o filho de Jorge V e b) Jorge VI é o filho de Jorge V. Este tutorial não se foca neste tipo de tarefa. -2. Criação e *upload* de conjuntos de dados abertos conectados ligados à [Nuvem de dados conectados](http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/) (em inglês). 
Partilhar LOD é um princípio importante, que é encorajado abaixo. Contudo, os aspetos práticos de contribuir com LOD para a nuvem de dados conectados estão além do âmbito desta lição. Alguns recursos que podem ajudar a começar esta tarefa estão disponíveis no final deste tutorial. - -## Dados abertos conectados: O que são? - -LOD é informação estruturada num formato destinado a máquinas e, por isso, não é necessariamente um conceito de fácil definição. É importante não perder a motivação com esta informação já que, ao compreender os princípios, pode colocar uma máquina a fazer uma leitura autónoma. - -Se todos os datasets fossem publicados abertamente e utilizassem o mesmo formato para estruturar a informação, seria possível interrogá-los todos de uma só vez. A análise de grandes volumes de dados é potencialmente muito mais poderosa do que qualquer pessoa que utilize os seus próprios datasets individuais espalhados pela web nos chamados [silos de informação](https://en.wikipedia.org/wiki/Information_silo) (em inglês). Estes datasets interoperáveis são aquilo para que os profissionais de LOD estão a trabalhar. - -Para atingir este objetivo, ao trabalhar com LOD, é importante recordar três princípios: - -1. **Utilizar um formato padrão de LOD reconhecido**. Para que o LOD funcione, os dados devem ser [estruturados](https://pt.wikipedia.org/wiki/Estrutura_de_dados), utilizando normas reconhecidas para que os computadores que interrogam os dados possam processá-los de forma consistente. Há vários formatos de LOD, alguns dos quais são discutidos abaixo. -2. **Referir uma entidade da mesma forma que outras pessoas o fazem**. Se existirem dados sobre a mesma pessoa/local/coisa em dois ou mais locais, certifique-se de que se refere à pessoa/local/coisa da mesma forma em todos os casos. -3. **Publicar os seus dados abertamente**. Qualquer pessoa deverá poder utilizar os seus dados sem pagar uma taxa e num formato que não exija [software proprietário](https://pt.wikipedia.org/wiki/Software_propriet%C3%A1rio). - -Comecemos com um exemplo de dados sobre uma pessoa, utilizando uma abordagem comum [par atributo-valor](https://en.wikipedia.org/wiki/Attribute%E2%80%93value_pair) (em inglês) típica em computação: - - pessoa=número - -Neste caso, o 'atributo' é uma pessoa. E o valor - ou quem é essa pessoa - é representado por um número. O número pode ser atribuído aleatoriamente ou pode ser utilizado um número que já esteja associado a essa pessoa. Esta última abordagem tem grandes vantagens: se todos os que criarem um dataset que menciona essa pessoa utilizarem *exatamente o mesmo número, exatamente no mesmo formato*, então podemos encontrar esse indivíduo de forma fiável em qualquer dataset aderindo a essas regras. Vamos criar um exemplo usando Jack Straw: tanto o nome de um rebelde inglês do século XIV, como o de um ministro de gabinete britânico proeminente na administração de Tony Blair. É útil ser capaz de diferenciar as duas pessoas que partilham o mesmo nome. - -Utilizando o modelo acima, no qual cada pessoa é representada por um número único, vamos atribuir ao ministro britânico Jack Straw o número `64183282`. 
O seu par atributo-valor ficaria então com este aspeto: - - pessoa=64183282 - -E vamos atribuir a Jack Straw, descrito no *[Oxford Dictionary of National Biography](http://www.oxforddnb.com)* (em inglês) como 'o enigmático líder rebelde', o número `33059614`, fazendo com que o seu par atributo-valor se pareça com isto: - - pessoa=33059614 - -Desde que todos os que fazem LOD utilizem estes dois números para se referirem aos respetivos Jack Straws, podemos agora procurar a pessoa `64183282` num conjunto de dados abertos conectados e podemos estar confiantes de que estamos a obter a pessoa certa - neste caso, o ministro. - -Os pares atributo-valor também podem armazenar informações sobre outros tipos de entidades: lugares, por exemplo. Jack Straw, o político moderno, era membro do Parlamento britânico, representando o assento de Blackburn. Há mais do que um lugar no Reino Unido chamado Blackburn, para não mencionar outros Blackburn em todo o mundo. Usando os mesmos princípios acima delineados, podemos desambiguar entre os vários Blackburns, atribuindo um identificador único ao lugar correto: Blackburn em Lancashire, Inglaterra. - - Lugar=2655524 - -Neste momento pode estar pensando, "isso é o que um catálogo de biblioteca faz". É verdade que a ideia-chave aqui é a do [ficheiro de autoridade](https://pt.wikipedia.org/wiki/Controle_de_autoridade), central na biblioteconomia (um ficheiro de autoridade é uma lista definitiva de termos que podem ser utilizados num contexto particular, por exemplo, quando se cataloga um livro). Nos dois exemplos acima descritos, utilizamos ficheiros de autoridade para atribuir números (os identificadores únicos) aos Jacks e ao Blackburn. Os números que utilizamos para os dois Jack Straws provêm do [Virtual International Authority File](https://viaf.org) (em inglês) (VIAF) (Arquivo Internacional de Autoridade Virtual), que é mantido por um consórcio de bibliotecas de todo o mundo, de modo a tentar resolver o problema da miríade de maneiras pelas quais a mesma pessoa pode ser referida. O identificador único que utilizamos para o distrito eleitoral de Blackburn provém da [GeoNames](http://www.geonames.org/) (em inglês), uma base de dados geográfica gratuita. - -Vamos tentar ser mais precisos com o que, neste caso, queremos dizer com 'Blackburn'. Jack Straw representou o círculo eleitoral (uma área representada por um único membro do parlamento) de Blackburn, que mudou os seus limites ao longo do tempo. O projeto "[*Digging Into Linked Parliamentary Data*](https://repository.jisc.ac.uk/6544/1/DiLiPaD_final_report_1.pdf)" (Dilipad) (em inglês), no qual trabalhei, produziu identificadores únicos para as filiações partidárias e circunscrições eleitorais para cada membro do parlamento. Neste exemplo, Jack Straw representou o distrito eleitoral conhecido como 'Blackburn' na sua encarnação pós-1955: - - blackburn1955-presente - -Como o VIAF é um ficheiro de autoridade respeitado e bem mantido, fornece um conjunto óbvio de identificadores a utilizar para Jack Straw. Como o distrito eleitoral representado por Straw estava perfeitamente coberto pelos ficheiros de autoridade criados pelo projeto Dilipad, também era um ficheiro de autoridade lógico a utilizar. Infelizmente, nem sempre é tão óbvio qual das listas publicadas online é a melhor para se usar. Uma pode ser mais utilizada do que outra, mas esta última pode ser mais abrangente para um determinado fim. O GeoNames funcionaria melhor do que os identificadores da Dilipad em alguns casos. 
Haverá também casos em que não se consegue encontrar um dataset com essa informação. Por exemplo, se quiser escrever pares atributo-valor sobre si próprio e as suas relações familiares imediatas, terá de inventar os seus próprios identificadores.
-
-Esta falta de ficheiros de autoridade coerentes é um dos maiores desafios que o LOD enfrenta neste momento. [Tim Berners-Lee](https://pt.wikipedia.org/wiki/Tim_Berners-Lee), que inventou uma forma de ligar documentos em rede e criou assim a World Wide Web, é um dos principais proponentes de LOD. Para encorajar uma maior utilização de dados conectados, sugeriu um "[sistema de classificação de cinco estrelas](https://www.w3.org/DesignIssues/LinkedData.html)" (em inglês), para que todos avançassem o mais longe possível em direção ao LOD. Essencialmente, Tim Berners-Lee defende a publicação aberta de dados, especialmente utilizando formatos abertos e normas públicas; melhor ainda é que os dados se liguem também aos dados de outras pessoas.
-
-Com os identificadores únicos atribuídos a todos os elementos, o próximo passo fundamental na criação de LOD é ter uma forma de *descrição* da relação entre Jack Straw (`64183282`) e Blackburn (`blackburn1955-presente`). Em LOD, as relações são expressas utilizando o que é conhecido como '[tripla semântica](https://en.wikipedia.org/wiki/Semantic_triple)' (em inglês). Vamos fazer uma tripla semântica que represente a relação entre Jack Straw e o seu distrito eleitoral:
-
-    pessoa:64183282 papel:representadaNoParlamentoBritânico distritoeleitoral:"blackburn1955-presente" .
-
-A apresentação (ou [sintaxe](https://pt.wikipedia.org/wiki/Sintaxe)) das triplas semânticas, incluindo a pontuação utilizada acima, será discutida mais tarde, na secção sobre RDF e formatos de dados. Por agora, vamos focar-nos na estrutura básica. A tripla semântica, não surpreendentemente, tem três partes. Estas são convencionalmente referidas como sujeito (*subject*), predicado (*predicate*) e objeto (*object*):
-
-| o sujeito | o predicado | o objeto |
-| --------------- | ------------------------- | ----------------------- |
-| pessoa 64183282 | representadaNoParlamentoBritânico | "blackburn1955-presente" |
-
-A forma tradicional de representar uma tripla semântica em forma esquemática é a seguinte (em inglês):
-
-{% include figure.html filename="pt-tr-introducao-dados-abertos-conectados-01.png" alt="Imagem com a representação de uma tripla semântica" caption="Figura 1. Forma tradicional de representar uma tripla semântica." %}
-
-Assim, a nossa tripla semântica do Jack Straw, apresentada de forma mais legível para o ser humano, poderia assumir a seguinte forma:
-
-{% include figure.html filename="pt-tr-introducao-dados-abertos-conectados-02.png" alt="Imagem com a representação de uma tripla semântica aplicada ao exemplo de Jack Straw" caption="Figura 2. Diagrama da tripla semântica que demonstra que Jack Straw representava Blackburn." %}
-
-Por enquanto, é importante fixar três pontos-chave:
-
-- O LOD deve estar aberto e disponível para qualquer pessoa na Internet (caso contrário, não está "aberto")
-- Os defensores do LOD têm como objetivo normalizar as formas de referência a entidades únicas
-- O LOD consiste em triplas semânticas que descrevem as relações entre entidades
-
-## O papel do *Uniform Resource Identifier* (URI)
-
-Uma parte essencial de LOD é o [Identificador Uniforme de Recurso](https://pt.wikipedia.org/wiki/URI) (*Uniform Resource Identifier* ou URI).
O URI é uma forma única e fiável de representar uma entidade (uma pessoa, um objeto, uma relação, etc.), de uma forma que é utilizável por todos no mundo. - -Na secção anterior, utilizamos dois números diferentes para identificar os diferentes Jack Straws. - - pessoa="64183282" - - pessoa="33059614" - -O problema é que em todo o mundo existem muitas bases de dados que contêm pessoas com estes números e são, provavelmente, todas pessoas diferentes. Fora do nosso contexto imediato, estes números não identificam indivíduos únicos. Vamos tentar resolver isso. Aqui estão estes mesmos identificadores, mas como URIs: - - http://viaf.org/viaf/64183282/ - - http://viaf.org/viaf/33059614/ - -Tal como o número único desambiguou os nossos dois Jack Straws, o URI completo acima ajuda-nos a desambiguar entre todos os diferentes ficheiros de autoridade lá fora. Neste caso, é evidente que estamos a utilizar o VIAF como o nosso ficheiro de autoridade. Com certeza, já viu esta forma de desambiguação muitas vezes na web. Existem muitos websites em todo o mundo com páginas chamadas `/home` ou `/faq`. Mas não há confusão porque o [domínio](https://pt.wikipedia.org/wiki/Nome_de_dom%C3%ADnio) (a primeira parte do [Localizador Uniforme de Recursos](https://pt.wikipedia.org/wiki/URL) (*Uniform Resource Locator* ou URL) - ex. `bbc.co.uk`) é único, portanto, todas as páginas que fazem parte desse domínio são únicas em outras páginas `/faq` de outros websites. No endereço `http://www.bbc.co.uk/faqs` é a parte `bbc.co.uk` que torna as páginas subsequentes únicas. Isto é tão óbvio para as pessoas que utilizam a web a toda a hora que não pensam sobre isso. Provavelmente, também sabe que se quiser criar um website chamado `bbc.co.uk` não conseguirá, porque esse nome já foi registado com a autoridade apropriada, que é o [Sistema de Nomes de Domínio](https://pt.wikipedia.org/wiki/Sistema_de_Nomes_de_Dom%C3%ADnio) (*Domain Name System*). O registo garante a singularidade. Os URIs também têm de ser únicos. - -Embora os exemplos acima se pareçam com URLs, também é possível construir um URI que não se pareça nada com um URL. Temos muitas formas de identificar pessoas e coisas de forma única e raramente pensamos ou nos preocupamos com isso. Os códigos de barras, números de passaporte, até mesmo os códigos postais são concebidos para serem únicos. Os números de telefone são frequentemente colocados como placas de loja precisamente porque são únicos. Todos eles podem ser utilizados como URIs. - -Quando criamos URIs para as entidades descritas pelo projeto '[Tobias](https://gtr.ukri.org/projects?ref=AH%2FN003446%2F1#/tabOverview)' (em inglês), escolhemos uma estrutura do tipo URL e escolhemos utilizar o nosso espaço web institucional, pondo de lado `data.history.ac.uk/tobias-project/` como um lugar dedicado à hospedagem destes URIs. Ao colocá-lo em `data.history.ac.uk` em vez de `history.ac.uk`, houve uma separação clara entre URIs e as páginas do website. Por exemplo, um dos URIs do projeto Tobias era 'http://data.history.ac.uk/tobias-project/person/15601'. Embora o formato dos URIs acima mencionados seja o mesmo que um URL, eles não se ligam a websites (tente colá-lo num navegador web). Muitas pessoas novas no LOD acham isto confuso. Todos os URLs são URIs, mas nem todos os URIs são URLs. 
(nota de tradução: tendo em conta que o site original do projeto Tobias já não se encontra disponível, o leitor da lição deve entender os exemplos aqui indicados como meramente ilustrativos daquilo que o autor pretende demonstrar) Um URI pode descrever qualquer coisa, enquanto o URL descreve a localização de algo na web. Assim, um URL diz-lhe a localização de uma página web, de um ficheiro ou algo semelhante. Um URI faz apenas o trabalho de identificar algo. Tal como o Número internacional Normalizado do Livro (International Standard Book Number ou [ISBN](https://www.iso.org/standard/36563.html) (em inglês) `978-0-1-873354-6` identifica exclusivamente uma edição de capa dura de _Baptism, Brotherhood and Belief in Reformation Germany_, de Kat Hill, mas não diz onde obter uma cópia. Para isso precisaria de algo como um [número de acesso](https://pt.wikipedia.org/wiki/N%C3%BAmero_de_acesso_(biblioteconomia)), que lhe dá uma localização exata de um livro numa prateleira de uma biblioteca específica. - -Há um pouco de jargão em torno de URIs. As pessoas falam sobre se são ou não [desreferenciáveis](https://pt.wikipedia.org/wiki/Refer%C3%AAncia_(ci%C3%AAncia_da_computa%C3%A7%C3%A3o)). Isso apenas significa que *podemos transformar uma referência abstrata em algo diferente?* Por exemplo, se colarmos um URI na barra de endereços de um browser, será que ele encontra algo? O VIAF URI para o historiador Simon Schama é: - - http://viaf.org/viaf/46784579 - -Se o colocarmos no browser, receberemos de volta uma página web sobre Simon Schama que contém dados estruturados sobre ele e a sua história editorial. Isto é muito útil por um motivo. A partir do URI não é óbvio quem ou mesmo o que é que está a ser referido. Da mesma forma, se tratarmos um número de telefone (com código internacional) como o URI de uma pessoa, então deve ser desreferenciável. Alguém pode atender o telefone e pode até ser Schama. - -Mas isto não é essencial. Muitos URIs não são desreferenciáveis, como no exemplo acima do projeto Tobias. Não se pode encontrá-lo em lado nenhum; é uma convenção. - -O exemplo do VIAF leva-nos a outra coisa importante sobre os URIs: não os invente a não ser que tenha de o fazer. As pessoas e organizações têm feito esforços para construir boas listas de URI e o LOD não vai funcionar eficazmente se as pessoas duplicarem esse trabalho criando novos URIs desnecessariamente. Por exemplo, o VIAF tem o apoio de muitas bibliotecas internacionais. Se quiser construir URIs para pessoas, o VIAF é uma escolha muito boa. Se não conseguir encontrar algumas pessoas no VIAF, ou noutras listas de autoridade, só então poderá precisar fazer a sua própria. - -## Como o LOD organiza o conhecimento: ontologias - -Pode não ter sido óbvio a partir das triplas semânticas individuais que analisamos na secção anterior, mas o LOD pode responder a perguntas complexas. Quando se juntam as triplas semânticas, estas formam um [Mapa conceitual](https://pt.wikipedia.org/wiki/Mapa_conceitual), devido à forma como as triplas semânticas se interligam. Suponhamos que queremos encontrar uma lista de todas as pessoas que foram alunos do compositor Franz Liszt. Se a informação estiver em triplas semânticas de dados conectados sobre pianistas e os seus professores, podemos descobrir o que procuramos com uma consulta (veremos esta linguagem de consulta, chamada SPARQL, na secção final). - -Por exemplo, o pianista Charles Rosen foi aluno do pianista Moriz Rosenthal, que foi aluno de Franz Liszt. 
Vamos agora expressar isto em duas triplas semânticas (vamos cingir-nos às sequências de caracteres para os nomes em vez dos números de identificação, para tornar os exemplos mais legíveis): - - "Franz Liszt" ensinouPianoAo "Moriz Rosenthal" . - "Moriz Rosenthal" ensinouPianoAo "Charles Rosen" . - -Poderíamos igualmente ter criado as nossas triplas semânticas desta forma: - - "Charles Rosen" aprendeuPianoCom "Moriz Rosenthal" . - "Moriz Rosenthal" aprendeuPianoCom "Franz Liszt" . - -Estamos a inventar exemplos simplesmente para fins de ilustração, mas se quiser ligar os seus dados a outros datasets na "nuvem de dados conectados" deve olhar para as convenções que são utilizadas nesses datasets e fazer o mesmo. Na verdade, esta é uma das características mais úteis de LOD porque muito do trabalho já foi feito. As pessoas têm passado muito tempo a desenvolver formas de modelar a informação dentro de uma determinada área de estudo e a pensar sobre como as relações dentro dessa área podem ser representadas. Estes modelos são geralmente conhecidos como ontologias. Uma ontologia é uma abstração que permite a representação de um conhecimento particular sobre o mundo. Neste sentido, estas são bastante recentes e foram concebidas para fazer o que uma [taxonomia](https://pt.wikipedia.org/wiki/Taxonomia_(geral)) hierárquica faz (pense na classificação das espécies na [Taxonomia de Lineu](https://pt.wikipedia.org/wiki/Taxonomia_de_Lineu), mas de uma forma mais flexível. - -Uma ontologia é mais flexível porque não é hierárquica. Visa representar a fluidez do mundo real, onde as coisas podem ser relacionadas umas com as outras de formas mais complexas do que quando são representadas por uma estrutura hierárquica em forma de árvore. Em vez disso, uma ontologia é mais parecida com uma teia de aranha. - -O que quer que pretenda representar com LOD, sugerimos que encontre um vocabulário existente e que o utilize, em vez de tentar escrever o seu próprio vocabulário. Esta página tem [uma lista de alguns dos vocabulários mais populares](http://semanticweb.org/wiki/Main_Page.html) (em inglês). - -Uma vez que o nosso exemplo acima se concentra nos pianistas, seria uma boa ideia encontrar uma ontologia apropriada em vez de criar o nosso próprio sistema. De facto, existe [uma ontologia para música](http://web.archive.org/web/20170715094229/http://www.musicontology.com/) (em inglês). Para além de uma especificação bem desenvolvida, esta tem também alguns exemplos úteis da sua utilização. Pode dar uma olhada nas [páginas de iniciação](http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html) (em inglês) para ter uma ideia de como se pode utilizar esta ontologia em particular. - -Infelizmente, não conseguimos encontrar nada que descreva a relação entre um professor e um aluno na Ontologia da Música. Mas a ontologia é publicada abertamente, logo podemos utilizá-la para descrever outras características da música e depois criar a nossa própria extensão. Se então publicássemos a nossa extensão abertamente, outros poderiam utilizá-la se assim o desejassem e este ato pode tornar-se num padrão. Embora o projeto *Music Ontology* (Ontologia Musical) não tenha a relação que precisamos, o [projeto *Linked Jazz*](https://linkedjazz.org/) (em inglês) permite o uso de '*mentorOf*', o que parece funcionar bem no nosso caso. Embora esta não seja uma solução ideal, é uma solução que faz um esforço para utilizar o que já existe por aí. 
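-
-A título de ilustração (e assumindo, apenas para exemplificar, que o predicado liga o mentor ao aluno), as nossas duas triplas semânticas sobre pianistas poderiam ser reescritas com essa relação assim:
-
-    "Franz Liszt" mentorOf "Moriz Rosenthal" .
-    "Moriz Rosenthal" mentorOf "Charles Rosen" .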
- -Agora, se estivéssemos a estudar a história do pianismo, poderíamos querer identificar muitos pianistas que foram ensinados por alunos de Liszt, para estabelecer uma espécie de árvore genealógica e ver se estes 'netos' de Liszt têm algo em comum. Poderíamos pesquisar os alunos de Liszt, fazer uma grande lista deles, depois pesquisar cada um dos alunos e tentar fazer listas de quaisquer alunos que eles tivessem. Com LOD poderíamos (novamente, se as triplas semânticas existissem) escrever uma query semelhante a: - - Dá-me os nomes de todos os pianistas ensinados por x - onde x aprendeu piano com Liszt - -Isto encontraria todas as pessoas do dataset que eram alunos de alunos de Liszt. Não nos entusiasmemos demasiado: esta pergunta não nos dará todos os alunos de todos os alunos de Liszt que já existiram, porque essa informação provavelmente não existe e não existe dentro de nenhum conjunto de triplas semânticas existentes. Lidar com dados do mundo real mostra todo o tipo de omissões e inconsistências, que veremos quando olharmos para o maior conjunto de LOD, a [DBpedia](https://www.dbpedia.org/), na secção final. - -Se tiver utilizado [bases de dados relacionais](https://pt.wikipedia.org/wiki/Banco_de_dados_relacional) poderá estar a pensar que estas podem desempenhar a mesma função. No nosso caso de Liszt, a informação sobre pianistas acima descrita pode estar organizada numa [tabela](https://pt.wikipedia.org/wiki/Tabela_(banco_de_dados)) de base de dados denominada por algo como 'Alunos'. - -| IDaluno | IDprofessor | -| ------- | --------- | -| 31 | 17 | -| 35 | 17 | -| 49 | 28 | -| 56 | 28 | -| 72 | 40 | - -Se não estiver familiarizado com bases de dados não se preocupe. Mas, provavelmente, ainda pode ver que alguns pianistas nesta tabela tinham o mesmo professor (números 17 e 28). Sem entrar em pormenores, se Liszt estiver nesta tabela de bases de dados, seria bastante fácil extrair os alunos de Liszt, ao utilizar um ``Join`` ([*join*](https://pt.wikipedia.org/wiki/Join_(SQL))). - -De facto, as bases de dados relacionais podem oferecer resultados semelhantes ao LOD. A grande diferença é que o LOD pode ir mais longe: pode ligar datasets que foram criados sem intenção explícita de serem ligados entre si. A utilização do [Quadro de Descrição de Recursos](https://pt.wikipedia.org/wiki/Resource_Description_Framework) (*Resource Description Framework* ou RDF) e URIs permite que isto aconteça. - -## RDF e formatos de dados - -LOD utiliza uma norma, definida pelo [Consórcio World Wide Web](https://www.w3.org/) (em inglês) (*World Wide Web Consortium* ou W3C), chamada *[Resource Description Framework](https://pt.wikipedia.org/wiki/Resource_Description_Framework)* ou apenas RDF. As normas são úteis desde que sejam amplamente adotadas - pense no metro ou nos tamanhos de parafuso padrão - mesmo que sejam essencialmente arbitrárias. O RDF tem sido amplamente adotado como a norma LOD. - -Ouvirá frequentemente o LOD referido simplesmente como RDF. Atrasamos a conversa sobre o RDF até agora porque é bastante abstrato. RDF é um [modelo de dados](https://pt.wikipedia.org/wiki/Modelagem_de_dados) que descreve como é que os dados são estruturados num nível teórico. Assim, a insistência na utilização de triplas semânticas (em vez de quatro partes, ou duas ou nove, por exemplo) é uma regra no RDF. Mas quando se trata de questões mais práticas, há algumas escolhas quanto à implementação. Assim, o RDF diz-lhe o que tem de fazer, mas não exatamente como o tem de fazer. 
Estas escolhas dividem-se em duas áreas: como se escrevem as coisas (serialização) e as relações que as suas triplas semânticas descrevem.
-
-### Serialização
-
-A [serialização](https://pt.wikipedia.org/wiki/Serializa%C3%A7%C3%A3o) é o termo técnico para "como se escrevem as coisas". O chinês padrão (mandarim) pode ser escrito em caracteres tradicionais, em caracteres simplificados ou em romanização Pinyin, e a língua em si não muda. Tal como o mandarim, o RDF pode ser escrito de várias formas. Aqui vamos olhar para duas (há outras, mas, por uma questão de simplicidade, vamos concentrar-nos nestas):
-
-1) [Turtle](https://en.wikipedia.org/wiki/Turtle_(syntax)) (em inglês)
-2) [RDF/XML](https://pt.wikipedia.org/wiki/RDF/XML)
-
-Reconhecer a serialização que está a ser utilizada significa que podemos escolher ferramentas apropriadas concebidas para esse formato. Por exemplo, o RDF pode vir serializado no formato [XML](https://pt.wikipedia.org/wiki/XML). Podemos então utilizar uma ferramenta ou biblioteca de código concebida para analisar esse formato em particular, o que é útil se já soubermos trabalhar com ele. O reconhecimento do formato também lhe dá as palavras-chave certas para procurar ajuda online. Muitos recursos permitem descarregar as suas bases de dados LOD, podendo escolher a serialização em que deseja fazer o *download*.
-
-#### Turtle
-
-'Turtle' é um jogo de palavras: 'Tur' é a abreviatura de '*terse*' (conciso) e 'tle' é a abreviatura de '*triple language*' (linguagem de triplas). O Turtle é uma forma agradavelmente simples de escrever triplas semânticas.
-
-O Turtle usa apelidos ou atalhos, conhecidos como [prefixos](https://www.w3.org/TeamSubmission/turtle/#sec-tutorial) (em inglês), o que nos poupa de ter de escrever URIs completos todas as vezes. Voltemos ao URI que criamos na secção anterior:
-
-    http://data.history.ac.uk/tobias-project/person/15601
-
-Não queremos escrever isto cada vez que nos referimos a esta pessoa (lembrar-se-á de Jack Straw). Por isso, só temos de enunciar o nosso atalho:
-
-    @prefix toby: <http://data.history.ac.uk/tobias-project/person/> .
-
-Então Jack é `toby:15601`, que substitui o longo URI e é mais fácil à vista. Escolhi 'toby', mas poderia igualmente escolher qualquer outra sequência de letras.
-
-Vamos agora passar de Jack Straw para William Shakespeare e utilizar o Turtle para descrever algumas coisas sobre as suas obras. Vamos ter de decidir que ficheiros de autoridade utilizar, uma escolha que, como mencionado acima, é melhor fazer olhando para outros conjuntos de LOD. Aqui usaremos como um dos nossos prefixos o [*Dublin Core*](https://pt.wikipedia.org/wiki/Dublin_Core), uma norma de [metadados](https://pt.wikipedia.org/wiki/Metadados) de bibliotecas, o [Número de controle da Biblioteca do Congresso](https://en.wikipedia.org/wiki/Library_of_Congress_Control_Number) (*Library of Congress Control Number* ou LCCN) como outro e, por último, o VIAF, que já lhe deverá ser familiar. Juntos, estes três ficheiros de autoridade fornecem identificadores únicos para todas as entidades que tenciono utilizar neste exemplo:
-
-    @prefix lccn: <http://lccn.loc.gov/> .
-    @prefix dc: <http://purl.org/dc/terms/> .
-    @prefix viaf: <http://viaf.org/viaf/> .
-
-    lccn:n82011242 dc:creator viaf:96994048 .
-
-Note o espaço antes do ponto final após a última linha. Esta é a forma de o Turtle indicar o fim de uma tripla. Tecnicamente, o espaço não é necessário, mas facilita a leitura após uma longa sequência de caracteres.
-
-No exemplo acima, `lccn:n82011242` representa *Macbeth*; `dc:creator` liga *Macbeth* ao seu autor; e `viaf:96994048` representa William Shakespeare.
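-
-Para tornar visível o que os prefixos abreviam, eis a mesma tripla escrita com os URIs completos, num esboço meramente ilustrativo que assume os espaços de nomes declarados acima:
-
-    <http://lccn.loc.gov/n82011242> <http://purl.org/dc/terms/creator> <http://viaf.org/viaf/96994048> .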
-
-O Turtle também permite listar triplas semânticas sem nos preocuparmos em repetir cada URI quando acabou de ser usado. Acrescentemos a data em que os estudiosos pensam que *Macbeth* foi escrito, utilizando o par atributo-valor do Dublin Core `dc:created "YYYY"`:
-
-    @prefix lccn: <http://lccn.loc.gov/> .
-    @prefix dc: <http://purl.org/dc/terms/> .
-    @prefix viaf: <http://viaf.org/viaf/> .
-
-    lccn:n82011242 dc:creator viaf:96994048 ;
-        dc:created "1606" .
-
-Lembra-se da estrutura da tripla semântica discutida na secção 1? Aí demos este exemplo:
-
-    1 pessoa 15601 (o sujeito)
-    2 representadaNoParlamentoBritânico (o predicado)
-    3 "Blackburn" (o objeto)
-
-O essencial é que o predicado liga o sujeito e o objeto: descreve a relação entre eles. O sujeito vem primeiro na tripla semântica, mas isso é uma questão de convenção, como discutimos com o exemplo das pessoas que foram ensinadas a tocar piano por Liszt.
-
-Pode usar-se um ponto e vírgula se o sujeito for o mesmo mas o predicado e o objeto forem diferentes, ou uma vírgula se o sujeito e o predicado forem os mesmos e apenas o objeto for diferente.
-
-    lccn:no2010025398 dc:creator viaf:96994048 ,
-        viaf:12323361 .
-
-Aqui estamos a dizer que Shakespeare (`96994048`) e John Fletcher (`12323361`) foram ambos criadores da obra *The Two Noble Kinsmen*.
-
-Quando analisamos as ontologias, anteriormente, sugeri que visse a [*Music Ontology*](http://web.archive.org/web/20170718143925/http://musicontology.com/docs/getting-started.html) (em inglês). Dê agora uma nova olhada. Continua a ser complicado, mas será que agora faz mais sentido?
-
-Uma das ontologias mais acessíveis é a '*Friend of a Friend*' (amigo de um amigo) ou [FOAF](https://en.wikipedia.org/wiki/FOAF_(ontology)) (em inglês). Esta é concebida para descrever pessoas e, talvez por essa razão, é bastante intuitiva. Se, por exemplo, quiser escrever-me para me dizer que este curso é a melhor coisa que já leu, aqui está o meu email expresso como uma tripla semântica em FOAF:
-
-    @prefix foaf: <http://xmlns.com/foaf/0.1/> .
-
-    :"Jonathan Blaney" foaf:mbox <mailto:jonathan.blaney@sas.ac.uk> .
-
-#### RDF/XML
-
-Em contraste com o Turtle, o RDF/XML pode parecer um pouco pesado. Para começar, vamos apenas converter uma tripla semântica do Turtle acima, aquela que refere que Shakespeare foi o criador de *The Two Noble Kinsmen*:
-
-    lccn:no2010025398 dc:creator viaf:96994048 .
-
-Em RDF/XML, com os prefixos declarados dentro do trecho de código XML, fica:
-
-``` xml
-<rdf:RDF
-    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-    xmlns:dc="http://purl.org/dc/terms/">
-  <rdf:Description rdf:about="http://lccn.loc.gov/no2010025398">
-    <dc:creator rdf:resource="http://viaf.org/viaf/96994048"/>
-  </rdf:Description>
-</rdf:RDF>
-```
-
-O formato RDF/XML tem a mesma informação básica que o formato Turtle, mas é escrito de forma muito diferente, baseando-se nos princípios das etiquetas XML encaixadas.
-
-Passemos a um exemplo diferente para mostrar como o RDF/XML combina triplas semânticas e, ao mesmo tempo, introduzir o [*Simple Knowledge Organization System*](https://pt.wikipedia.org/wiki/Simple_Knowledge_Organization_System) (SKOS) (Sistema Simples de Organização do Conhecimento), que foi concebido para codificar tesauros ou taxonomias.
-
-    <skos:Concept rdf:about="http://data.history.ac.uk/tobias-project/concept/21250">
-        <skos:prefLabel>Abdication</skos:prefLabel>
-    </skos:Concept>
-
-Aqui estamos a dizer que o conceito SKOS `21250`, *abdication* (abdicação), tem um rótulo preferido de "*abdication*". A forma como funciona é que o elemento sujeito (incluindo o número do conceito, que em termos de XML é o valor de um atributo) tem o predicado e o objeto encaixados no seu interior. O elemento encaixado é o predicado e [o nó folha](https://pt.wikipedia.org/wiki/%C3%81rvore_(estrutura_de_dados)#Terminologia) é o objeto. Este exemplo é retirado de um projeto para publicar um [*Tesauro de História Britânica e Irlandesa*](https://www.history.ac.uk/research/digital-history) (em inglês).
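-
-Para comparação, a mesma afirmação pode esboçar-se em Turtle, assumindo o espaço de nomes habitual do SKOS e o URI de conceito, meramente ilustrativo, usado acima:
-
-    @prefix skos: <http://www.w3.org/2004/02/skos/core#> .
-
-    <http://data.history.ac.uk/tobias-project/concept/21250> skos:prefLabel "Abdication" .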
-
-Tal como com o Turtle, podemos acrescentar mais triplas semânticas. Portanto, vamos declarar que o termo mais restrito na nossa hierarquia de assuntos, um nível abaixo de *Abdication*, vai ser *Abdication crisis (1936)*.
-
-    <skos:Concept rdf:about="http://data.history.ac.uk/tobias-project/concept/21250">
-        <skos:prefLabel>Abdication</skos:prefLabel>
-    </skos:Concept>
-
-    <skos:Concept rdf:about="http://data.history.ac.uk/tobias-project/concept/21250">
-        <skos:narrower rdf:resource="http://data.history.ac.uk/tobias-project/concept/19838"/>
-    </skos:Concept>
-
-Lembra-se de como os predicados e os objetos são encaixados dentro do sujeito? Aqui já o fizemos duas vezes com o mesmo sujeito, pelo que podemos tornar isto menos prolixo aninhando ambos os conjuntos de predicados e objetos dentro do mesmo sujeito:
-
-    <skos:Concept rdf:about="http://data.history.ac.uk/tobias-project/concept/21250">
-        <skos:prefLabel>Abdication</skos:prefLabel>
-        <skos:narrower rdf:resource="http://data.history.ac.uk/tobias-project/concept/19838"/>
-    </skos:Concept>
-
-Se estiver familiarizado com XML, isto será fácil. Se não estiver, talvez prefira um formato como o Turtle. Mas a vantagem aqui é que, ao criar o seu RDF/XML, pode usar as ferramentas habituais disponíveis para XML, como editores e analisadores dedicados, para verificar se o seu RDF/XML está corretamente formatado. Se não for uma pessoa que use XML, recomendo o Turtle, podendo usar uma [ferramenta online](http://www.easyrdf.org/converter) (em inglês) para verificar se a sua sintaxe está correta.
-
-## Consulta de RDF com SPARQL
-
-Para esta secção final iremos interrogar algum LOD e ver o que poderá ser feito com ele.
-
-A linguagem de consulta que usamos para LOD chama-se [SPARQL](https://pt.wikipedia.org/wiki/SPARQL). É um daqueles acrónimos recursivos amados pelos técnicos: ***S**PARQL **P**rotocol **a**nd **R**DF **Q**uery **L**anguage* (Protocolo SPARQL e Linguagem de Consulta RDF).
-
-Como mencionado no início, o *Programming Historian* tem [uma lição completa](/en/lessons/retired/graph-databases-and-SPARQL) (em inglês), de Matthew Lincoln, sobre a utilização do SPARQL (embora já não seja mantida; ver a nota no início desta tradução). A secção final aqui presente é apenas uma visão geral dos conceitos básicos. Se o SPARQL despertar o seu interesse, pode obter uma fundamentação completa no tutorial de Lincoln.
-
-Vamos realizar as nossas consultas SPARQL na [DBpedia](https://www.dbpedia.org/), que é um enorme conjunto de LOD derivado da Wikipedia. Além de estar cheio de informação que é muito difícil de encontrar através da habitual interface da Wikipédia, tem vários "pontos de extremidade" (*endpoints*) SPARQL - interfaces onde se podem digitar consultas SPARQL e obter resultados a partir das triplas semânticas da DBpedia.
-
-O *endpoint* de consulta SPARQL que utilizamos chama-se [snorql](http://dbpedia.org/snorql/) (em inglês). Estes *endpoints* ficam ocasionalmente offline. Se for o caso, tente procurar por *dbpedia sparql* e deverá encontrar um substituto semelhante.
-
-Se for ao URL do snorql acima, verá, no início, uma série de prefixos que já foram declarados para nós, o que é útil. Agora também já reconhecerá alguns dos prefixos.
-
-{% include figure.html filename="en-or-intro-to-linked-data-03.png" alt="Captura de tela com a interface de criação de consultas snorql" caption="Figura 3. Caixa de consulta padrão do snorql, com alguns prefixos declarados para si." %}
-
-Na caixa de consulta, abaixo das declarações de prefixo, deverá ver o seguinte:
-
-    SELECT * WHERE {
-    ...
-    }
-
-Se alguma vez escreveu uma consulta de bases de dados em *Structured Query Language*, [mais conhecida como SQL](https://pt.wikipedia.org/wiki/SQL), isto vai parecer-lhe bastante familiar e vai ajudá-lo a aprender SPARQL. Se não, não se preocupe.
As palavras-chave aqui utilizadas, `SELECT` (SELECIONAR) e `WHERE` (ONDE), não são sensíveis a maiúsculas e minúsculas, mas algumas partes de uma consulta SPARQL podem ser (ver abaixo), por isso recomendo que mantenha exatamente a grafia dada ao longo das consultas neste curso.
-
-Aqui `SELECT` significa "encontrar alguma coisa" e `*` significa "dá-me tudo". `WHERE` introduz uma condição, que é onde vamos colocar os detalhes sobre o tipo de coisas que queremos que a consulta encontre.
-
-Vamos começar com algo simples para ver como é que isto funciona. Cole (ou, melhor, escreva) isto na caixa de consulta:
-
-    SELECT * WHERE {
-    :Lyndal_Roper ?b ?c
-    }
-
-Clique em '*go*' (ir). Se deixar o menu *drop-down* como '*browse*' (navegar) deverá obter duas colunas com os rótulos "b" e "c". (Note que aqui as maiúsculas e minúsculas importam: `lyndal_roper` não lhe dará resultados.)
-
-{% include figure.html filename="en-or-intro-to-linked-data-04.png" alt="Captura de tela com a interface de resultados de consultas snorql" caption="Figura 4. Topo das listas de resultados de uma consulta com todas as triplas semânticas com 'Lyndal_Roper' como sujeito." %}
-
-Então o que é que acabou de acontecer? E como é que soubemos o que escrever?
-
-Na verdade, não sabíamos. Esse é um dos problemas com *endpoints* SPARQL: quando não se conhece um dataset, é preciso experimentar coisas e descobrir que termos são utilizados. Como isto vem da *Wikipedia*, e estávamos interessados em saber que informação sobre historiadores podíamos encontrar, fomos à página da *Wikipedia* da historiadora [Lyndal Roper](https://en.wikipedia.org/wiki/Lyndal_Roper) (em inglês).
-
-A parte final do URL é `Lyndal_Roper` e concluímos que é provável que esta cadeia de caracteres seja a forma como Roper é referida na DBpedia. Como não sabemos o que mais poderia estar em triplas semânticas que mencionam Roper, utilizamos `?b` e `?c`: estes são apenas marcadores de posição. Poderia igualmente ter digitado `?whatever` e `?you_like` e as colunas teriam esses rótulos. Quando quiser ser mais preciso sobre o que está a pesquisar, será importante etiquetar as colunas de forma significativa.
-
-Experimente agora a sua própria consulta SPARQL: escolha uma página da *Wikipedia*, copie a parte final do URL, após a última barra, e coloque-a no lugar de `Lyndal_Roper`. Depois clique em 'go'.
-
-A partir da informação que se obtém destes resultados é possível gerar consultas mais precisas. Isto pode ser pouco fiável, por isso não se preocupe se algumas não funcionarem.
-
-Vamos voltar aos resultados da consulta que fizemos há momentos:
-
-    SELECT * WHERE {
-    :Lyndal_Roper ?b ?c
-    }
-
-Podemos ver uma longa lista na coluna etiquetada _c_. Estes são todos os atributos que Roper tem na *DBpedia* e que nos ajudarão a encontrar outras pessoas com estes atributos. Por exemplo, podemos ver `http://dbpedia.org/class/yago/Historian110177150`. Poderemos utilizar isto para obter uma lista de historiadores? Vamos colocá-lo na nossa consulta, mas em terceiro lugar, porque era onde estava quando o encontrámos nos resultados de Lyndal Roper. A consulta fica com este aspecto:
-
-    SELECT * WHERE {
-    ?historian_name ?predicate <http://dbpedia.org/class/yago/Historian110177150>
-    }
-
-Fizemos uma pequena mudança aqui.
Se esta consulta funcionar, esperamos que os nossos historiadores apareçam na primeira coluna, porque 'historiador' não parece poder ser um predicado: não funciona como um verbo numa frase. Por isso vamos chamar à primeira coluna de resultados 'historian_name' e à segunda (sobre a qual não sabemos nada) 'predicate' (predicado).
-
-Execute a consulta. Deverá encontrar uma grande lista de historiadores.
-
-{% include figure.html filename="en-or-intro-to-linked-data-05.png" alt="Duas capturas de tela com a interface de consultas snorql e respectivos resultados" caption="Figura 5. Historiadores de acordo com a DBpedia." %}
-
-Assim, esta ferramenta funciona para criar listas, o que é útil, mas seria muito mais poderoso combinar listas para obter intersecções de conjuntos. Encontrei mais algumas coisas que pode ser interessante consultar nos atributos DBpedia de Lyndal Roper: `<http://dbpedia.org/class/yago/WikicatBritishHistorians>` e `<http://dbpedia.org/class/yago/WikicatWomenHistorians>`. É muito fácil combiná-los pedindo uma variável a ser devolvida (no nosso caso é `?name` (nome)) e utilizando-a depois em múltiplas linhas da consulta. Note também o espaço e o ponto final no fim da primeira linha que começa com `?name`:
-
-    SELECT ?name
-    WHERE {
-    ?name ?b <http://dbpedia.org/class/yago/WikicatBritishHistorians> .
-    ?name ?b <http://dbpedia.org/class/yago/WikicatWomenHistorians>
-    }
-
-Funciona! Devemos obter cinco resultados. Na altura em que escrevo, há cinco historiadoras britânicas na *DBpedia*...
-
-{% include figure.html filename="en-or-intro-to-linked-data-06.png" alt="Duas capturas de tela com a interface de consultas snorql e respectivos resultados" caption="Figura 6. Historiadoras britânicas segundo a DBpedia." %}
-
-Apenas cinco historiadoras britânicas? Claro que há, na realidade, muitas mais do que isso, como poderíamos facilmente mostrar substituindo o nome de, digamos, Alison Weir na nossa primeira consulta sobre Lyndal Roper. Isto leva-nos ao problema da *DBpedia* que mencionamos anteriormente: as entradas não são marcadas de forma consistente com a informação estrutural do tipo que a *DBpedia* utiliza. A nossa consulta pode listar algumas historiadoras britânicas, mas não podemos utilizá-la para gerar uma lista significativa de todas as pessoas nesta categoria. Tudo o que encontrámos foram as pessoas cujas entradas na *Wikipedia* alguém decidiu classificar como "historiadora britânica" e "historiadora".
-
-Com SPARQL na *DBpedia*, é preciso ter cuidado com as inconsistências do material de origem coletiva. Poderá usar o SPARQL exatamente da mesma forma num dataset mais confiável, por exemplo, os dados do governo britânico: https://data-gov.tw.rpi.edu//sparql (em inglês), e esperar obter resultados mais robustos (há um breve tutorial para este dataset em https://data-gov.tw.rpi.edu/wiki/A_crash_course_in_SPARQL (em inglês)).
-
-No entanto, apesar das suas inconsistências, a *DBpedia* é um ótimo local para aprender SPARQL. Esta foi apenas uma breve introdução, mas há muito mais em [Usando SPARQL para aceder ao Linked Open Data](/en/lessons/retired/graph-databases-and-SPARQL) (em inglês).
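-
-Antes de passarmos às leituras adicionais, fica um último esboço, meramente ilustrativo, que reutiliza as mesmas classes yago acima: `DISTINCT` elimina resultados duplicados e `LIMIT` restringe o número de resultados devolvidos, o que é uma boa prática em *endpoints* públicos:
-
-    SELECT DISTINCT ?name
-    WHERE {
-    ?name ?b <http://dbpedia.org/class/yago/WikicatBritishHistorians> .
-    ?name ?b <http://dbpedia.org/class/yago/WikicatWomenHistorians>
-    }
-    LIMIT 10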
- - -## Leituras e recursos adicionais - -* Dean Allemang e James Hendler, *Semantic Web for the Working Ontologist*, 2nd edn, Elsevier, 2011 -* Tim Berners-Lee [*Linked Data*](https://www.w3.org/DesignIssues/LinkedData.html) (em inglês) -* Bob DuCharme, *Learning SPARQL*, O'Reilly, 2011 -* [Blog de Bob DuCharme](http://www.snee.com/bobdc.blog/) (em inglês) também vale a pena ler -* Richard Gartner, *Metadata: Shaping Knowledge from Antiquity to the Semantic Web*, Springer, 2016 -* Seth van Hooland and Ruben Verborgh, *Linked Data for Libraries, Archives and Museums*, 2015 -* Matthew Lincoln ['*Using SPARQL to access Linked Open Data*'](/en/lessons/retired/graph-databases-and-SPARQL) (em inglês) -* [*Linked Data guides and tutorials*](http://linkeddata.org/guides-and-tutorials) (em inglês) -* Dominic Oldman, Martin Doerr e Stefan Gradmann, '*Zen and the Art of Linked Data: New Strategies for a Semantic Web of Humanist Knowledge*', em *A New Companion to Digital Humanities*, editado por Susan Schreibman et al. -* Max Schmachtenberg, Christian Bizer e Heiko Paulheim, [*State of the LOD Cloud 2017*](http://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/) (em inglês) -* David Wood, Marsha Zaidman e Luke Ruth, *Linked Data: Structured data on the Web*, Manning, 2014 - -## Agradecimentos - -Gostaria de agradecer aos meus dois colegas revisores, Matthew Lincoln e Terhi Nurmikko-Fuller e ao meu editor, Adam Crymble, por me ajudarem generosamente a melhorar esta lição com numerosas sugestões, esclarecimentos e correções. Este tutorial baseia-se num outro escrito como parte do '*Thesaurus of British and Irish History as SKOS*' [*(Tobias) project*](https://gtr.ukri.org/projects?ref=AH%2FN003446%2F1#/tabOverview) (em inglês), financiado pelo [AHRC](http://www.ahrc.ac.uk/) (em inglês). A lição foi revista para o projeto *Programming Historian*. +--- +title: Introdução aos Dados Abertos Conectados +layout: lesson +collection: lessons +slug: introducao-dados-abertos-conectados +original: intro-to-linked-data +date: 2013-08-05 +translation_date: 2022-11-21 +authors: +- Jonathan Blaney +reviewers: +- Terhi Nurmikko-Fuller +- Matthew Lincoln +editors: +- Adam Crymble +translator: +- Francisco Nabais +translation-editor: +- Joana Vieira Paulino +translation-reviewer: +- Bruno Almeida +- Daniel Bonatto Seco +lesson-testers: David Valentine +tested_date: 2025-02-28 +difficulty: 1 +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/428 +activity: acquiring +topics: [lod] +abstract: "Este tutorial apresenta os principais conceitos de dados abertos conectados (*Linked Open Data*), incluindo URIs, ontologias, formatos RDF e uma breve introdução à linguagem de consulta de gráficos SPARQL." +avatar_alt: Um homem velho com uma mulher em cada braço +doi: 10.46430/phpt0033 +--- + +{% include toc.html %} + +Nota de Tradução: Alguns termos, por aparecerem constantemente e facilitarem a interpretação das imagens, apenas foram propositadamente traduzidos uma vez e serão colocados entre parênteses. Alertamos também para a existência de alguns exemplos que não foram propositadamente traduzidos para facilitar a sua introdução nos programas apresentados. + + +Introdução e Âmbito da lição +----------------------------- + +Esta lição oferece uma breve e concisa introdução aos [dados abertos conectados](https://pt.wikipedia.org/wiki/Linked_data#The_Linking_Open_Data_Project) (*Linked Open Data* ou LOD). Não é necessário conhecimento prévio para realizar este tutorial. 
Os leitores deverão obter uma compreensão clara dos conceitos por detrás dos dados abertos conectados, como são utilizados e como são criados. O tutorial está dividido em cinco partes, além de leituras adicionais: + +1. Dados abertos conectados: o que são? +2. O papel do [Identificador Uniforme de Recurso](https://pt.wikipedia.org/wiki/URI) (*Uniform Resource Identifier* ou URI) +3. Como o LOD organiza o conhecimento: [ontologias](https://pt.wikipedia.org/wiki/Ontologia_(ci%C3%AAncia_da_computa%C3%A7%C3%A3o)) +4. A [Estrutura de Descrição de Recursos](https://pt.wikipedia.org/wiki/Resource_Description_Framework) (*Resource Description Framework* ou RDF) e formatos de dados +5. Consulta de dados abertos conectados com [SPARQL](https://pt.wikipedia.org/wiki/SPARQL) +6. Outras leituras e recursos + +A conclusão deste tutorial poderá levar algumas horas e poderá ser útil reler algumas secções para solidificar a sua compreensão. Os termos técnicos foram ligados à sua página correspondente na Wikipedia e encoraja-se a que faça uma pausa e leia sobre termos que considere desafiadores. Depois de ter aprendido alguns dos princípios-chave do LOD, a melhor maneira de melhorar e solidificar esse conhecimento é praticar. Este tutorial fornece oportunidades para fazê-lo. No final da lição, deverá compreender os princípios básicos de LOD, incluindo termos e conceitos-chave. + +Se precisar aprender a como explorar LOD usando a linguagem de consulta [SPARQL](https://pt.wikipedia.org/wiki/SPARQL), recomenda-se a lição de Matthew Lincoln ['*Using SPARQL to access Linked Open Data*'](/en/lessons/retired/graph-databases-and-SPARQL) (em inglês) (Nota: a lição deste link encontra-se desatualizada e já não é mantida pelo _Programming Historian_. Por favor veja a nota inicial dessa página sobre a razão dessa lição ter sido retirada), que segue praticamente a visão geral fornecida nesta lição. + +Para proporcionar aos leitores uma base sólida dos princípios básicos de LOD, este tutorial não oferecerá uma cobertura abrangente de todos os seus conceitos. Estes **não** serão o foco desta lição: + +1. [Web Semântica](https://pt.wikipedia.org/wiki/Web_sem%C3%A2ntica) e [raciocínio semântico](https://en.wikipedia.org/wiki/Semantic_reasoner) (em inglês) de [datasets](https://pt.wikipedia.org/wiki/Conjunto_de_dados). Um raciocinador semântico deduziria que Jorge VI é o irmão ou meio-irmão de Eduardo VIII, dado que: a) Eduardo VIII é o filho de Jorge V e b) Jorge VI é o filho de Jorge V. Este tutorial não se foca neste tipo de tarefa. +2. Criação e *upload* de conjuntos de dados abertos conectados ligados à [Nuvem de dados conectados](https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/) (em inglês). Partilhar LOD é um princípio importante, que é encorajado abaixo. Contudo, os aspetos práticos de contribuir com LOD para a nuvem de dados conectados estão além do âmbito desta lição. Alguns recursos que podem ajudar a começar esta tarefa estão disponíveis no final deste tutorial. + +## Dados abertos conectados: O que são? + +LOD é informação estruturada num formato destinado a máquinas e, por isso, não é necessariamente um conceito de fácil definição. É importante não perder a motivação com esta informação já que, ao compreender os princípios, pode colocar uma máquina a fazer uma leitura autónoma. + +Se todos os datasets fossem publicados abertamente e utilizassem o mesmo formato para estruturar a informação, seria possível interrogá-los todos de uma só vez. 
A análise de grandes volumes de dados é potencialmente muito mais poderosa do que qualquer pessoa que utilize os seus próprios datasets individuais espalhados pela web nos chamados [silos de informação](https://en.wikipedia.org/wiki/Information_silo) (em inglês). Estes datasets interoperáveis são aquilo para que os profissionais de LOD estão a trabalhar. + +Para atingir este objetivo, ao trabalhar com LOD, é importante recordar três princípios: + +1. **Utilizar um formato padrão de LOD reconhecido**. Para que o LOD funcione, os dados devem ser [estruturados](https://pt.wikipedia.org/wiki/Estrutura_de_dados), utilizando normas reconhecidas para que os computadores que interrogam os dados possam processá-los de forma consistente. Há vários formatos de LOD, alguns dos quais são discutidos abaixo. +2. **Referir uma entidade da mesma forma que outras pessoas o fazem**. Se existirem dados sobre a mesma pessoa/local/coisa em dois ou mais locais, certifique-se de que se refere à pessoa/local/coisa da mesma forma em todos os casos. +3. **Publicar os seus dados abertamente**. Qualquer pessoa deverá poder utilizar os seus dados sem pagar uma taxa e num formato que não exija [software proprietário](https://pt.wikipedia.org/wiki/Software_propriet%C3%A1rio). + +Comecemos com um exemplo de dados sobre uma pessoa, utilizando uma abordagem comum [par atributo-valor](https://en.wikipedia.org/wiki/Attribute%E2%80%93value_pair) (em inglês) típica em computação: + + pessoa=número + +Neste caso, o 'atributo' é uma pessoa. E o valor - ou quem é essa pessoa - é representado por um número. O número pode ser atribuído aleatoriamente ou pode ser utilizado um número que já esteja associado a essa pessoa. Esta última abordagem tem grandes vantagens: se todos os que criarem um dataset que menciona essa pessoa utilizarem *exatamente o mesmo número, exatamente no mesmo formato*, então podemos encontrar esse indivíduo de forma fiável em qualquer dataset aderindo a essas regras. Vamos criar um exemplo usando Jack Straw: tanto o nome de um rebelde inglês do século XIV, como o de um ministro de gabinete britânico proeminente na administração de Tony Blair. É útil ser capaz de diferenciar as duas pessoas que partilham o mesmo nome. + +Utilizando o modelo acima, no qual cada pessoa é representada por um número único, vamos atribuir ao ministro britânico Jack Straw o número `64183282`. O seu par atributo-valor ficaria então com este aspeto: + + pessoa=64183282 + +E vamos atribuir a Jack Straw, descrito no *[Oxford Dictionary of National Biography](https://www.oxforddnb.com)* (em inglês) como 'o enigmático líder rebelde', o número `33059614`, fazendo com que o seu par atributo-valor se pareça com isto: + + pessoa=33059614 + +Desde que todos os que fazem LOD utilizem estes dois números para se referirem aos respetivos Jack Straws, podemos agora procurar a pessoa `64183282` num conjunto de dados abertos conectados e podemos estar confiantes de que estamos a obter a pessoa certa - neste caso, o ministro. + +Os pares atributo-valor também podem armazenar informações sobre outros tipos de entidades: lugares, por exemplo. Jack Straw, o político moderno, era membro do Parlamento britânico, representando o assento de Blackburn. Há mais do que um lugar no Reino Unido chamado Blackburn, para não mencionar outros Blackburn em todo o mundo. Usando os mesmos princípios acima delineados, podemos desambiguar entre os vários Blackburns, atribuindo um identificador único ao lugar correto: Blackburn em Lancashire, Inglaterra. 
+ + Lugar=2655524 + +Neste momento pode estar pensando, "isso é o que um catálogo de biblioteca faz". É verdade que a ideia-chave aqui é a do [ficheiro de autoridade](https://pt.wikipedia.org/wiki/Controle_de_autoridade), central na biblioteconomia (um ficheiro de autoridade é uma lista definitiva de termos que podem ser utilizados num contexto particular, por exemplo, quando se cataloga um livro). Nos dois exemplos acima descritos, utilizamos ficheiros de autoridade para atribuir números (os identificadores únicos) aos Jacks e ao Blackburn. Os números que utilizamos para os dois Jack Straws provêm do [Virtual International Authority File](https://viaf.org) (em inglês) (VIAF) (Arquivo Internacional de Autoridade Virtual), que é mantido por um consórcio de bibliotecas de todo o mundo, de modo a tentar resolver o problema da miríade de maneiras pelas quais a mesma pessoa pode ser referida. O identificador único que utilizamos para o distrito eleitoral de Blackburn provém da [GeoNames](https://www.geonames.org/) (em inglês), uma base de dados geográfica gratuita. + +Vamos tentar ser mais precisos com o que, neste caso, queremos dizer com 'Blackburn'. Jack Straw representou o círculo eleitoral (uma área representada por um único membro do parlamento) de Blackburn, que mudou os seus limites ao longo do tempo. O projeto "[*Digging Into Linked Parliamentary Data*](https://repository.jisc.ac.uk/6544/1/DiLiPaD_final_report_1.pdf)" (Dilipad) (em inglês), no qual trabalhei, produziu identificadores únicos para as filiações partidárias e circunscrições eleitorais para cada membro do parlamento. Neste exemplo, Jack Straw representou o distrito eleitoral conhecido como 'Blackburn' na sua encarnação pós-1955: + + blackburn1955-presente + +Como o VIAF é um ficheiro de autoridade respeitado e bem mantido, fornece um conjunto óbvio de identificadores a utilizar para Jack Straw. Como o distrito eleitoral representado por Straw estava perfeitamente coberto pelos ficheiros de autoridade criados pelo projeto Dilipad, também era um ficheiro de autoridade lógico a utilizar. Infelizmente, nem sempre é tão óbvio qual das listas publicadas online é a melhor para se usar. Uma pode ser mais utilizada do que outra, mas esta última pode ser mais abrangente para um determinado fim. O GeoNames funcionaria melhor do que os identificadores da Dilipad em alguns casos. Haverá também casos em que não se consegue encontrar um dataset com essa informação. Por exemplo, se quiser escrever pares atributo-valor sobre si próprio e as suas relações familiares imediatas terá de inventar os seus próprios identificadores. + +Esta falta de ficheiros de autoridade coerentes é um dos maiores desafios que o LOD enfrenta neste momento. [Tim Berners-Lee](https://pt.wikipedia.org/wiki/Tim_Berners-Lee), que inventou uma forma de ligar documentos em rede e criou assim a World Wide Web, um dos principais proponentes de LOD, para encorajar uma maior utilização de dados conectados, sugeriu um "[sistema de classificação de cinco estrelas](https://www.w3.org/DesignIssues/LinkedData.html)" (em inglês) para que todos avançassem o mais longe possível em direção ao LOD. Essencialmente, Tim Berners-Lee apoia a publicação aberta de dados, especialmente ao utilizar formatos abertos e normas públicas, mas o melhor é que os dados se liguem também aos dados de outras pessoas. 
Com os identificadores únicos atribuídos a todos os elementos, o próximo passo fundamental na criação de LOD é ter uma forma de *descrição* da relação entre Jack Straw (`64183282`) e Blackburn (`blackburn1955-presente`). Em LOD, as relações são expressas utilizando o que é conhecido como '[tripla semântica](https://en.wikipedia.org/wiki/Semantic_triple)' (em inglês). Vamos fazer uma tripla semântica que represente a relação entre Jack Straw e o seu distrito eleitoral:

    pessoa:64183282 papel:representadaNoParlamentoBritânico distritoeleitoral:"blackburn1955-presente" .

A apresentação (ou [sintaxe](https://pt.wikipedia.org/wiki/Sintaxe)) das triplas semânticas, incluindo a pontuação utilizada acima, será discutida mais tarde, na secção sobre RDF e formatos de dados. Por agora, vamos focar-nos na estrutura básica. A tripla semântica, como seria de esperar, tem três partes. Estas são convencionalmente referidas como sujeito (*subject*), predicado (*predicate*) e objeto (*object*):

| o sujeito       | o predicado                       | o objeto                 |
| --------------- | --------------------------------- | ------------------------ |
| pessoa 64183282 | representadaNoParlamentoBritânico | "blackburn1955-presente" |

A forma tradicional de representar uma tripla semântica em forma esquemática é a seguinte (em inglês):

{% include figure.html filename="pt-tr-introducao-dados-abertos-conectados-01.png" alt="Imagem com a representação de uma tripla semântica" caption="Figura 1. Forma tradicional de representar uma tripla semântica." %}

Assim, a nossa tripla semântica sobre Jack Straw, apresentada de forma mais legível para o ser humano, poderia assumir a seguinte forma:

{% include figure.html filename="pt-tr-introducao-dados-abertos-conectados-02.png" alt="Imagem com a representação de uma tripla semântica aplicada ao exemplo de Jack Straw" caption="Figura 2. Diagrama da tripla semântica que demonstra que Jack Straw representava Blackburn." %}

Por enquanto, é importante fixar três pontos-chave:

- O LOD deve estar aberto e disponível para qualquer pessoa na Internet (caso contrário, não está "aberto")
- Os defensores do LOD têm como objetivo normalizar as formas de referência a entidades únicas
- O LOD consiste em triplas semânticas que descrevem as relações entre entidades

## O papel do *Uniform Resource Identifier* (URI)

Uma parte essencial do LOD é o [Identificador Uniforme de Recurso](https://pt.wikipedia.org/wiki/URI) (*Uniform Resource Identifier* ou URI). O URI é uma forma única e fiável de representar uma entidade (uma pessoa, um objeto, uma relação, etc.), de uma forma que é utilizável por todos no mundo.

Na secção anterior, utilizamos dois números diferentes para identificar os nossos diferentes Jack Straws.

    pessoa="64183282"

    pessoa="33059614"

O problema é que em todo o mundo existem muitas bases de dados que contêm pessoas com estes números e são, provavelmente, todas pessoas diferentes. Fora do nosso contexto imediato, estes números não identificam indivíduos únicos. Vamos tentar resolver isso. Aqui estão estes mesmos identificadores, mas como URIs:

    https://viaf.org/viaf/64183282/

    https://viaf.org/viaf/33059614/

Tal como o número único desambiguou os nossos dois Jack Straws, o URI completo acima ajuda-nos a desambiguar entre todos os diferentes ficheiros de autoridade lá fora. Neste caso, é evidente que estamos a utilizar o VIAF como o nosso ficheiro de autoridade. Com certeza, já viu esta forma de desambiguação muitas vezes na web.
Existem muitos websites em todo o mundo com páginas chamadas `/home` ou `/faq`. Mas não há confusão, porque o [domínio](https://pt.wikipedia.org/wiki/Nome_de_dom%C3%ADnio) (a primeira parte do [Localizador Uniforme de Recursos](https://pt.wikipedia.org/wiki/URL) (*Uniform Resource Locator* ou URL) - ex. `bbc.co.uk`) é único e, portanto, todas as páginas que fazem parte desse domínio são distintas das páginas `/faq` de outros websites. No endereço `http://www.bbc.co.uk/faqs`, é a parte `bbc.co.uk` que torna as páginas subsequentes únicas. Isto é tão óbvio para as pessoas que utilizam a web a toda a hora que nem pensam sobre isso. Provavelmente, também sabe que se quiser criar um website chamado `bbc.co.uk` não conseguirá, porque esse nome já foi registado junto da autoridade apropriada, que é o [Sistema de Nomes de Domínio](https://pt.wikipedia.org/wiki/Sistema_de_Nomes_de_Dom%C3%ADnio) (*Domain Name System*). O registo garante a singularidade. Os URIs também têm de ser únicos.

Embora os exemplos acima se pareçam com URLs, também é possível construir um URI que não se pareça nada com um URL. Temos muitas formas de identificar pessoas e coisas de forma única e raramente pensamos ou nos preocupamos com isso. Os códigos de barras, os números de passaporte e até os códigos postais são concebidos para serem únicos. Os números de telefone são frequentemente colocados como letreiros de loja precisamente porque são únicos. Todos eles podem ser utilizados como URIs.

Quando criamos URIs para as entidades descritas pelo projeto '[Tobias](https://gtr.ukri.org/projects?ref=AH%2FN003446%2F1#/tabOverview)' (em inglês), escolhemos uma estrutura do tipo URL e optamos por utilizar o nosso espaço web institucional, pondo de lado `data.history.ac.uk/tobias-project/` como um lugar dedicado à hospedagem destes URIs. Ao colocá-lo em `data.history.ac.uk`, em vez de `history.ac.uk`, houve uma separação clara entre os URIs e as páginas do website. Por exemplo, um dos URIs do projeto Tobias era 'https://data.history.ac.uk/tobias-project/person/15601'. Embora o formato dos URIs acima mencionados seja o mesmo de um URL, eles não se ligam a websites (tente colá-los num navegador web). Muitas pessoas novas no LOD acham isto confuso. Todos os URLs são URIs, mas nem todos os URIs são URLs. (nota de tradução: tendo em conta que o site original do projeto Tobias já não se encontra disponível, o leitor da lição deve entender os exemplos aqui indicados como meramente ilustrativos daquilo que o autor pretende demonstrar) Um URI pode descrever qualquer coisa, enquanto o URL descreve a localização de algo na web. Assim, um URL diz-lhe a localização de uma página web, de um ficheiro ou de algo semelhante. Um URI faz apenas o trabalho de identificar algo. É como o Número Internacional Normalizado do Livro (*International Standard Book Number* ou [ISBN](https://www.iso.org/standard/36563.html), em inglês) `978-0-1-873354-6`, que identifica exclusivamente uma edição de capa dura de _Baptism, Brotherhood and Belief in Reformation Germany_, de Kat Hill, mas não diz onde obter uma cópia. Para isso, precisaria de algo como um [número de acesso](https://pt.wikipedia.org/wiki/N%C3%BAmero_de_acesso_(biblioteconomia)), que dá a localização exata de um livro numa prateleira de uma biblioteca específica.

Há um pouco de jargão em torno de URIs. As pessoas falam sobre se são ou não [desreferenciáveis](https://pt.wikipedia.org/wiki/Refer%C3%AAncia_(ci%C3%AAncia_da_computa%C3%A7%C3%A3o)).
Isso apenas significa: *podemos transformar esta referência abstrata em algo mais?* Por exemplo, se colarmos um URI na barra de endereços de um browser, será que ele encontra algo? O URI do VIAF para o historiador Simon Schama é:

    https://viaf.org/viaf/46784579

Se o colocarmos no browser, receberemos de volta uma página web sobre Simon Schama, que contém dados estruturados sobre ele e a sua história editorial. Isto é muito útil por uma razão: a partir do URI, não é óbvio quem, ou mesmo o quê, está a ser referido. Da mesma forma, se tratarmos um número de telefone (com código internacional) como o URI de uma pessoa, então ele deve ser desreferenciável: alguém pode atender o telefone, e pode até ser o Schama.

Mas isto não é essencial. Muitos URIs não são desreferenciáveis, como no exemplo acima do projeto Tobias. Não se pode encontrá-lo em lado nenhum; é uma convenção.

O exemplo do VIAF leva-nos a outra coisa importante sobre os URIs: não os invente, a não ser que tenha de o fazer. As pessoas e organizações têm feito esforços para construir boas listas de URIs e o LOD não vai funcionar eficazmente se as pessoas duplicarem esse trabalho, criando novos URIs desnecessariamente. Por exemplo, o VIAF tem o apoio de muitas bibliotecas internacionais. Se quiser construir URIs para pessoas, o VIAF é uma escolha muito boa. Se não conseguir encontrar algumas pessoas no VIAF, ou noutras listas de autoridade, só então precisará de criar os seus próprios URIs.

## Como o LOD organiza o conhecimento: ontologias

Pode não ter sido óbvio a partir das triplas semânticas individuais que analisamos na secção anterior, mas o LOD pode responder a perguntas complexas. Quando se juntam as triplas semânticas, estas formam um [mapa conceitual](https://pt.wikipedia.org/wiki/Mapa_conceitual), devido à forma como se interligam. Suponhamos que queremos encontrar uma lista de todas as pessoas que foram alunas do compositor Franz Liszt. Se a informação estiver em triplas semânticas de dados conectados sobre pianistas e os seus professores, podemos descobrir o que procuramos com uma consulta (veremos esta linguagem de consulta, chamada SPARQL, na secção final).

Por exemplo, o pianista Charles Rosen foi aluno do pianista Moriz Rosenthal, que foi aluno de Franz Liszt. Vamos agora expressar isto em duas triplas semânticas (vamos cingir-nos às sequências de caracteres para os nomes, em vez dos números de identificação, para tornar os exemplos mais legíveis):

    "Franz Liszt" ensinouPianoAo "Moriz Rosenthal" .
    "Moriz Rosenthal" ensinouPianoAo "Charles Rosen" .

Poderíamos igualmente ter criado as nossas triplas semânticas desta forma:

    "Charles Rosen" aprendeuPianoCom "Moriz Rosenthal" .
    "Moriz Rosenthal" aprendeuPianoCom "Franz Liszt" .

Estamos a inventar exemplos simplesmente para fins de ilustração, mas, se quiser ligar os seus dados a outros datasets na "nuvem de dados conectados", deve olhar para as convenções que são utilizadas nesses datasets e fazer o mesmo. Na verdade, esta é uma das características mais úteis do LOD, porque muito do trabalho já foi feito. As pessoas têm passado muito tempo a desenvolver formas de modelar a informação dentro de uma determinada área de estudo e a pensar sobre como as relações dentro dessa área podem ser representadas. Estes modelos são geralmente conhecidos como ontologias. Uma ontologia é uma abstração que permite a representação de um conhecimento particular sobre o mundo.
Neste sentido, as ontologias são bastante recentes e foram concebidas para fazer o que uma [taxonomia](https://pt.wikipedia.org/wiki/Taxonomia_(geral)) hierárquica faz (pense na classificação das espécies na [Taxonomia de Lineu](https://pt.wikipedia.org/wiki/Taxonomia_de_Lineu)), mas de uma forma mais flexível.

Uma ontologia é mais flexível porque não é hierárquica. Visa representar a fluidez do mundo real, onde as coisas podem relacionar-se umas com as outras de formas mais complexas do que quando são representadas por uma estrutura hierárquica em forma de árvore. Em vez disso, uma ontologia é mais parecida com uma teia de aranha.

O que quer que pretenda representar com LOD, sugerimos que encontre um vocabulário existente e que o utilize, em vez de tentar escrever o seu próprio. Esta página tem [uma lista de alguns dos vocabulários mais populares](https://semanticweb.org/wiki/Main_Page.html) (em inglês).

Uma vez que o nosso exemplo acima se concentra nos pianistas, seria uma boa ideia encontrar uma ontologia apropriada em vez de criar o nosso próprio sistema. De facto, existe [uma ontologia para música](https://web.archive.org/web/20170715094229/https://www.musicontology.com/) (em inglês). Para além de uma especificação bem desenvolvida, esta tem também alguns exemplos úteis da sua utilização. Pode dar uma olhada nas [páginas de iniciação](https://web.archive.org/web/20170718143925/https://musicontology.com/docs/getting-started.html) (em inglês) para ter uma ideia de como se pode utilizar esta ontologia em particular.

Infelizmente, não conseguimos encontrar nada que descreva a relação entre um professor e um aluno na Ontologia da Música. Mas a ontologia é publicada abertamente, logo podemos utilizá-la para descrever outras características da música e depois criar a nossa própria extensão. Se então publicássemos a nossa extensão abertamente, outros poderiam utilizá-la se assim o desejassem, e este ato poderia tornar-se um padrão. Embora o projeto *Music Ontology* (Ontologia Musical) não tenha a relação de que precisamos, o [projeto *Linked Jazz*](https://linkedjazz.org/) (em inglês) permite o uso de '*mentorOf*', o que parece funcionar bem no nosso caso. Embora esta não seja uma solução ideal, é uma solução que faz um esforço para utilizar o que já existe por aí.

Agora, se estivéssemos a estudar a história do pianismo, poderíamos querer identificar muitos pianistas que foram ensinados por alunos de Liszt, para estabelecer uma espécie de árvore genealógica e ver se estes 'netos' de Liszt têm algo em comum. Poderíamos pesquisar os alunos de Liszt, fazer uma grande lista deles, depois pesquisar cada um dos alunos e tentar fazer listas de quaisquer alunos que eles tivessem. Com LOD poderíamos (novamente, se as triplas semânticas existissem) escrever uma query semelhante a:

    Dá-me os nomes de todos os pianistas ensinados por x
    onde x aprendeu piano com Liszt

Isto encontraria todas as pessoas do dataset que eram alunas de alunos de Liszt. Não nos entusiasmemos demasiado: esta consulta não nos dará todos os alunos de todos os alunos de Liszt que já existiram, porque essa informação provavelmente não está registada em nenhum conjunto de triplas semânticas existente. Lidar com dados do mundo real revela todo o tipo de omissões e inconsistências, como veremos quando olharmos para o maior conjunto de LOD, a [DBpedia](https://www.dbpedia.org/), na secção final.
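Já é possível experimentar esta ideia em pequena escala. O esboço seguinte, em Python, usa a biblioteca [rdflib](https://rdflib.readthedocs.io/) (uma escolha nossa, apenas para fins de ilustração; a lição em si não depende dela) e um espaço de nomes fictício `example.org` para criar as duas triplas acima e fazer exatamente a consulta dos 'netos' de Liszt:

```python
from rdflib import Graph, Namespace

# Espaço de nomes fictício, inventado apenas para este exemplo
ex = Namespace("http://example.org/")

g = Graph()
g.add((ex.Moriz_Rosenthal, ex.aprendeuPianoCom, ex.Franz_Liszt))
g.add((ex.Charles_Rosen, ex.aprendeuPianoCom, ex.Moriz_Rosenthal))

# "Dá-me os nomes de todos os pianistas ensinados por x,
#  onde x aprendeu piano com Liszt"
consulta = """
PREFIX ex: <http://example.org/>
SELECT ?neto WHERE {
    ?aluno ex:aprendeuPianoCom ex:Franz_Liszt .
    ?neto ex:aprendeuPianoCom ?aluno .
}
"""

for resultado in g.query(consulta):
    print(resultado.neto)   # http://example.org/Charles_Rosen
```

Com apenas duas triplas, o resultado é trivial; o interessante é que a mesma consulta funcionaria sem alterações sobre milhões de triplas vindas de datasets diferentes, desde que usassem os mesmos identificadores.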
Se tiver utilizado [bases de dados relacionais](https://pt.wikipedia.org/wiki/Banco_de_dados_relacional), poderá estar a pensar que estas podem desempenhar a mesma função. No nosso caso de Liszt, a informação sobre pianistas acima descrita poderia estar organizada numa [tabela](https://pt.wikipedia.org/wiki/Tabela_(banco_de_dados)) de base de dados denominada algo como 'Alunos'.

| IDaluno | IDprofessor |
| ------- | ----------- |
| 31      | 17          |
| 35      | 17          |
| 49      | 28          |
| 56      | 28          |
| 72      | 40          |

Se não estiver familiarizado com bases de dados, não se preocupe. Provavelmente, ainda assim consegue ver que alguns pianistas nesta tabela tinham o mesmo professor (números 17 e 28). Sem entrar em pormenores: se Liszt estiver nesta tabela, seria bastante fácil extrair os seus alunos utilizando um [*join*](https://pt.wikipedia.org/wiki/Join_(SQL)).

De facto, as bases de dados relacionais podem oferecer resultados semelhantes aos do LOD. A grande diferença é que o LOD pode ir mais longe: pode ligar datasets que foram criados sem intenção explícita de serem ligados entre si. A utilização do [Quadro de Descrição de Recursos](https://pt.wikipedia.org/wiki/Resource_Description_Framework) (*Resource Description Framework* ou RDF) e de URIs permite que isto aconteça.

## RDF e formatos de dados

O LOD utiliza uma norma, definida pelo [Consórcio World Wide Web](https://www.w3.org/) (em inglês) (*World Wide Web Consortium* ou W3C), chamada *[Resource Description Framework](https://pt.wikipedia.org/wiki/Resource_Description_Framework)* ou apenas RDF. As normas são úteis desde que sejam amplamente adotadas - pense no metro ou nos tamanhos de parafuso padrão - mesmo que sejam essencialmente arbitrárias. O RDF tem sido amplamente adotado como a norma de LOD.

Ouvirá frequentemente o LOD referido simplesmente como RDF. Atrasámos a conversa sobre o RDF até agora porque é bastante abstrato. O RDF é um [modelo de dados](https://pt.wikipedia.org/wiki/Modelagem_de_dados) que descreve como os dados são estruturados num nível teórico. Assim, a insistência na utilização de triplas semânticas (em vez de quatro partes, ou duas, ou nove, por exemplo) é uma regra do RDF. Mas, quando se trata de questões mais práticas, há algumas escolhas quanto à implementação. O RDF diz-lhe o que tem de fazer, mas não exatamente como o tem de fazer. Estas escolhas dividem-se em duas áreas: como se escrevem as coisas (serialização) e as relações que as suas triplas semânticas descrevem.

### Serialização

A [serialização](https://pt.wikipedia.org/wiki/Serializa%C3%A7%C3%A3o) é o termo técnico para "como se escrevem as coisas". O chinês padrão (mandarim) pode ser escrito em caracteres tradicionais, em caracteres simplificados ou na romanização Pinyin, e a língua em si não muda. Tal como o mandarim, o RDF pode ser escrito de várias formas. Aqui vamos olhar para duas (há outras, mas, por uma questão de simplicidade, vamos concentrar-nos nestas):

1) [Turtle](https://en.wikipedia.org/wiki/Turtle_(syntax)) (em inglês)
2) [RDF/XML](https://pt.wikipedia.org/wiki/RDF/XML)

Reconhecer a serialização que está a ser utilizada significa que podemos escolher ferramentas apropriadas, concebidas para esse formato. Por exemplo, o RDF pode vir serializado no formato [XML](https://pt.wikipedia.org/wiki/XML). Podemos então utilizar uma ferramenta ou biblioteca de código concebida para analisar esse formato em particular, o que é útil se já soubermos trabalhar com ele.
O reconhecimento do formato também lhe dá as palavras-chave certas para procurar ajuda online. Muitos recursos permitem descarregar as suas bases de dados LOD, podendo escolher em que serialização deseja fazer o *download*.

#### Turtle

'Turtle' é um jogo de palavras: 'Tur' é a abreviatura de 'terse' (conciso) e 'tle' é a abreviatura de '*triple language*' (linguagem de triplas). Turtle é uma forma agradavelmente simples de escrever triplas semânticas.

O Turtle usa apelidos ou atalhos, conhecidos como [prefixos](https://www.w3.org/TeamSubmission/turtle/#sec-tutorial) (em inglês), o que nos poupa de escrever URIs completos todas as vezes. Voltemos ao URI que criamos na secção anterior:

    https://data.history.ac.uk/tobias-project/person/15601

Não queremos escrever isto cada vez que nos referimos a esta pessoa (lembrar-se-á de Jack Straw). Por isso, só temos de enunciar o nosso atalho:

    @prefix toby: <https://data.history.ac.uk/tobias-project/person/> .

Então Jack é `toby:15601`, que substitui o longo URI e é mais fácil à vista. Eu escolhi 'toby', mas poderia igualmente ter escolhido qualquer sequência de letras.

Vamos agora passar de Jack Straw para William Shakespeare e utilizar o Turtle para descrever algumas coisas sobre as suas obras. Vamos ter de decidir que ficheiros de autoridade utilizar, um processo que, como mencionado acima, é melhor fazer olhando para outros conjuntos de LOD. Aqui usaremos como um dos nossos prefixos o [*Dublin Core*](https://pt.wikipedia.org/wiki/Dublin_Core), uma norma de [metadados](https://pt.wikipedia.org/wiki/Metadados) de bibliotecas, como outro o [Número de Controle da Biblioteca do Congresso](https://en.wikipedia.org/wiki/Library_of_Congress_Control_Number) (*Library of Congress Control Number*) e, o último (VIAF), deverá ser-lhe familiar. Juntos, estes três ficheiros de autoridade fornecem identificadores únicos para todas as entidades que tenciono utilizar neste exemplo:

    @prefix lccn: <https://lccn.loc.gov/> .
    @prefix dc: <http://purl.org/dc/terms/> .
    @prefix viaf: <https://viaf.org/viaf/> .

    lccn:n82011242 dc:creator viaf:96994048 .

Note o espaçamento do ponto final após a última linha. Esta é a forma de o Turtle indicar o fim. Tecnicamente não é necessário ter o espaço, mas facilita a leitura após uma longa sequência de caracteres.

No exemplo acima, `lccn:n82011242` representa Macbeth; `dc:creator` liga Macbeth ao seu autor; `viaf:96994048` representa William Shakespeare.

O Turtle também permite listar triplas semânticas sem nos preocuparmos em repetir cada URI quando ele acabou de ser usado. Acrescentemos a data em que os estudiosos pensam que Macbeth foi escrito, utilizando o par atributo-valor do Dublin Core `dc:created "YYYY"`:

    @prefix lccn: <https://lccn.loc.gov/> .
    @prefix dc: <http://purl.org/dc/terms/> .
    @prefix viaf: <https://viaf.org/viaf/> .

    lccn:n82011242 dc:creator viaf:96994048 ;
        dc:created "1606" .

Lembra-se da estrutura da tripla semântica discutida na secção 1? Aí demos este exemplo:

    1 pessoa 15601 (o sujeito)
    2 representadaNoParlamentoBritânico (o predicado)
    3 "Blackburn" (o objeto)

O essencial é que o predicado liga o sujeito e o objeto; descreve a relação entre eles. O sujeito vem primeiro na tripla semântica, mas isso é uma questão de escolha, como discutimos com o exemplo das pessoas que foram ensinadas a tocar piano por Liszt.

Pode-se usar um ponto e vírgula se o sujeito for o mesmo, mas o predicado e o objeto forem diferentes, ou uma vírgula se o sujeito e o predicado forem os mesmos e apenas o objeto for diferente.

    lccn:no2010025398 dc:creator viaf:96994048 ,
        viaf:12323361 .
Aqui estamos a dizer que Shakespeare (96994048) e John Fletcher (12323361) foram ambos os criadores da obra *The Two Noble Kinsmen*.

Quando analisamos as ontologias, anteriormente, sugerimos que visse a [*Music Ontology*](https://web.archive.org/web/20170718143925/https://musicontology.com/docs/getting-started.html) (em inglês). Dê agora uma nova olhada aos exemplos. Continuam a ser complicados, mas será que agora fazem mais sentido?

Uma das ontologias mais acessíveis é a '*Friend of a Friend*' (amigo de um amigo) ou [FOAF](https://en.wikipedia.org/wiki/FOAF_(ontology)) (em inglês). Esta é concebida para descrever pessoas e, talvez por essa razão, é bastante intuitiva. Se, por exemplo, quiser escrever-me para me dizer que este curso é a melhor coisa que já leu, aqui está o meu email expresso como triplas semânticas em FOAF:

    @prefix foaf: <http://xmlns.com/foaf/0.1/> .

    :"Jonathan Blaney" foaf:mbox <mailto:jonathan.blaney@sas.ac.uk> .

#### RDF/XML

Em contraste com o Turtle, o RDF/XML pode parecer um pouco pesado. Para começar, vamos apenas converter uma tripla semântica do Turtle acima, aquela que refere que Shakespeare foi um dos criadores de *The Two Noble Kinsmen*:

    lccn:no2010025398 dc:creator viaf:96994048 .

Em RDF/XML, com os prefixos declarados dentro do trecho de código de XML, fica:

``` xml
<rdf:RDF
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns:dc="http://purl.org/dc/terms/">
  <rdf:Description rdf:about="https://lccn.loc.gov/no2010025398">
    <dc:creator rdf:resource="https://viaf.org/viaf/96994048"/>
  </rdf:Description>
</rdf:RDF>
```

O formato RDF/XML tem a mesma informação básica que o formato Turtle, mas é escrito de forma muito diferente, baseando-se nos princípios das etiquetas XML encaixadas.

Passemos a um exemplo diferente para mostrar como o RDF/XML combina triplas semânticas e, ao mesmo tempo, introduzir o [*Simple Knowledge Organization System*](https://pt.wikipedia.org/wiki/Simple_Knowledge_Organization_System) (SKOS) (Sistema Simples de Organização do Conhecimento), que foi concebido para codificar tesauros ou taxonomias.

    <skos:Concept rdf:about="http://data.history.ac.uk/tobias-project/abdication/21250">
        <skos:prefLabel>Abdication</skos:prefLabel>
    </skos:Concept>

Aqui estamos a dizer que o conceito SKOS `21250`, *abdication* (abdicação), tem um rótulo preferido de "Abdication". A forma como funciona é que o elemento sujeito (incluindo a parte de '*abdication*', que é um valor de atributo em termos de XML) tem o predicado e o objeto encaixados no seu interior. O elemento encaixado é o predicado e [o nó folha](https://pt.wikipedia.org/wiki/%C3%81rvore_(estrutura_de_dados)#Terminologia) é o objeto. Este exemplo é retirado de um projeto para publicar um [*Tesauro de História Britânica e Irlandesa*](https://www.history.ac.uk/research/digital-history) (em inglês).

Tal como com o Turtle, podemos acrescentar mais triplas semânticas. Portanto, vamos declarar que o termo mais restrito na nossa hierarquia de assuntos, um abaixo de *Abdication*, vai ser *Abdication crisis (1936)*.

    <skos:Concept rdf:about="http://data.history.ac.uk/tobias-project/abdication/21250">
        <skos:prefLabel>Abdication</skos:prefLabel>
    </skos:Concept>

    <skos:Concept rdf:about="http://data.history.ac.uk/tobias-project/abdication/21250">
        <skos:narrower rdf:resource="http://data.history.ac.uk/tobias-project/abdicationCrisis1936"/>
    </skos:Concept>

Lembra-se de como os predicados e os objetos são encaixados dentro do sujeito? Aqui já o fizemos duas vezes com o mesmo sujeito, por isso podemos tornar isto menos prolixo, aninhando ambos os conjuntos de predicados e objetos dentro do mesmo sujeito:

    <skos:Concept rdf:about="http://data.history.ac.uk/tobias-project/abdication/21250">
        <skos:prefLabel>Abdication</skos:prefLabel>
        <skos:narrower rdf:resource="http://data.history.ac.uk/tobias-project/abdicationCrisis1936"/>
    </skos:Concept>

Se estiver familiarizado com XML, isto será fácil. Se não estiver, talvez prefira um formato como o Turtle. Mas a vantagem aqui é que, ao criar o seu RDF/XML, pode usar as ferramentas habituais disponíveis para XML, como editores e analisadores dedicados, para verificar se o seu RDF/XML está corretamente formatado. Se não for uma pessoa que use XML, recomendo o Turtle, podendo usar uma [ferramenta online](https://www.easyrdf.org/converter) (em inglês) para verificar se a sua sintaxe está correta.
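Também é possível confirmar programaticamente que Turtle e RDF/XML são, de facto, duas escritas do mesmo modelo. O esboço seguinte em Python usa a biblioteca rdflib (uma escolha nossa, a título meramente ilustrativo) para ler a tripla de Macbeth em Turtle e reescrevê-la em RDF/XML:

```python
from rdflib import Graph

# A tripla de Macbeth, serializada em Turtle
turtle = """
@prefix lccn: <https://lccn.loc.gov/> .
@prefix dc: <http://purl.org/dc/terms/> .
@prefix viaf: <https://viaf.org/viaf/> .

lccn:n82011242 dc:creator viaf:96994048 .
"""

g = Graph()
g.parse(data=turtle, format="turtle")  # lê a serialização Turtle
print(g.serialize(format="xml"))       # reescreve a mesma tripla em RDF/XML
```

O conteúdo não muda; só muda a forma de o escrever.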
## Consulta de RDF com SPARQL

Nesta secção final iremos interrogar algum LOD e ver o que se pode fazer com ele.

A linguagem de consulta que usamos para LOD chama-se [SPARQL](https://pt.wikipedia.org/wiki/SPARQL). É um daqueles acrónimos recursivos amados pelos técnicos: ***S**PARQL **P**rotocol **a**nd **R**DF **Q**uery **L**anguage* (Protocolo SPARQL e Linguagem de Consulta RDF).

Como mencionado no início, o *Programming Historian* tem [uma lição completa](/en/lessons/retired/graph-databases-and-SPARQL) (em inglês), de Matthew Lincoln, sobre a utilização do SPARQL (embora já não seja mantida; ver nota no início desta tradução). A secção final aqui presente é apenas uma visão geral dos conceitos básicos. Se o SPARQL despertar o seu interesse, pode obter uma fundamentação completa no tutorial de Lincoln.

Vamos realizar as nossas consultas SPARQL na [DBpedia](https://www.dbpedia.org/), que é um enorme conjunto de LOD derivado da Wikipedia. Além de estar cheia de informação que é muito difícil de encontrar através da habitual interface da Wikipedia, tem vários "pontos de extremidade" (*end points*) SPARQL - interfaces onde se podem digitar consultas SPARQL e obter resultados a partir das triplas semânticas da DBpedia.

O end point de consulta SPARQL que utilizaremos chama-se [snorql](https://dbpedia.org/snorql/) (em inglês). Estes end points ocasionalmente ficam offline. Se for esse o caso, tente procurar por *dbpedia sparql* e deverá encontrar um substituto semelhante.

Se for ao URL do snorql acima, verá, no topo, um conjunto de prefixos já declarados para nós, o que é útil. Alguns destes prefixos ser-lhe-ão agora familiares.

{% include figure.html filename="en-or-intro-to-linked-data-03.png" alt="Captura de tela com a interface de criação de consultas snorql" caption="Figura 3. Caixa de consulta padrão do snorql, com alguns prefixos declarados para si." %}

Na caixa de consulta, abaixo das declarações de prefixo, deverá ver o seguinte:

    SELECT * WHERE {
    ...
    }

Se alguma vez escreveu uma consulta de bases de dados em *Structured Query Language*, [mais conhecida como SQL](https://pt.wikipedia.org/wiki/SQL), isto vai parecer-lhe bastante familiar e vai ajudá-lo a aprender SPARQL. Se não, não se preocupe. As palavras-chave aqui utilizadas, `SELECT` (selecionar) e `WHERE` (onde), não são sensíveis a maiúsculas e minúsculas, mas algumas partes de uma consulta SPARQL podem ser (ver abaixo), por isso recomendamos que mantenha as maiúsculas e minúsculas tal como aparecem nas consultas ao longo deste curso.

Aqui `SELECT` significa "encontrar alguma coisa" e `*` significa "dá-me tudo". `WHERE` introduz uma condição, que é onde vamos colocar os detalhes de que tipo de coisas queremos que a consulta encontre.

Vamos começar com algo simples para ver como é que isto funciona. Cole (ou, melhor, escreva) isto na caixa de consulta:

    SELECT * WHERE {
    :Lyndal_Roper ?b ?c
    }

Clique em '*go*' (ir). Se deixar o menu *drop-down* como '*browse*' (navegar), deverá obter duas colunas com os rótulos "b" e "c". (Note que, aqui, as maiúsculas/minúsculas importam: lyndal_roper não lhe dará resultados.)

{% include figure.html filename="en-or-intro-to-linked-data-04.png" alt="Captura de tela com a interface de resultados de consultas snorql" caption="Figura 4. Topo das listas de resultados de uma consulta com todas as triplas semânticas com 'Lyndal_Roper' como sujeito." %}

Então o que é que acabou de acontecer? E como é que soubemos o que escrever?

Na verdade, não sabíamos.
Esse é um dos problemas com os end points SPARQL: quando se está a conhecer um dataset, é preciso experimentar coisas e descobrir que termos são utilizados. Como estes dados provêm da *Wikipedia* e estávamos interessados em saber que informação sobre historiadores podíamos encontrar, vamos à página da *Wikipedia* da historiadora [Lyndal Roper](https://en.wikipedia.org/wiki/Lyndal_Roper) (em inglês).

A parte final do URL é `Lyndal_Roper` e concluímos que é provável que esta cadeia de caracteres seja a forma como Roper é referida na DBpedia. Como não sabemos o que mais poderia estar em triplas semânticas que mencionam Roper, utilizamos `?b` e `?c`: estes são apenas marcadores de posição. Poderíamos igualmente ter digitado `?whatever` e `?you_like` e as colunas teriam esses rótulos. Quando quiser ser mais preciso sobre o que está a pesquisar, será importante etiquetar as colunas de forma significativa.

Experimente agora a sua própria consulta SPARQL: escolha uma página da *Wikipedia*, copie a parte final do URL, após a última barra, e coloque-a no lugar de Lyndal_Roper. Depois clique em 'go'.

A partir da informação que se obtém destes resultados, é possível gerar *queries* mais precisas. Isto pode ser pouco fiável, por isso não se preocupe se algumas não funcionarem.

Vamos voltar aos resultados da consulta que fizemos há momentos:

    SELECT * WHERE {
    :Lyndal_Roper ?b ?c
    }

Podemos ver uma longa lista na coluna etiquetada _c_. Estes são todos os atributos que Roper tem na *DBpedia* e que nos ajudarão a encontrar outras pessoas com os mesmos atributos. Por exemplo, podemos ver `http://dbpedia.org/class/yago/Historian110177150`. Poderemos utilizar isto para obter uma lista de historiadores? Vamos colocá-lo na nossa consulta, mas em terceiro lugar, porque era onde estava quando o encontrámos nos resultados de Lyndal Roper. A nossa consulta tem este aspeto:

    SELECT * WHERE {
    ?historian_name ?predicate <http://dbpedia.org/class/yago/Historian110177150>
    }

Fizemos uma pequena mudança aqui. Se esta consulta funcionar, então esperamos que os nossos historiadores estejam na primeira coluna, porque 'historiador' não parece poder ser um predicado: não funciona como um verbo numa frase. Por isso, vamos chamar à nossa primeira coluna de resultados 'historian_name' e à segunda (sobre a qual não sabemos nada) 'predicate' (predicado).

Execute a *query*. Deverá encontrar uma grande lista de historiadores.

{% include figure.html filename="en-or-intro-to-linked-data-05.png" alt="Duas capturas de tela com a interface de consultas snorql e respectivos resultados" caption="Figura 5. Historiadores de acordo com a DBpedia." %}

Assim, esta ferramenta funciona para criar listas, o que é útil, mas seria muito mais poderoso combinar listas para obter interseções de conjuntos. Encontrámos mais algumas coisas que pode ser interessante consultar nos atributos DBpedia de Lyndal Roper: `<http://dbpedia.org/class/yago/WikicatBritishHistorians>` e `<http://dbpedia.org/class/yago/WikicatWomenHistorians>`. É muito fácil combiná-los, pedindo uma variável a ser devolvida (no nosso caso, `?name` (nome)) e utilizando-a depois em múltiplas linhas de uma *query*. Note também o espaço e o ponto final no fim da primeira linha que começa com `?name`:

    SELECT ?name
    WHERE {
    ?name ?b <http://dbpedia.org/class/yago/WikicatBritishHistorians> .
    ?name ?b <http://dbpedia.org/class/yago/WikicatWomenHistorians>
    }

Funciona! Devemos obter cinco resultados. Na altura em que escrevo, há cinco historiadoras britânicas na *DBpedia*...

{% include figure.html filename="en-or-intro-to-linked-data-06.png" alt="Duas capturas de tela com a interface de consultas snorql e respectivos resultados" caption="Figura 6. Historiadoras britânicas segundo a DBpedia." %}
Apenas cinco historiadoras britânicas? Claro que há, na realidade, muitas mais do que isso, como poderíamos facilmente mostrar substituindo, na nossa primeira consulta, Lyndal Roper pelo nome de, digamos, Alison Weir. Isto leva-nos ao problema da *DBpedia* que mencionámos anteriormente: a *Wikipedia* não está marcada de forma muito consistente com informação estrutural do tipo que a *DBpedia* utiliza. A nossa consulta pode listar algumas historiadoras britânicas, mas não podemos utilizá-la para gerar uma lista significativa de todas as pessoas nesta categoria. Tudo o que encontrámos foram as pessoas cujas entradas na *Wikipedia* alguém decidiu classificar como "historiadora britânica" e "historiadora".

Com SPARQL na *DBpedia*, é preciso ter cuidado com as inconsistências do material produzido colaborativamente. Poderá usar o SPARQL exatamente da mesma forma num dataset mais fiável, por exemplo, os dados do governo britânico ([https://data-gov.tw.rpi.edu//sparql](https://data-gov.tw.rpi.edu//sparql), em inglês), e esperar obter resultados mais robustos (há um breve tutorial para este dataset em [https://data-gov.tw.rpi.edu/wiki/A_crash_course_in_SPARQL](https://data-gov.tw.rpi.edu/wiki/A_crash_course_in_SPARQL), em inglês).

No entanto, apesar das suas inconsistências, a *DBpedia* é um ótimo local para aprender SPARQL. Esta foi apenas uma breve introdução, mas há muito mais em [Usando SPARQL para aceder ao Linked Open Data](/en/lessons/retired/graph-databases-and-SPARQL) (em inglês).

## Leituras e recursos adicionais

* Dean Allemang e James Hendler, *Semantic Web for the Working Ontologist*, 2.ª ed., Elsevier, 2011
* Tim Berners-Lee, [*Linked Data*](https://www.w3.org/DesignIssues/LinkedData.html) (em inglês)
* Bob DuCharme, *Learning SPARQL*, O'Reilly, 2011
* O [blog de Bob DuCharme](https://www.snee.com/bobdc.blog/) (em inglês) também vale a pena ler
* Richard Gartner, *Metadata: Shaping Knowledge from Antiquity to the Semantic Web*, Springer, 2016
* Seth van Hooland e Ruben Verborgh, *Linked Data for Libraries, Archives and Museums*, 2015
* Matthew Lincoln, ['*Using SPARQL to access Linked Open Data*'](/en/lessons/retired/graph-databases-and-SPARQL) (em inglês)
* [*Linked Data guides and tutorials*](https://linkeddata.org/guides-and-tutorials) (em inglês)
* Dominic Oldman, Martin Doerr e Stefan Gradmann, '*Zen and the Art of Linked Data: New Strategies for a Semantic Web of Humanist Knowledge*', em *A New Companion to Digital Humanities*, editado por Susan Schreibman et al.
* Max Schmachtenberg, Christian Bizer e Heiko Paulheim, [*State of the LOD Cloud 2017*](https://linkeddatacatalog.dws.informatik.uni-mannheim.de/state/) (em inglês)
* David Wood, Marsha Zaidman e Luke Ruth, *Linked Data: Structured data on the Web*, Manning, 2014

## Agradecimentos

Gostaria de agradecer aos meus dois colegas revisores, Matthew Lincoln e Terhi Nurmikko-Fuller, e ao meu editor, Adam Crymble, por me ajudarem generosamente a melhorar esta lição com numerosas sugestões, esclarecimentos e correções. Este tutorial baseia-se num outro escrito como parte do '*Thesaurus of British and Irish History as SKOS*' [*(Tobias) project*](https://gtr.ukri.org/projects?ref=AH%2FN003446%2F1#/tabOverview) (em inglês), financiado pelo [AHRC](https://www.ahrc.ac.uk/) (em inglês). A lição foi revista para o projeto *Programming Historian*.
diff --git a/pt/licoes/introducao-estilometria-python.md b/pt/licoes/introducao-estilometria-python.md
index 8a52335fd8..13ce91ca8f 100644
--- a/pt/licoes/introducao-estilometria-python.md
+++ b/pt/licoes/introducao-estilometria-python.md
@@ -1,708 +1,708 @@
---
title: Introdução à estilometria com Python
layout: lesson
slug: introducao-estilometria-python
date: 2018-04-21
translation_date: 2021-12-27
authors:
- François Dominic Laramée
reviewers:
- Folgert Karsdorp
- Jan Rybicki
- Antonio Rojas Castro
editors:
- Adam Crymble
translator:
- Daniel Bonatto Seco
translation-editor:
- Jimmy Medeiros
translation-reviewer:
- Bruno Almeida
- Suemi Higuchi
difficulty: 2
review-ticket: https://github.com/programminghistorian/ph-submissions/issues/445
activity: analyzing
topics: [distant-reading, python]
abstract: "Nesta lição, aprenderá a realizar análises estilométricas e a determinar a autoria de textos. A lição cobre três métodos: Curvas Características de Composição de Mendenhall, Método Qui-Quadrado de Kilgariff e Método Delta de John Burrows."
original: introduction-to-stylometry-with-python
avatar_alt: Mulher a ler junto a uma pintura
doi: 10.46430/phpt0024
---

{% include toc.html %}

# Introdução

[Estilometria](https://perma.cc/NYH2-KWLA) é o estudo quantitativo do estilo literário por meio de métodos de [leitura distante](https://perma.cc/XK8J-F6ZF) computacional. Baseia-se na observação de que os autores tendem a escrever de maneiras relativamente consistentes, reconhecíveis e únicas. Por exemplo:

* Cada pessoa tem seu próprio vocabulário único, às vezes rico, às vezes limitado. Embora um vocabulário mais amplo esteja geralmente associado à qualidade literária, nem sempre é esse o caso. Ernest Hemingway é famoso por usar um número surpreendentemente pequeno de palavras diferentes em sua escrita,[^1] o que não o impediu de ganhar o Prêmio Nobel de Literatura em 1954;
* Algumas pessoas escrevem frases curtas, enquanto outras preferem blocos longos de texto compostos por muitas frases;
* Não há duas pessoas que usem ponto-e-vírgulas, travessões e outras formas de pontuação exatamente da mesma maneira.

As maneiras como os escritores usam pequenas [*function words*](https://perma.cc/284C-CNHD) (palavras funcionais), como artigos, preposições e conjunções, mostram-se particularmente reveladoras. Em uma pesquisa sobre métodos estilométricos históricos e atuais, Efstathios Stamatatos aponta que as palavras funcionais são "usadas de maneira amplamente inconsciente pelos autores e são independentes do tópico"[^2]. Para a análise estilométrica, isso é muito vantajoso, visto que esse padrão inconsciente tende a variar menos no [*corpus*](https://perma.cc/9XQ4-J4A5) de um autor do que seu vocabulário geral (e também é muito difícil para um pretenso falsificador copiar). As palavras funcionais também foram identificadas como marcadores importantes do gênero literário e da cronologia.

Os pesquisadores têm usado a estilometria como uma ferramenta para estudar uma variedade de questões culturais.
Por exemplo, uma quantidade considerável de pesquisas estudou as diferenças entre as maneiras como homens e mulheres escrevem[^3] ou sobre o que escrevem.[^4] Outros pesquisadores estudaram as maneiras como uma mudança repentina no estilo de escrita em um único texto pode indicar plágio[^5] e até mesmo a maneira como as letras dos músicos John Lennon e Paul McCartney se tornaram cada vez menos alegres e menos ativas à medida que os [Beatles](https://perma.cc/DQ66-M79T) se aproximavam do fim de sua carreira de gravação na década de 1960.[^6]

No entanto, uma das aplicações mais comuns da estilometria é na atribuição de autoria. Dado um texto anônimo, às vezes é possível inferir quem o escreveu medindo certas características, como o número médio de palavras por frase ou a propensão do autor a usar "todavia" em vez de "no entanto", e comparando as medidas com outros textos escritos pelo suposto autor. Este é o objetivo deste tutorial: a partir de um conjunto de obras clássicas de romancistas lusos e brasileiros do século XIX, iremos comparar exemplares das suas obras com o estilo literário de cada autor do conjunto, a fim de tentar inferir as respectivas autorias (nota de tradução: foi decidido mudar o _corpus_ usado nesta lição para um que fosse culturalmente mais relevante para o público que fala e escreve português; foi mantida a restante estrutura da lição original, com excepção de ligeiras adaptações face à mudança do _corpus_).

## Objetivos de aprendizado

No final desta lição, teremos percorrido os seguintes tópicos:

* Como aplicar vários métodos estilométricos para inferir a autoria de um texto anônimo ou conjunto de textos;
* Como usar estruturas de dados relativamente avançadas, incluindo [dicionários](https://perma.cc/TTF4-SJ23) de [strings](https://perma.cc/7DCC-M9AT) e dicionários de dicionários, em [Python](https://perma.cc/Z82S-3L3M);
* O básico do [Natural Language Toolkit](https://perma.cc/E7LZ-WECZ) (NLTK), um módulo Python popular dedicado a [processamento de linguagem natural](https://perma.cc/MFX4-LAVZ).

## Leitura prévia

Se você não tem experiência com a linguagem de programação Python ou está tendo dificuldade com os exemplos apresentados neste tutorial, o autor recomenda que você leia as lições [Trabalhando com ficheiros de texto em Python](/pt/licoes/trabalhando-ficheiros-texto-python) e [Manipular Strings com Python](/pt/licoes/manipular-strings-python). Note que essas lições foram escritas em Python versão 2, enquanto esta usa Python versão 3. As diferenças de [sintaxe](https://perma.cc/E5LQ-S65P) entre as duas versões da linguagem podem ser sutis. Se você ficar em dúvida, siga os exemplos conforme descritos nesta lição e use as outras lições como material de apoio. (Este tutorial encontra-se atualizado até à versão [Python 3.8.5](https://perma.cc/XCT2-Q4AT); as [strings literais formatadas](https://perma.cc/U6Q6-59V3) na linha `with open(f'data/pg{filename}.txt', 'r', encoding='utf-8') as f:`, por exemplo, requerem Python 3.6 ou uma versão mais recente da linguagem.)

## Materiais requeridos

Este tutorial usa conjuntos de dados e software que você terá que baixar e instalar.

### O conjunto de dados ###

Para trabalhar nesta lição, você precisará baixar e descompactar o ficheiro [.zip](/assets/introduction-to-stylometry-with-python/dataset_estilometria.zip) contendo as 15 obras que compõem o *corpus* que será utilizado neste tutorial. As obras foram originalmente extraídas do [Projeto Gutenberg](https://perma.cc/8GTT-3M9N).
Ao descompactar o ficheiro, será criada uma pasta com o nome `dados`. Este será o seu [diretório de trabalho](https://perma.cc/9KVS-T3A5) e todo o trabalho deve ser salvo aqui durante a execução da lição. - -### O software ### - -Esta lição usa as seguintes versões da linguagem Python e [bibliotecas](https://pt.wikipedia.org/wiki/Biblioteca_(computa%C3%A7%C3%A3o)): -* [Python 3.x](https://www.python.org/downloads/) - a última versão estável é recomendada; -* [nltk](https://www.nltk.org/) - Natural Language Toolkit, geralmente abreviado `nltk`; -* [matplotlib](https://matplotlib.org/) - visualização de dados e geração de gráficos; -* [re](https://docs.python.org/pt-br/3/library/re.html) - limpeza de dados via Regex (veremos durante o tutorial o porquê). - -Alguns desses módulos podem não estar pré-instalados em seu computador. Se você encontrar mensagens de erro como: "Módulo não encontrado" ou similares, você terá que baixar e instalar o(s) módulo(s) ausente(s). A forma mais simples de realizar esta tarefa é através do comando `pip`. Mais detalhes estão disponíveis através do tutorial do *Programming Historian* [Instalação de Módulos Python com pip](/pt/licoes/instalacao-modulos-python-pip). - -## Algumas notas sobre Independência Linguística - -Este tutorial aplica a análise estilométrica a um conjunto de textos em português (PT-PT e PT-BR) usando uma biblioteca Python chamada `nltk`. Muitas das funcionalidades fornecidas pelo `nltk` operam com outros idiomas. Contanto que um idioma forneça uma maneira clara de distinguir os limites de uma palavra, o `nltk` deve ter um bom desempenho. Idiomas como o chinês, para os quais não há distinção clara entre os limites das palavras, podem ser problemáticos. O autor original desta lição utilizou `nltk` com textos em francês sem nenhum problema; outros idiomas que usam [diacríticos](https://perma.cc/7VGD-5968), como espanhol e alemão, também devem funcionar bem com `nltk`. Consulte a [documentação do nltk](https://perma.cc/S4EX-2DBT) para obter detalhes. - -Apenas uma das tarefas neste tutorial requer código dependente do idioma. Para dividir um texto em um conjunto de palavras em uma língua diferente do inglês, você precisará especificar o idioma apropriado como um parâmetro para o [tokenizador](https://perma.cc/NGM5-4MED) da biblioteca `nltk`, que usa o inglês como padrão. Isso será explicado no tutorial. - -Por fim, observe que algumas tarefas linguísticas, como [*part-of-speech tagging*](https://perma.cc/L9SU-PS9D), podem não ser suportadas pelo `nltk` em outros idiomas além do inglês. Este tutorial não cobre a aplicação de *part-of-speech tagging*. Se você precisar para os seus próprios projetos, consulte a [documentação do nltk](https://perma.cc/S4EX-2DBT) para obter orientações. - -# O *corpus* - Contextualização - -No [exemplo original deste tutorial em inglês](/en/lessons/introduction-to-stylometry-with-python), utilizaram-se os [papéis federalistas](https://perma.cc/DW5V-MH5W) como um exemplo de aplicação de estilometria, utilizando as técnicas que serão apresentadas para inferir a autoria dos textos contestados dentro do conjunto de documentos que configura o *corpus*.[^7] -Como na língua portuguesa não temos um conjunto de textos que possua estas mesmas características, no exemplo que apresentaremos traremos um total de 15 obras completas de 5 autores diferentes, três deles portugueses e dois brasileiros, todos romancistas do século XIX, disponibilizadas pelo [Projeto Gutenberg](https://perma.cc/5PRR-TM3D). 
Utilizaremos duas obras de cada autor para definir seus respectivos estilos e uma terceira para constituir o conjunto de testes, para avaliarmos se as técnicas utilizadas realizarão a inferência correta de autoria através do grau de similaridade de cada obra deste conjunto com o estilo obtido de cada autor. - -Os autores e obras utilizadas são os seguintes: - -| Autor | Obra 1 | Obra 2 | Obra 3 | -| --------- | --------- | --------- | --------- | -| [Machado de **Assis**](https://perma.cc/6BMU-UKZL) (Brasil)| [Quincas Borba](https://www.gutenberg.org/ebooks/55682) (**55682**) | [Memorias Posthumas de Braz Cubas](https://www.gutenberg.org/ebooks/54829) (**54829**) | [Dom Casmurro](https://www.gutenberg.org/ebooks/55752) (**55752**) | -| [José de **Alencar**](https://perma.cc/Y3Y2-VHJ5) (Brasil) | [Ubirajara](https://www.gutenberg.org/ebooks/38496) (**38496**) | [Cinco minutos](https://www.gutenberg.org/ebooks/44540) (**44540**) | [Como e porque sou romancista](https://www.gutenberg.org/ebooks/29040) (**29040**) | -| [Camilo **Castelo Branco**](https://perma.cc/Q4AJ-VZBH) (Portugal) | [Carlota Angela](https://www.gutenberg.org/ebooks/26025) (**26025**) | [Amor de Salvação](https://www.gutenberg.org/ebooks/26988) (**26988**) | [Amor de Perdição: Memorias d'uma familia](https://www.gutenberg.org/ebooks/16425) (**16425**) | -| [António Feliciano de **Castilho**](https://perma.cc/LZ9J-3H5Z) (Portugal) | [A Chave do Enigma](https://www.gutenberg.org/ebooks/32002) (**32002**) | [A Primavera](https://www.gutenberg.org/ebooks/65021) (**65021**) | [O presbyterio da montanha](https://www.gutenberg.org/ebooks/28127) (**28127**) | -| [Manuel Pinheiro **Chagas**](https://perma.cc/8LU3-RADW) (Portugal) | [Historia alegre de Portugal](https://www.gutenberg.org/ebooks/29394) (**29394**) | [A Lenda da Meia-Noite](https://www.gutenberg.org/ebooks/23400) (**23400**) | [Astucias de Namorada, e Um melodrama em Santo Thyrso](https://www.gutenberg.org/ebooks/29342) (**29342**) | - -As partes destacadas do nome de cada autor indicam como os mesmos serão referenciados neste tutorial a partir deste ponto. Para os códigos utilizaremos o `EBook-No.` (número de referência da obra no Projeto Gutenberg), presente no nome dos ficheiros disponibilizados. - -# Nossos casos de teste - -Nesta lição, usaremos obras de romancistas brasileiros e portugueses do século XIX como um estudo de caso para demonstrar três abordagens estilométricas diferentes: - -1. Curvas características de composição de Mendenhall -2. Método Qui-Quadrado de Kilgariff -3. Método Delta de John Burrows - -Em todas as abordagens acima mencionadas, utilizaremos os documentos das colunas **Obra 1** e **Obra 2** para definir o estilo de cada autor. Os documentos da coluna **Obra 3** serão testados individualmente com cada um dos 5 autores para tentarmos inferir a autoria pela proximidade de estilo. - -# Preparando os dados para análise - -Antes de prosseguirmos com a análise estilométrica, precisamos carregar os ficheiros contendo todas as 15 obras em [estruturas de dados](https://perma.cc/P843-J4LB) na memória do computador. - -O primeiro passo neste processo é designar cada obra para o seu respectivo conjunto. Como cada obra está relacionada com o seu respectivo `EBook-No.`, podemos atribuir cada obra (valor) à chave do seu autor (ou a uma chave separada, se ela fizer parte da amostra de teste) usando um *dicionário* Python. 
O dicionário é um tipo de conjunto de dados composto de um número arbitrário de pares de chave-valor; neste caso, os nomes dos autores servirão como chaves (separados entre treino e teste), enquanto os `EBook-No.` das obras serão os valores associados a essas chaves.

```python
ids_obras = {
    'Assis' : [55752, 54829],
    'Alencar' : [38496, 44540],
    'Castelo Branco' : [26025, 26988],
    'Castilho' : [32002, 65021],
    'Chagas' : [29394, 23400],
    'Assis (teste)' : [55682],
    'Alencar (teste)' : [29040],
    'Castelo Branco (teste)' : [16425],
    'Castilho (teste)' : [28127],
    'Chagas (teste)' : [29342]
}
```

Os dicionários Python são muito flexíveis. Por exemplo, podemos acessar um valor específico *indexando* o dicionário com uma de suas chaves, podemos varrer o dicionário inteiro fazendo um loop em sua lista de chaves, etc. Faremos amplo uso desta funcionalidade à medida que avançarmos.

A seguir, como estamos interessados no vocabulário de cada autor, definiremos uma breve [função](https://perma.cc/P8CA-Y43Q) em Python que irá criar uma longa lista de palavras a partir de cada uma das obras atribuídas a um único autor. Isso será armazenado como uma [string](https://perma.cc/7DCC-M9AT).

Abra o seu ambiente de desenvolvimento Python escolhido. Se você não sabe como fazer isso, leia "Configurar um ambiente de desenvolvimento integrado para Python" ([Windows](/pt/licoes/instalacao-windows), [Linux](/pt/licoes/instalacao-linux), [Mac](/pt/licoes/instalacao-mac)) antes de prosseguir.

```python
# Função que compila todos os ficheiros de texto de cada grupo em uma única string

import re

def ler_ficheiros_para_string(ids_ficheiros):
    strings = []
    for id_ficheiro in ids_ficheiros:
        with open(f'dados/pg{id_ficheiro}.txt', 'r',
                  encoding='utf-8') as f:
            texto = f.read()
            # Mantém apenas o miolo da obra, entre os marcadores START e END
            texto = re.search(r"(START.*?\*\*\*)(.*)(\*\*\* END)",
                              texto,
                              re.DOTALL).group(2)
            strings.append(texto)
    return '\n'.join(strings)
```

Perceba que, dentro da função, temos também uma etapa de limpeza dos textos usando [expressões regulares](https://perma.cc/DT3K-XUBG). Isso foi necessário para este corpus específico pois as obras publicadas no Projeto Gutenberg possuem uma estrutura de cabeçalho e rodapé de [metadados](https://perma.cc/E8P8-GKDR) que não pode ser considerada na análise estilométrica, uma vez que não foi redigida pelos autores analisados. A utilização de expressões regulares não faz parte do escopo deste tutorial, então limitar-nos-emos a compreender que estamos utilizando a biblioteca `re` para capturar apenas o conjunto de caracteres entre os marcadores `*** START OF THIS PROJECT GUTENBERG [NOME DA OBRA] ***` e `*** END OF THIS PROJECT GUTENBERG [NOME DA OBRA] ***` presentes em cada documento do projeto. Para maiores dúvidas sobre a utilização de expressões regulares e da biblioteca `re`, consulte a [documentação](https://perma.cc/JFP3-B4P4).

Na sequência, construímos uma nova estrutura de dados chamando repetidamente a função `ler_ficheiros_para_string()`, passando a ela uma lista diferente de documentos a cada vez. Armazenaremos os resultados em outro dicionário, este com os nomes dos autores/casos de teste como chaves e todo o texto dos respectivos documentos como valores. Para simplificar, iremos nos referir à string contendo uma lista de documentos como "corpus do autor".
- -```python -# Criar um dicionário com os corpora dos autores -obras = {} -for autor, ids_ficheiros in ids_obras.items(): - obras[autor] = ler_ficheiros_para_string(ids_ficheiros) -``` - -Para nos certificarmos de que os ficheiros foram carregados corretamente, imprima os primeiros cem caracteres de cada entrada do dicionário na tela: - -```python -for autor in obras: - print(obras[autor][:100]) -``` - -Se esta operação de impressão exibir quaisquer trechos de texto no console, então a operação de leitura dos ficheiros funcionou conforme o esperado e você pode prosseguir para a análise estilométrica. - -
Se os ficheiros não forem carregados, o motivo mais provável é que o seu diretório de trabalho atual não seja o diretório `dados` criado ao descompactar o ficheiro da seção de Materiais requeridos acima; mudar o seu diretório de trabalho deve resolver o problema. Como você faz isso depende do seu ambiente de desenvolvimento Python.
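Uma forma rápida de verificar e corrigir o diretório de trabalho a partir do próprio Python é usar o módulo `os` da biblioteca padrão. No esboço abaixo, o caminho indicado é meramente ilustrativo e deve ser substituído pelo local onde você descompactou o ficheiro:

```python
import os

# Mostra o diretório de trabalho atual
print(os.getcwd())

# Muda para o local onde os ficheiros foram descompactados
# (caminho meramente ilustrativo; ajuste para o seu caso)
os.chdir('/caminho/para/a/licao')

# Confirma que um dos ficheiros do corpus está visível a partir daqui
print(os.path.exists('dados/pg55752.txt'))
```

Se a última linha imprimir `True`, a leitura dos ficheiros deve funcionar.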
# Primeiro teste estilométrico: curvas características de composição de Mendenhall

O pesquisador literário T. C. Mendenhall escreveu certa vez que a assinatura estilística de um autor pode ser encontrada contando-se a frequência com que ele usa palavras de tamanhos diferentes.[^8] Por exemplo, se contarmos os tamanhos de palavras em vários segmentos de 1.000 ou 5.000 palavras de qualquer romance e, em seguida, traçarmos um gráfico das distribuições de comprimento das palavras, as curvas pareceriam praticamente as mesmas, não importando que partes do romance tivéssemos escolhido. Na verdade, Mendenhall acreditava que, se alguém contasse palavras suficientes selecionadas de várias partes da obra de toda a vida de um escritor (digamos, 100.000 ou mais), a "curva característica" de uso de comprimento de palavras do autor se tornaria tão precisa que seria constante ao longo de sua vida.

Pelos padrões de hoje, contar o comprimento das palavras parece uma forma muito direta (e talvez simplista) de medir o estilo literário. O método de Mendenhall não leva em consideração as palavras do vocabulário de um autor, o que é obviamente problemático. Portanto, não devemos tratar as curvas características como uma fonte particularmente confiável de evidência estilométrica. No entanto, Mendenhall publicou a sua teoria há mais de cento e trinta anos e fez todos os cálculos à mão. É compreensível que ele tenha optado por trabalhar com uma estatística que, embora grosseira, fosse ao menos fácil de compilar. Em honra ao valor histórico de sua tentativa inicial de estilometria, e porque a curva característica produz resultados visuais interessantes que podem ser implementados rapidamente, usaremos o método de Mendenhall como um primeiro passo em nossa exploração das técnicas de atribuição de autoria.

O trecho de código necessário para calcular e exibir as curvas características para os autores e os documentos de teste é o seguinte:

```python
# Carregar nltk e matplotlib
import nltk
nltk.download('punkt')
import matplotlib.pylab as plt

obras_tokens = {}
obras_distribuicao_comprimento = {}

id_subplot = 1
fig = plt.figure(figsize=(20,20))

autores = list(obras.keys())

for autor in autores:
    # Transformar os corpora dos autores em listas de tokens de palavras
    tokens = nltk.word_tokenize(obras[autor], language="portuguese")

    # Filtrar pontuação
    obras_tokens[autor] = ([token for token in tokens
                            if any(c.isalpha() for c in token)])

    # Obter a distribuição de comprimentos de tokens
    token_comprimentos = [len(token) for token in obras_tokens[autor]]
    obras_distribuicao_comprimento[autor] = nltk.FreqDist(token_comprimentos)

    # Plotar a curva característica de composição
    lista_chaves = []
    lista_valores = []

    for i in range(1,16):
        lista_chaves.append(i)
        lista_valores.append(obras_distribuicao_comprimento[autor][i])

    lista_valores_normalizado = [value/max(lista_valores) for value in lista_valores]

    plt.subplot(5, 5, id_subplot)
    plt.plot(lista_chaves, lista_valores_normalizado)
    plt.xticks(lista_chaves)
    plt.title(autor)
    id_subplot += 1

plt.savefig("stilometry_comparacao.jpeg", dpi=300, bbox_inches='tight')
plt.show()
```

Se você estiver trabalhando em um [Jupyter Notebook](http://jupyter.org/), adicione a expressão `%matplotlib inline` após a importação das bibliotecas; caso contrário, você pode não ver os gráficos em sua tela.
Se você estiver trabalhando em um [Jupyter Lab](http://jupyterlab.readthedocs.io/en/stable/getting_started/installation.html), substitua esta expressão por `%matplotlib ipympl`. - -A primeira linha no trecho de código acima carrega o módulo *Natural Language Toolkit (nltk)*, que contém um número enorme de funções e recursos úteis para processamento de texto. Mal tocaremos em seus fundamentos nesta lição; se você decidir explorar mais a análise de texto em Python, recomendo fortemente que comece com [a documentação do nltk](https://www.nltk.org/). - -As próximas linhas configuram estruturas de dados que serão preenchidas pelo bloco de código dentro do loop `for`. Este loop faz os mesmos cálculos para todos os nossos "autores": - -* Invoca o método `word_tokenize()` do `nltk`, explicitando a linguagem do _corpus_ para português através do argumento `language="portuguese"`, e divide o _corpus_ em _tokens_, ou seja, palavras, números, pontuação, etc.; -* Olha para esta lista de tokens e filtra as não-palavras; -* Cria uma lista contendo os comprimentos de cada token de palavra restante; -* Cria um objeto de _distribuição de frequência_ a partir dessa lista de comprimentos de palavra, basicamente contando quantas palavras de uma letra, palavras de duas letras, etc., existem no _corpus_ do autor, e em seguida realiza a normalização dessa distribuição, ou seja, ajusta todos os valores em um intervalo entre 0 e 1. Esta etapa é realizada para comparar gráficos de distribuição em _corpus_ de tamanhos diferentes de forma mais clara; -* Plota um gráfico da distribuição de comprimentos de palavras no corpus, para todas as palavras de até 15 caracteres. - -Os resultados que obtemos são os seguintes: -{% include figure.html filename="introducao-estilometria-python-01.jpeg" caption="Imagem 1: Comparação da curva de Mendenhall para cada corpus." %} - -Como podemos ver pelos gráficos, é possível notar diferenças (embora sutis) entre todas as 5 curvas características de cada autor (linha superior de gráficos). Ao compararmos os documentos de teste (linha inferior de gráficos) com os autores, podemos notar que a curva característica dos documentos de teste dos autores Assis, Castilho e Chagas se assemelham mais à curva dos seus respectivos autores que de qualquer outro, o que seriam inferências corretas. O documento de Alencar é o que mais diverge da curva característica do autor. Isso pode ocorrer pelo fato do documento de teste ser uma autobiografia do autor, enquanto os documentos de treino são duas obras de ficção, o que poderia influenciar no seu estilo de escrita. Veremos nas próximas abordagens se conseguimos contornar esta situação. O documento de Castelo Branco também parece não ter se assemelhado à curva característica do autor. - -Para além desta análise meramente visual (que pode muitas vezes induzir ao erro), podemos ter um resultado quantitativo calculando a soma das distâncias entre os valores (normalizados) de frequência de cada documento de teste com os valores de frequência do *corpus* de cada possível autor. Por consequência, o autor que possuir a menor distância de frequência com o documento de teste seria o mais provável autor deste documento. 
-Podemos implementar isso da seguinte forma:
-
-```python
-# Dividir a lista de corpus entre autores e obras destacadas
-autores = list(obras.keys())[:5]
-obras_destacadas = list(obras.keys())[5:]
-
-obras_distribuicao_comprimento_normalizado = {}
-
-# Normalizar a distribuição de comprimentos de tokens em um novo dicionário
-for index, obra in obras_distribuicao_comprimento.items():
-    obras_distribuicao_comprimento_normalizado[index] = {
-        k: v / max(obra.values())
-        for k, v in obra.items()}
-
-# Calcular a soma da diferença da distribuição entre o documento de teste e cada autor (de 1 até 15 caracteres)
-for obra in obras_destacadas:
-    for autor in autores:
-        soma_diferenca = 0
-        for i in range(1,16):
-            diferenca = abs(obras_distribuicao_comprimento_normalizado[obra][i] -
-                            obras_distribuicao_comprimento_normalizado[autor][i])
-            soma_diferenca = soma_diferenca + diferenca
-        print('A soma da diferença do documento ' +
-              obra +
-              ' para o autor ' +
-              autor +
-              ' é ' +
-              str(soma_diferenca))
-    print('\n')
-```
-
-Este trecho produzirá 5 blocos de resultados, cada um comparando um documento de teste com os 5 possíveis autores. Abaixo, um exemplo de como o primeiro bloco deve se parecer:
-
-```
-A soma da diferença do documento Assis (teste) para o autor Assis é 0.25782806530977137
-A soma da diferença do documento Assis (teste) para o autor Alencar é 0.5192643726222002
-A soma da diferença do documento Assis (teste) para o autor Castelo Branco é 0.7410205025846326
-A soma da diferença do documento Assis (teste) para o autor Castilho é 0.46876355973646266
-A soma da diferença do documento Assis (teste) para o autor Chagas é 0.3466043230715998
-```
-
-Vamos colocar os resultados dos 5 testes em uma [matriz de confusão](https://perma.cc/K42B-NQSR) (limitando a 4 casas decimais) para avaliarmos:
-
-| | Assis | Alencar | Castelo Branco | Castilho | Chagas |
-| --------- | --------- | --------- | --------- | --------- | --------- |
-| **Assis (teste)** | **0.2578** | 0.5192 | 0.7410 | 0.4687 | 0.3466 |
-| **Alencar (teste)** | 0.9744 | **0.9844** | 0.4313 | 0.6979 | 0.7897 |
-| **Castelo Branco (teste)** | 0.2812 | 0.4436 | **0.4761** | 0.2772 | 0.2803 |
-| **Castilho (teste)** | 0.4396 | 0.4624 | 0.4114 | **0.1394** | 0.3184 |
-| **Chagas (teste)** | 0.7746 | 0.5883 | 0.6636 | 0.6732 | **0.5888** |
-
-Os documentos de teste de Assis e Castilho possuem o menor valor com seus respectivos autores, o que indica a maior proximidade. Isso é condizente com a similaridade dos gráficos que vimos anteriormente. O documento de teste de Chagas teve um "empate técnico" entre o estilo do próprio autor (0.5888) e o de Alencar (0.5883). Tanto o documento de teste de Alencar quanto o de Castelo Branco ficaram com o maior valor em relação aos seus respectivos autores, logo a técnica não foi eficaz para estes dois autores.
-
-Se não tivéssemos informações adicionais para trabalharmos, poderíamos inferir corretamente 50% da atribuição de autoria (2 acertos, 2 erros e um "empate"), o que é um resultado considerável para uma técnica relativamente simples. Felizmente, a ciência estilométrica avançou muito desde a época de Mendenhall.
-
-# Segundo teste estilométrico: método qui-quadrado de Kilgariff
-
-Em um artigo de 2001, Adam Kilgarriff[^9] recomenda o uso da estatística qui-quadrado para determinar a autoria.
-Leitores familiarizados com métodos estatísticos podem se lembrar de que o qui-quadrado às vezes é usado para testar se um conjunto de observações (digamos, as intenções dos eleitores conforme declarado em uma pesquisa) segue uma certa [distribuição de probabilidade](https://perma.cc/668N-9GPD) ou padrão. Não é isso que buscamos aqui. Em vez disso, simplesmente usaremos a estatística para medir a "distância" entre os vocabulários empregados em dois conjuntos de textos. Quanto mais semelhantes os vocabulários, mais provável é que o mesmo autor tenha escrito os textos em ambos os conjuntos. Isso pressupõe que o vocabulário de uma pessoa e os padrões de uso das palavras são relativamente constantes.
-
-Veja como aplicar a estatística para atribuição de autoria:
-
-* Pegue os corpora associados a dois autores;
-* Junte-os em um único corpus, maior;
-* Conte os tokens para cada uma das palavras que podem ser encontradas neste corpus maior;
-* Selecione as [`n`](https://perma.cc/D9ND-3C83) palavras mais comuns no corpus maior;
-* Calcule quantos tokens dessas `n` palavras mais comuns esperaríamos encontrar em cada um dos dois corpora originais se fossem do mesmo autor. Isso significa simplesmente dividir o número de tokens que observamos no corpus combinado em dois valores, com base nos tamanhos relativos das contribuições dos dois autores para o corpus comum;
-* Calcule uma distância qui-quadrada somando, sobre as `n` palavras mais comuns, os _quadrados das diferenças entre os números reais de tokens encontrados no corpus de cada autor e os números esperados_, divididos pelos números esperados. A Figura 2 mostra a equação para a estatística qui-quadrado, onde C(i) representa o número observado de tokens para o elemento 'i' e E(i), o número esperado para esse elemento.
-
-{% include figure.html filename="stylometry-python-6.jpg" caption="Imagem 2: Equação para a estatística qui-quadrado." %}
-
-Quanto menor o valor do qui-quadrado, mais semelhantes são os dois corpora. Portanto, calcularemos o qui-quadrado de cada documento de teste com os 5 possíveis autores: os menores valores representarão a possível autoria de cada documento (assim como vimos no primeiro exemplo).
-
-Nota: Independentemente do método estilométrico que usamos, a escolha de `n`, o número de palavras a levar em consideração, é uma espécie de arte sombria. Na literatura pesquisada por Stamatatos[^2], pesquisadores sugeriram entre 100 e 1.000 das palavras mais comuns; um projeto chegou a usar cada palavra que aparecia no corpus pelo menos duas vezes. Como diretriz, quanto maior o corpus, maior o número de palavras que podem ser usadas como elementos sem correr o risco de dar importância indevida a uma palavra que ocorra apenas algumas vezes. Nesta lição, usaremos um `n` relativamente grande para o método qui-quadrado e um menor para o próximo método. Mudar o valor de `n` certamente mudará um pouco os resultados numéricos; no entanto, se uma pequena modificação de `n` causar uma mudança na atribuição de autoria, isso é um sinal de que o teste que você está realizando não é capaz de fornecer evidências significativas sobre o seu caso de teste.
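-
-Antes de aplicar o cálculo ao nosso corpus, vale conferir a aritmética em um exemplo mínimo. O esboço abaixo usa contagens hipotéticas, inventadas apenas para ilustrar a equação da Figura 2; nenhum destes números vem das nossas obras.
-
-```python
-# Tamanhos hipotéticos dos corpora de dois autores, A e B
-tokens_a, tokens_b = 10000, 5000
-participacao_a = tokens_a / (tokens_a + tokens_b)  # proporção de A no corpus conjunto
-
-# Suponha que uma palavra comum apareça 600 vezes no corpus conjunto:
-# 450 vezes no corpus de A e 150 no de B
-observado_a, observado_b = 450, 150
-conjunto = observado_a + observado_b
-
-# Se os dois corpora fossem amostras do mesmo autor, esperaríamos que as
-# ocorrências se dividissem na proporção dos tamanhos dos corpora
-esperado_a = conjunto * participacao_a        # 400.0
-esperado_b = conjunto * (1 - participacao_a)  # 200.0
-
-# Contribuição desta única palavra para a estatística qui-quadrado
-quiquadrado = ((observado_a - esperado_a) ** 2 / esperado_a +
-               (observado_b - esperado_b) ** 2 / esperado_b)
-print(quiquadrado)  # 6.25 + 12.5 = 18.75
-```
-
-No método completo, somamos contribuições como esta para cada uma das `n` palavras mais comuns do corpus conjunto.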
-
-O seguinte trecho de código implementa o método de Kilgariff, com as frequências das 500 palavras mais comuns no corpus conjunto sendo usadas no cálculo:
-
-```python
-# Converter os tokens para caracteres minúsculos para que a mesma palavra,
-# maiúscula ou não, conte como uma palavra
-
-for autor in autores:
-    obras_tokens[autor] = (
-        [token.lower() for token in obras_tokens[autor]])
-
-# Calcular o qui-quadrado de cada documento de teste com cada um dos 5 autores
-for obra in obras_destacadas:
-    for autor in autores:
-
-        # Primeiro, construir um corpus conjunto e identificar
-        # as 500 palavras mais frequentes nele
-        corpus_conjunto = (obras_tokens[obra] +
-                           obras_tokens[autor])
-        freq_dist_conjunto = nltk.FreqDist(corpus_conjunto)
-        termos_comuns = list(freq_dist_conjunto.most_common(500))
-
-        # Que proporção do corpus conjunto é constituída pelos
-        # tokens do autor candidato?
-        autor_compartilhado = (len(obras_tokens[autor])
-                               / len(corpus_conjunto))
-
-        # Agora, vamos observar as 500 palavras mais frequentes no corpus conjunto
-        # e comparar o número de vezes que elas podem ser observadas
-        # ao que seria esperado se as obras do autor e o documento de teste
-        # fossem ambos amostras aleatórias do mesmo conjunto.
-        quiquadrado = 0
-        for word,count_conjunto in termos_comuns:
-
-            # Com que frequência vemos essa palavra comum?
-            autor_count = obras_tokens[autor].count(word)
-            obra_count = obras_tokens[obra].count(word)
-
-            # Com que frequência deveríamos vê-la?
-            autor_count_esperado = count_conjunto * autor_compartilhado
-            teste_count_esperado = count_conjunto * (1-autor_compartilhado)
-
-            # Adicionar a contribuição da palavra para a estatística qui-quadrado
-            quiquadrado += ((autor_count-autor_count_esperado) *
-                            (autor_count-autor_count_esperado) /
-                            autor_count_esperado)
-
-            quiquadrado += ((obra_count-teste_count_esperado) *
-                            (obra_count-teste_count_esperado)
-                            / teste_count_esperado)
-
-        print("A estatística de qui-quadrado do documento",
-              obra,
-              "para o candidato",
-              autor,
-              "é =",
-              quiquadrado)
-    print("\n")
-```
-
-Assim como no primeiro exemplo, serão exibidos 5 blocos de resultados, um para cada documento de teste. O primeiro bloco se parecerá com isso:
-```
-A estatística de qui-quadrado do documento Assis (teste) para o candidato Assis é = 12266.387624251674
-A estatística de qui-quadrado do documento Assis (teste) para o candidato Alencar é = 13832.008019914058
-A estatística de qui-quadrado do documento Assis (teste) para o candidato Castelo Branco é = 15659.980573183348
-A estatística de qui-quadrado do documento Assis (teste) para o candidato Castilho é = 19458.24314684532
-A estatística de qui-quadrado do documento Assis (teste) para o candidato Chagas é = 13681.732446564287
-```
-
-No código acima, convertemos os tokens em minúsculas para que um token iniciado por letra maiúscula (por aparecer, por exemplo, no início de uma frase) e a forma toda em minúsculas da mesma palavra não sejam contados como duas palavras diferentes. Às vezes, isso pode causar alguns erros (por exemplo, quando um substantivo próprio e um substantivo comum se escrevem da mesma forma, exceto pelas maiúsculas), mas geralmente esta técnica aumenta a precisão.
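-
-O efeito é fácil de verificar em um exemplo mínimo e hipotético (os tokens abaixo são inventados, apenas para ilustração):
-
-```python
-# Sem a conversão, "Amor" e "amor" seriam contados separadamente
-tokens = ["Amor", "de", "perdição", "e", "amor", "de", "salvação"]
-
-print(tokens.count("amor"))             # 1
-
-tokens_minusculos = [token.lower() for token in tokens]
-print(tokens_minusculos.count("amor"))  # 2
-```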
-
-Agora, vamos dar uma olhada na matriz de confusão dos resultados para esta técnica:
-
-| | Assis | Alencar | Castelo Branco | Castilho | Chagas |
-| --------- | --------- | --------- | --------- | --------- | --------- |
-| **Assis (teste)** | **12266** | 13832 | 15659 | 19458 | 13681 |
-| **Alencar (teste)** | 2550 | **3153** | 2581 | 2663 | 2765 |
-| **Castelo Branco (teste)** | 17294 | 12063 | **11187** | 18133 | 13954 |
-| **Castilho (teste)** | 11349 | 9203 | 8925 | **4531** | 7548 |
-| **Chagas (teste)** | 6683 | 5700 | 5836 | 6970 | **5332** |
-
-Como podemos observar, o teste de qui-quadrado obteve um resultado superior ao da curva característica de composição de Mendenhall. Assis e Castilho permanecem com a inferência correta de autoria. Chagas, que teve um "empate técnico" na curva de composição, com o qui-quadrado passa a ter a inferência correta, com uma distância considerável para os demais possíveis autores. Dos autores que não haviam sido avaliados corretamente na curva de composição, Castelo Branco possui o menor valor de qui-quadrado, outra inferência correta. Alencar, no entanto, segue com o maior valor entre os 5 possíveis autores. De qualquer forma, já passamos de 50% de acerto com a curva característica de composição para 80% com o método qui-quadrado!
-
-No entanto, o qui-quadrado ainda é um método pouco refinado. Por um lado, palavras que aparecem com muita frequência tendem a ter um peso desproporcional no cálculo final. Às vezes, isso é bom; outras vezes, diferenças sutis de estilo representadas pelas maneiras como os autores usam palavras mais incomuns passarão despercebidas.
-
-## Uma nota sobre classes gramaticais
-
-Em alguns casos e idiomas, pode ser útil aplicar a marcação de [Part-of-speech (classes gramaticais)](https://perma.cc/ER5P-CFQE) aos tokens de palavras antes de contá-los, de modo que a mesma palavra usada como duas classes gramaticais diferentes possa contar como dois elementos diferentes (por exemplo, o termo "mais" sendo usado como substantivo ou como advérbio de intensidade). Esta lição não usa marcação de classes gramaticais, mas ela poderia refinar os resultados em estudos de caso mais complexos.
-
-Se você precisar aplicar a marcação de classes gramaticais aos seus próprios dados, poderá fazer o download de marcadores para outros idiomas, trabalhar com uma ferramenta de terceiros como o [Tree Tagger](https://perma.cc/DG9G-S5T2), ou mesmo treinar o seu próprio marcador, mas essas técnicas estão muito além do escopo da lição atual.
-
-# Terceiro teste estilométrico: método Delta de John Burrows (avançado)
-
-Os primeiros dois métodos estilométricos foram mais fáceis de implementar. Este próximo, baseado na estatística *Delta* de John Burrows[^10], é consideravelmente mais complexo, tanto conceitualmente (a matemática é mais complicada) quanto computacionalmente (mais código necessário). É, no entanto, um dos métodos estilométricos mais proeminentes em uso hoje.
-
-Assim como o qui-quadrado de Kilgariff, o método Delta de Burrows é uma medida da "distância" entre um texto cuja autoria queremos averiguar e algum outro corpus. Ao contrário do qui-quadrado, no entanto, o método Delta é projetado para comparar um texto anônimo (ou conjunto de textos) com as assinaturas de vários autores diferentes ao mesmo tempo. Mais precisamente, o método Delta mede como o texto anônimo *e conjuntos de textos escritos por um número arbitrário de autores conhecidos* divergem da média de todos eles juntos.
-Além disso, o método Delta atribui peso igual a todas as características que mede, evitando assim o problema de palavras comuns sobrecarregarem os resultados, o que era um problema com os testes de qui-quadrado. Por todas essas razões, o método Delta de John Burrows é geralmente uma solução mais eficaz para a questão da autoria.
-
-O algoritmo original de Burrows pode ser resumido da seguinte forma:
-
-* Reúna um grande corpus composto por textos escritos por um número arbitrário de autores; digamos que o número de autores seja `x`;
-* Encontre as `n` palavras mais frequentes no corpus para usar como elementos;
-* Para cada um desses `n` elementos, calcule a participação de cada um dos subcorpora dos `x` autores, como uma porcentagem do número total de palavras. Por exemplo, a palavra "ele" pode representar 4,72% das palavras no subcorpus do Autor A;
-* Em seguida, calcule a média e o desvio padrão desses `x` valores e use-os como a média oficial e o desvio padrão para esse elemento em todo o corpus. Em outras palavras, estaremos usando uma _média de médias_ em vez de calcular um único valor que represente a parcela de todo o corpus correspondente a cada palavra. Fazemos isso porque queremos evitar que um subcorpus maior tenha maior influência nos resultados a seu favor e defina a norma do corpus de tal forma que se espere que tudo se pareça com ele;
-* Para cada um dos `n` elementos e `x` subcorpora, calcule um [`z-score`](https://perma.cc/S2RH-LF9K) descrevendo o quão distante da norma do corpus está o uso desse elemento particular neste subcorpus específico. Para fazer isso, subtraia a "média das médias" do elemento da frequência com que ele é encontrado no subcorpus e divida o resultado pelo seu desvio padrão. A Figura 3 mostra a equação do z-score para o elemento 'i', onde C(i) representa a frequência observada, a letra grega mu representa a média das médias e a letra grega sigma, o desvio padrão;
-
-{% include figure.html filename="stylometry-python-7.jpg" caption="Imagem 3: Equação para a estatística de z-score." %}
-
-* Em seguida, calcule os mesmos `z-scores` para cada elemento no texto para o qual queremos determinar a autoria;
-* Finalmente, calcule um *score delta* comparando o documento de teste com o subcorpus de cada candidato. Para fazer isso, tome a *média dos valores absolutos das diferenças entre os `z-scores` de cada elemento no documento de teste e os `z-scores` correspondentes no subcorpus do candidato* (leia duas vezes!). Isso dá peso igual a cada elemento, não importa a frequência com que as palavras ocorram nos textos; caso contrário, os 3 ou 4 principais elementos sobrecarregariam todo o resto. A Figura 4 mostra a equação para Delta, onde Z(c,i) é o `z-score` para o elemento 'i' no candidato 'c', e Z(t,i) é o `z-score` para o elemento 'i' no caso de teste;
-
-{% include figure.html filename="stylometry-python-8.jpg" caption="Imagem 4: Equação para a estatística Delta de John Burrows." %}
-
-* O candidato "vencedor", assim como nas duas outras técnicas que aplicamos, é o autor para o qual a pontuação delta entre o subcorpus do autor e o documento de teste é a mais baixa.
-
-Stefan Evert _et al_.[^11] fornecem uma discussão aprofundada das variantes, refinamentos e complexidades do método, mas nos ateremos ao essencial para os propósitos desta lição.
-Uma explicação diferente de Delta, escrita em espanhol, e uma aplicação a um corpus de romances espanhóis também podem ser encontradas em um artigo recente de José Calvo Tello.[^12]
-
-## Seleção de elementos
-
-Vamos combinar todos os subcorpora em um único corpus, para que o Delta tenha um "padrão" com o qual trabalhar. Então, vamos selecionar um número de palavras para usar como elementos. Lembre-se de que usamos 500 palavras para calcular o qui-quadrado de Kilgariff; desta vez, usaremos um conjunto menor de 30 palavras (a maioria, senão todas, palavras funcionais e verbos comuns) como nossos elementos.
-
-```python
-# Combinar todos os corpora, exceto os documentos de teste, em um único corpus
-corpus_completo = []
-for autor in autores:
-    corpus_completo += obras_tokens[autor]
-
-# Obter uma distribuição de frequência e selecionar as 30 palavras mais comuns
-freq_dist_corpus_completo = list(nltk.FreqDist(corpus_completo).most_common(30))
-freq_dist_corpus_completo[:10]
-```
-
-Uma amostra das palavras mais frequentes e suas respectivas ocorrências se parece com o seguinte:
-
-```
-[('a', 17619),
- ('que', 17345),
- ('de', 17033),
- ('e', 15449),
- ('o', 14283),
- ('não', 7086),
- ('do', 6019),
- ('da', 5647),
- ('os', 5299),
- ('um', 4873)]
-```
-
-## Calculando elementos para cada subcorpus
-
-Vejamos as frequências de cada elemento no subcorpus de cada candidato, como uma proporção do número total de tokens no subcorpus. Vamos calcular esses valores e armazená-los em um dicionário de dicionários, uma maneira conveniente de construir um [array bidimensional](https://perma.cc/HR9K-24MG) em Python.
-
-```python
-# Criar uma lista com os elementos e a estrutura principal de dados
-features = [word for word,freq in freq_dist_corpus_completo]
-feature_freqs = {}
-
-for autor in autores:
-    # Criar um dicionário para os elementos de cada candidato
-    feature_freqs[autor] = {}
-
-    # Obter um valor auxiliar contendo o número de tokens no subcorpus do autor
-    geral = len(obras_tokens[autor])
-
-    # Calcular a presença de cada elemento no subcorpus
-    for feature in features:
-        presenca = obras_tokens[autor].count(feature)
-        feature_freqs[autor][feature] = presenca / geral
-```
-
-## Calculando médias de elementos e desvios-padrão
-
-Dadas as frequências de elementos para todos os subcorpora que acabamos de calcular, podemos encontrar uma "média das médias" e um desvio padrão para cada elemento. Armazenaremos esses valores em outro "dicionário de dicionários".
-
-```python
-import math
-
-# A estrutura de dados na qual iremos armazenar
-# as "estatísticas padrão do corpus"
-corpus_features = {}
-
-# Para cada elemento...
-for feature in features:
-    # Criar um subdicionário que conterá a média e o desvio padrão do elemento
-    corpus_features[feature] = {}
-
-    # Calcular a média das frequências expressas nos subcorpora
-    feature_average = 0
-    for autor in autores:
-        feature_average += feature_freqs[autor][feature]
-    feature_average /= len(autores)
-    corpus_features[feature]["Mean"] = feature_average
-
-    # Calcular o desvio padrão usando a fórmula básica para uma amostra
-    feature_stdev = 0
-    for autor in autores:
-        diff = feature_freqs[autor][feature] - corpus_features[feature]["Mean"]
-        feature_stdev += diff * diff
-    feature_stdev /= (len(autores) - 1)
-    feature_stdev = math.sqrt(feature_stdev)
-    corpus_features[feature]["StdDev"] = feature_stdev
-```
-
-## Calculando z-scores
-
-Em seguida, transformamos as frequências de elementos observadas nos subcorpora dos cinco candidatos em `z-scores`, que descrevem o quão distantes essas observações estão das "estatísticas padrão do corpus". Nada extravagante aqui: apenas aplicamos a definição do `z-score` para cada elemento e armazenamos os resultados em outro array bidimensional.
-
-```python
-feature_zscores = {}
-
-for autor in autores:
-    feature_zscores[autor] = {}
-
-    for feature in features:
-        # Definição do z-score = (value - mean) / stddev
-        # Usamos variáveis intermediárias para tornar o
-        # código mais fácil de ler
-        feature_val = feature_freqs[autor][feature]
-        feature_mean = corpus_features[feature]["Mean"]
-        feature_stdev = corpus_features[feature]["StdDev"]
-        feature_zscores[autor][feature] = ((feature_val-feature_mean) /
-                                           feature_stdev)
-```
-
-## Calculando elementos, z-scores e Delta para nosso caso de teste
-
-Em seguida, precisamos comparar os documentos de teste com o corpus. O seguinte trecho de código, que essencialmente recapitula tudo o que fizemos até agora, conta as frequências de cada um de nossos 30 elementos nos documentos de teste e calcula os `z-scores` de acordo.
-Por fim, usamos a fórmula para Delta definida por Burrows para extrair uma única pontuação comparando cada documento de teste com cada um dos cinco "autores candidatos". Lembre-se: quanto menor a pontuação Delta, mais semelhante é a assinatura estilométrica do documento à do candidato.
-
-```python
-for obra in obras_destacadas:
-    # Tokenizar o documento de teste, novamente explicitando o português
-    testcase_tokens = nltk.word_tokenize(obras[obra], language="portuguese")
-
-    # Filtrar a pontuação e colocar os tokens em minúsculas
-    testcase_tokens = [token.lower() for token in testcase_tokens
-                       if any(c.isalpha() for c in token)]
-
-    # Calcular as frequências dos elementos do documento de teste
-    geral = len(testcase_tokens)
-    testcase_freqs = {}
-    for feature in features:
-        presenca = testcase_tokens.count(feature)
-        testcase_freqs[feature] = presenca / geral
-
-    # Calcular os z-scores dos elementos do documento de teste
-    testcase_zscores = {}
-    for feature in features:
-        feature_val = testcase_freqs[feature]
-        feature_mean = corpus_features[feature]["Mean"]
-        feature_stdev = corpus_features[feature]["StdDev"]
-        testcase_zscores[feature] = (feature_val - feature_mean) / feature_stdev
-
-    # Calcular Delta para cada autor
-    for autor in autores:
-        delta = 0
-        for feature in features:
-            delta += math.fabs((testcase_zscores[feature] -
-                                feature_zscores[autor][feature]))
-        delta /= len(features)
-        print("Delta score do documento",
-              obra,
-              "para o candidato",
-              autor,
-              "é =",
-              delta)
-    print("\n")
-```
-
-Como nas outras duas técnicas, o resultado serão 5 blocos de resultados com o valor de Delta de cada documento de teste para cada suposto autor. O primeiro bloco se parecerá com isso:
-
-```
-Delta score do documento Assis (teste) para o candidato Assis é = 0.8715781237572774
-Delta score do documento Assis (teste) para o candidato Alencar é = 1.2624531605759595
-Delta score do documento Assis (teste) para o candidato Castelo Branco é = 1.2303968803032856
-Delta score do documento Assis (teste) para o candidato Castilho é = 1.6276770882853728
-Delta score do documento Assis (teste) para o candidato Chagas é = 1.0527125070730734
-```
-
-Vamos avaliar todos os valores Delta na nossa matriz de confusão (reduzidos para 4 casas decimais):
-
-| | Assis | Alencar | Castelo Branco | Castilho | Chagas |
-| --------- | --------- | --------- | --------- | --------- | --------- |
-| **Assis (teste)** | **0.8715** | 1.2624 | 1.2303 | 1.6276 | 1.0527 |
-| **Alencar (teste)** | 1.9762 | **1.3355** | 1.3878 | 1.6425 | 1.5042 |
-| **Castelo Branco (teste)** | 1.004 | 1.3208 | **0.8182** | 1.5202 | 1.2829 |
-| **Castilho (teste)** | 1.5705 | 1.2553 | 1.0970 | **0.4518** | 0.8176 |
-| **Chagas (teste)** | 1.1444 | 1.0169 | 0.9462 | 0.9864 | **0.7756** |
-
-Com o método Delta, pudemos inferir corretamente 100% da autoria dos documentos de teste! Alencar, que teve o pior valor nas duas outras técnicas, aqui aparece com o menor valor entre os 5 candidatos.
-Ao utilizarmos autores brasileiros e portugueses, tínhamos em mente também a possibilidade de que a comparação entre ficheiros de autores de uma mesma nacionalidade pudesse resultar em valores mais próximos do que a comparação entre autores de nacionalidades distintas, em função de particularidades linguísticas, o que parece não ter sido o caso aqui. Por se tratarem de obras do século XIX, poderíamos buscar explicações para isso na maior similaridade das línguas na época, na influência da Academia Portuguesa no Brasil, ou mesmo no letramento e nas influências dos autores. Uma segunda análise com obras mais contemporâneas seria um excelente segundo passo para esta análise, e fica como sugestão para o leitor.
-
-# Leituras adicionais e recursos
-
-## Estudos de caso interessantes
-
-Estilometria e/ou atribuição de autoria têm sido utilizadas em diversos contextos, empregando diversas técnicas.
-Aqui estão alguns estudos de caso interessantes:
-
-* Javier de la Rosa e Juan Luis Suárez procuram o autor de um famoso romance espanhol do século XVI entre uma lista considerável de candidatos.[^13]
-* Maria Slautina e Mikhail Marusenko usam o reconhecimento de padrões em um conjunto de elementos sintáticos, gramaticais e lexicais, desde a contagem de palavras simples (com marcação de classes gramaticais) a vários tipos de frases, a fim de estabelecer semelhanças estilísticas entre os textos medievais.[^14]
-* Ellen Jordan, Hugh Craig e Alexis Antonia examinam o caso de periódicos britânicos do século XIX, nos quais os artigos geralmente não eram assinados, para determinar o autor de quatro resenhas de trabalhos de ou sobre as irmãs Brontë.[^15] Este estudo de caso aplica uma versão inicial de outro método desenvolvido por John Burrows, o método Zeta, que se concentra nas palavras favoritas de um autor em vez de palavras funcionais comuns.[^16]
-* Valérie Beaudoin e François Yvon analisaram 58 peças em verso dos dramaturgos franceses Corneille, Racine e Molière, descobrindo que os dois primeiros foram muito mais consistentes na maneira como estruturaram sua escrita do que o último.[^17]
-* Marcelo Luiz Brocardo, Issa Traore, Sherif Saad e Isaac Woungang aplicam [aprendizagem supervisionada](https://perma.cc/7TAQ-JECD) e [modelos n-gram](https://perma.cc/X34K-5R9X) para determinar a autoria de mensagens curtas com um grande número de autores em potencial, como e-mails e tweets.[^18]
-* Moshe Koppel e Yaron Winter propõem o "método do impostor", que tenta determinar se dois textos foram escritos pelo mesmo autor, inserindo-os em um conjunto de textos escritos por falsos candidatos.[^19] Justin Anthony Stover _et al._ aplicaram recentemente a técnica para determinar a autoria de um manuscrito do século II recém-descoberto.[^20]
-* Finalmente, uma equipe liderada por David I. Holmes estudou o caso peculiar de documentos escritos por um soldado da Guerra Civil ou por sua viúva, que pode ter copiado intencionalmente o estilo de escrita dele.[^21]
-
-## Referências adicionais sobre autoria e estilometria
-
-A referência mais exaustiva em todos os assuntos relacionados à atribuição de autoria, incluindo a história do campo, seus fundamentos matemáticos e linguísticos e seus vários métodos, foi escrita por Patrick Juola em 2007.[^22] O Capítulo 7, em particular, mostra como a atribuição de autoria pode servir como um marcador para várias identidades de grupo (gênero, nacionalidade, dialeto, etc.), para mudanças na linguagem ao longo do tempo, e até mesmo para personalidade e saúde mental.
-
-Uma pesquisa mais curta pode ser encontrada em Moshe Koppel _et al._, que discute casos em que há um único autor candidato cuja autoria deve ser confirmada, um grande número de candidatos para os quais apenas pequenas amostras de escrita estão disponíveis para treinar um algoritmo de aprendizado de máquina, ou nenhum candidato conhecido.[^23]
-
-O artigo de Stamatatos citado anteriormente[^2] também contém uma pesquisa qualitativa do campo.
-
-## Varia
-
-*Programming historians* que desejam explorar mais a estilometria podem fazer o download do pacote [Stylo](https://cran.r-project.org/web/packages/stylo/index.html),[^24] que se tornou um padrão _de facto_. Entre outras coisas, o pacote Stylo fornece uma implementação do método Delta, funcionalidade de extração de elementos e interfaces gráficas de usuário convenientes tanto para manipulação de dados quanto para produção de resultados visualmente atraentes.
-Observe que o Stylo é escrito em [R](https://www.r-project.org/), o que significa que você precisará do R instalado no seu computador para executá-lo; mas, entre a interface gráfica do usuário e os tutoriais, pouco ou nenhum conhecimento prévio de programação em R deve ser necessário.
-
-Leitores fluentes em francês interessados em explorar as implicações [epistemológicas](https://perma.cc/6DFE-QTWV) das interações entre métodos quantitativos e qualitativos na análise do estilo de escrita devem ler Clémence Jacquot.[^25]
-
-Surpreendentemente, os dados obtidos por meio de [reconhecimento ótico de caracteres](https://perma.cc/R9U6-TRGE) (OCR) se mostraram adequados para fins de atribuição de autoria, mesmo quando sofrem de altas taxas de erro de OCR.[^26]
-
-Por fim, existe um [grupo Zotero](https://www.zotero.org/groups/643516/stylometry_bibliography/items) dedicado à estilometria, onde você pode encontrar muitas outras referências a métodos e estudos.
-
-# Agradecimentos
-
-Agradecimentos a Stéfan Sinclair e Andrew Piper, em cujos seminários na Universidade McGill este projeto começou. Também agradeço à minha orientadora de tese, Susan Dalton, cuja orientação é sempre inestimável.
-
-# Notas finais
-
-[^1]: Veja, por exemplo, Justin Rice, ["What Makes Hemingway Hemingway? A statistical analysis of the data behind Hemingway's style"](https://perma.cc/W8TR-UH6S).
-
-[^2]: Efstathios Stamatatos, "A Survey of Modern Authorship Attribution Methods", _Journal of the American Society for Information Science and Technology_, vol. 60, no. 3 (December 2008), pp. 538–56, citação na p. 540, [https://doi.org/10.1002/asi.21001](https://doi.org/10.1002/asi.21001).
-
-[^3]: Jan Rybicki, "Vive La Différence: Tracing the (Authorial) Gender Signal by Multivariate Analysis of Word Frequencies", _Digital Scholarship in the Humanities_, vol. 31, no. 4 (December 2016), pp. 746–61, [https://doi.org/10.1093/llc/fqv023](https://doi.org/10.1093/llc/fqv023). Sean G. Weidman e James O'Sullivan, "The Limits of Distinctive Words: Re-Evaluating Literature's Gender Marker Debate", _Digital Scholarship in the Humanities_, 2017, [https://doi.org/10.1093/llc/fqx017](https://doi.org/10.1093/llc/fqx017).
-
-[^4]: Ted Underwood, David Bamman, e Sabrina Lee, "The Transformation of Gender in English-Language Fiction", _Cultural Analytics_, Feb. 13, 2018, [https://doi.org/10.22148/16.019](https://doi.org/10.22148/16.019).
-
-[^5]: Sven Meyer zu Eissen e Benno Stein, "Intrinsic Plagiarism Detection", em _ECIR 2006_, editado por Mounia Lalmas, Andy MacFarlane, Stefan Rüger, Anastasios Tombros, Theodora Tsikrika, e Alexei Yavlinsky, Berlin, Heidelberg: Springer, 2006, pp. 565–69, [https://doi.org/10.1007/11735106_66](https://doi.org/10.1007/11735106_66).
-
-[^6]: Cynthia Whissell, "Traditional and Emotional Stylometric Analysis of the Songs of Beatles Paul McCartney and John Lennon", _Computers and the Humanities_, vol. 30, no. 3 (1996), pp. 257–65.
-
-[^7]: Douglass Adair, "The Authorship of the Disputed Federalist Papers", _The William and Mary Quarterly_, vol. 1, no. 2 (April 1944), pp. 97–122.
-
-[^8]: T. C. Mendenhall, "The Characteristic Curves of Composition", _Science_, vol. 9, no. 214 (Mar. 11, 1887), pp. 237–249.
-
-[^9]: Adam Kilgarriff, "Comparing Corpora", _International Journal of Corpus Linguistics_, vol. 6, no. 1 (2001), pp. 97–133.
-
-[^10]: John Burrows, "'Delta': a Measure of Stylistic Difference and a Guide to Likely Authorship", _Literary and Linguistic Computing_, vol. 17, no. 3 (2002), pp. 267–287.
-
-[^11]: Stefan Evert et al., "Understanding and explaining Delta measures for authorship attribution", _Digital Scholarship in the Humanities_, vol. 32, no. suppl_2 (2017), pp. ii4–ii16.
-
-[^12]: José Calvo Tello, "Entendiendo Delta desde las Humanidades", [_Caracteres_, vol. 5, no. 1 (May 27 2016)](https://perma.cc/LNF3-QP8V), pp. 140–176.
-
-[^13]: Javier de la Rosa e Juan Luis Suárez, "The Life of Lazarillo de Tormes and of His Machine Learning Adversities", _Lemir_, vol. 20 (2016), pp. 373–438.
-
-[^14]: Maria Slautina e Mikhaïl Marusenko, "L'émergence du style, The emergence of style", _Les Cahiers du numérique_, vol. 10, no. 4 (November 2014), pp. 179–215, [https://doi.org/10.3166/LCN.10.4.179-215](https://doi.org/10.3166/LCN.10.4.179-215).
-
-[^15]: Ellen Jordan, Hugh Craig, e Alexis Antonia, "The Brontë Sisters and the 'Christian Remembrancer': A Pilot Study in the Use of the 'Burrows Method' to Identify the Authorship of Unsigned Articles in the Nineteenth-Century Periodical Press", _Victorian Periodicals Review_, vol. 39, no. 1 (2006), pp. 21–45.
-
-[^16]: John Burrows, "All the Way Through: Testing for Authorship in Different Frequency Strata", _Literary and Linguistic Computing_, vol. 22, no. 1 (April 2007), pp. 27–47, [https://doi.org/10.1093/llc/fqi067](https://doi.org/10.1093/llc/fqi067).
-
-[^17]: Valérie Beaudoin e François Yvon, "Contribution de La Métrique à La Stylométrie", _JADT 2004: 7e Journées internationales d'Analyse statistique des Données Textuelles_, vol. 1, Louvain La Neuve, Presses Universitaires de Louvain, 2004, pp. 107–18.
-
-[^18]: Marcelo Luiz Brocardo, Issa Traore, Sherif Saad e Isaac Woungang, "Authorship Verification for Short Messages Using Stylometry", _2013 International Conference on Computer, Information and Telecommunication Systems (CITS)_, 2013, [https://doi.org/10.1109/CITS.2013.6705711](https://doi.org/10.1109/CITS.2013.6705711).
-
-[^19]: Moshe Koppel e Yaron Winter, "Determining If Two Documents Are Written by the Same Author", _Journal of the Association for Information Science and Technology_, vol. 65, no. 1 (October 2013), pp. 178–87, [https://doi.org/10.1002/asi.22954](https://doi.org/10.1002/asi.22954).
-
-[^20]: Justin Anthony Stover et al., "Computational authorship verification method attributes a new work to a major 2nd century African author", _Journal of the Association for Information Science and Technology_, vol. 67, no. 1 (2016), pp. 239–242.
-
-[^21]: David I. Holmes, Lesley J. Gordon, e Christine Wilson, "A widow and her soldier: Stylometry and the American Civil War", _Literary and Linguistic Computing_, vol. 16, no. 4 (2001), pp. 403–420.
-
-[^22]: Patrick Juola, "Authorship Attribution", _Foundations and Trends in Information Retrieval_, vol. 1, no. 3 (2007), pp. 233–334, [https://doi.org/10.1561/1500000005](https://doi.org/10.1561/1500000005).
-
-[^23]: Moshe Koppel, Jonathan Schler, e Shlomo Argamon, "Computational Methods in Authorship Attribution", _Journal of the Association for Information Science and Technology_, vol. 60, no. 1 (January 2009), pp. 9–26, [https://doi.org/10.1002/asi.v60:1](https://doi.org/10.1002/asi.v60:1).
-
-[^24]: Maciej Eder, Jan Rybicki, e Mike Kestemont, "Stylometry with R: A Package for Computational Text Analysis", _The R Journal_, vol. 8, no. 1 (2016), pp. 107–21.
-
-[^25]: Clémence Jacquot, "Rêve d'une épiphanie du style: visibilité et saillance en stylistique et en stylométrie", _Revue d'Histoire Littéraire de la France_, vol. 116, no. 3 (2016), pp. 619–39.
-
-[^26]: Patrick Juola, John Noecker Jr, e Michael Ryan, "Authorship Attribution and Optical Character Recognition Errors", _TAL_, vol. 53, no. 3 (2012), pp. 101–127.
+---
+title: Introdução à estilometria com Python
+layout: lesson
+slug: introducao-estilometria-python
+date: 2018-04-21
+translation_date: 2021-12-27
+authors:
+- François Dominic Laramée
+reviewers:
+- Folgert Karsdorp
+- Jan Rybicki
+- Antonio Rojas Castro
+editors:
+- Adam Crymble
+translator:
+- Daniel Bonatto Seco
+translation-editor:
+- Jimmy Medeiros
+translation-reviewer:
+- Bruno Almeida
+- Suemi Higuchi
+difficulty: 2
+review-ticket: https://github.com/programminghistorian/ph-submissions/issues/445
+activity: analyzing
+topics: [distant-reading, python]
+abstract: "Nesta lição, aprenderá a realizar análises estilométricas e a determinar a autoria de textos. A lição cobre três métodos: Curvas Características de Composição de Mendenhall, Método Qui-Quadrado de Kilgariff e Método Delta de John Burrows."
+original: introduction-to-stylometry-with-python
+avatar_alt: Mulher a ler junto a uma pintura
+doi: 10.46430/phpt0024
+---
+
+
+{% include toc.html %}
+
+# Introdução
+
+[Estilometria](https://perma.cc/NYH2-KWLA) é o estudo quantitativo do estilo literário por meio de métodos de [leitura distante](https://perma.cc/XK8J-F6ZF) computacional. Baseia-se na observação de que os autores tendem a escrever de maneiras relativamente consistentes, reconhecíveis e únicas. Por exemplo:
+
+* Cada pessoa tem seu próprio vocabulário único, às vezes rico, às vezes limitado. Embora um vocabulário mais amplo esteja geralmente associado à qualidade literária, nem sempre é esse o caso. Ernest Hemingway é famoso por usar um número surpreendentemente pequeno de palavras diferentes em sua escrita,[^1] o que não o impediu de ganhar o Prêmio Nobel de Literatura em 1954;
+* Algumas pessoas escrevem frases curtas, enquanto outras preferem blocos longos de texto compostos por muitas frases;
+* Não há duas pessoas que usem pontos e vírgulas, travessões e outras formas de pontuação exatamente da mesma maneira.
+
+As maneiras como os escritores usam pequenas [*function words*](https://perma.cc/284C-CNHD), como artigos, preposições e conjunções, mostram-se particularmente reveladoras. Em uma pesquisa dos métodos estilométricos históricos e atuais, Efstathios Stamatatos aponta que as palavras funcionais são "usadas de maneira amplamente inconsciente pelos autores e são independentes do tópico"[^2]. Para a análise estilométrica, isso é muito vantajoso, visto que esse padrão inconsciente tende a variar menos no [*corpus*](https://perma.cc/9XQ4-J4A5) de um autor do que seu vocabulário geral (e também é muito difícil de copiar para um pretenso falsificador). As palavras funcionais também foram identificadas como marcadores importantes do gênero literário e da cronologia.
+
+Os pesquisadores têm usado a estilometria como uma ferramenta para estudar uma variedade de questões culturais.
+Por exemplo, uma quantidade considerável de pesquisas estudou as diferenças entre as maneiras como homens e mulheres escrevem[^3] ou sobre o que escrevem.[^4] Outros pesquisadores estudaram as maneiras como uma mudança repentina no estilo de escrita em um único texto pode indicar plágio,[^5] e até mesmo a maneira como as letras dos músicos John Lennon e Paul McCartney se tornaram cada vez menos alegres e menos ativas à medida que os [Beatles](https://perma.cc/DQ66-M79T) se aproximavam do fim de sua carreira de gravação na década de 1960.[^6]
+
+No entanto, uma das aplicações mais comuns da estilometria é na atribuição de autoria. Dado um texto anônimo, às vezes é possível inferir quem o escreveu medindo certas características, como o número médio de palavras por frase ou a propensão do autor a usar "todavia" em vez de "no entanto", e comparando as medidas com outros textos escritos pelo suposto autor. Este é o objetivo deste tutorial: a partir de um conjunto de obras clássicas de romancistas lusos e brasileiros do século XIX, iremos comparar exemplares de suas obras com o estilo literário do conjunto de autores, a fim de tentar inferir suas respectivas autorias (nota de tradução: foi decidido mudar o _corpus_ usado nesta lição para um que fosse culturalmente mais relevante para o público que fala e escreve português; foi mantida a restante estrutura da lição original, com exceção de ligeiras adaptações face à mudança do _corpus_).
+
+## Objetivos de aprendizado
+
+No final desta lição, teremos percorrido os seguintes tópicos:
+
+* Como aplicar vários métodos estilométricos para inferir a autoria de um texto anônimo ou conjunto de textos;
+* Como usar estruturas de dados relativamente avançadas, incluindo [dicionários](https://perma.cc/TTF4-SJ23) de [strings](https://perma.cc/7DCC-M9AT) e dicionários de dicionários, em [Python](https://perma.cc/Z82S-3L3M);
+* O básico do [Natural Language Toolkit](https://perma.cc/E7LZ-WECZ) (NLTK), um módulo Python popular dedicado a [processamento de linguagem natural](https://perma.cc/MFX4-LAVZ).
+
+## Leitura prévia
+
+Se você não tem experiência com a linguagem de programação Python ou está tendo dificuldade com os exemplos apresentados neste tutorial, o autor recomenda que você leia as lições [Trabalhando com ficheiros de texto em Python](/pt/licoes/trabalhando-ficheiros-texto-python) e [Manipular Strings com Python](/pt/licoes/manipular-strings-python). Note que essas lições foram escritas em Python versão 2, enquanto esta usa Python versão 3. As diferenças de [sintaxe](https://perma.cc/E5LQ-S65P) entre as duas versões da linguagem podem ser sutis. Se você ficar em dúvida, siga os exemplos conforme descritos nesta lição e use as outras lições como material de apoio. (Este tutorial encontra-se atualizado até à versão [Python 3.8.5](https://perma.cc/XCT2-Q4AT); as [strings literais formatadas](https://perma.cc/U6Q6-59V3) na linha `with open(f'dados/pg{id_ficheiro}.txt', 'r', encoding='utf-8') as f:`, por exemplo, requerem Python 3.6 ou uma versão mais recente da linguagem.)
+
+## Materiais requeridos
+
+Este tutorial usa conjuntos de dados e software que você terá que baixar e instalar.
+
+### O conjunto de dados ###
+
+Para trabalhar nesta lição, você precisará baixar e descompactar o ficheiro [.zip](/assets/introduction-to-stylometry-with-python/dataset_estilometria.zip) contendo as 15 obras que compõem o *corpus* que será utilizado neste tutorial. As obras foram originalmente extraídas do [Projeto Gutenberg](https://perma.cc/8GTT-3M9N).
+Ao descompactar o ficheiro, será criada uma pasta com o nome `dados`. O diretório que contém essa pasta será o seu [diretório de trabalho](https://perma.cc/9KVS-T3A5), e todo o trabalho deve ser salvo nele durante a execução da lição (o código da lição abre os ficheiros usando o caminho relativo `dados/`).
+
+### O software ###
+
+Esta lição usa as seguintes versões da linguagem Python e das suas [bibliotecas](https://pt.wikipedia.org/wiki/Biblioteca_(computa%C3%A7%C3%A3o)):
+
+* [Python 3.x](https://www.python.org/downloads/) - a última versão estável é recomendada;
+* [nltk](https://www.nltk.org/) - Natural Language Toolkit, geralmente abreviado `nltk`;
+* [matplotlib](https://matplotlib.org/) - visualização de dados e geração de gráficos;
+* [re](https://docs.python.org/pt-br/3/library/re.html) - limpeza de dados via Regex (veremos durante o tutorial o porquê).
+
+Alguns desses módulos podem não estar pré-instalados em seu computador. Se você encontrar mensagens de erro como "Módulo não encontrado" ou similares, terá que baixar e instalar o(s) módulo(s) ausente(s). A forma mais simples de realizar esta tarefa é através do comando `pip`. Mais detalhes estão disponíveis no tutorial do *Programming Historian* [Instalação de Módulos Python com pip](/pt/licoes/instalacao-modulos-python-pip).
+
+## Algumas notas sobre Independência Linguística
+
+Este tutorial aplica a análise estilométrica a um conjunto de textos em português (PT-PT e PT-BR) usando uma biblioteca Python chamada `nltk`. Muitas das funcionalidades fornecidas pelo `nltk` operam com outros idiomas. Contanto que um idioma forneça uma maneira clara de distinguir os limites de uma palavra, o `nltk` deve ter um bom desempenho. Idiomas como o chinês, para os quais não há distinção clara entre os limites das palavras, podem ser problemáticos. O autor original desta lição utilizou o `nltk` com textos em francês sem nenhum problema; outros idiomas que usam [diacríticos](https://perma.cc/7VGD-5968), como espanhol e alemão, também devem funcionar bem com o `nltk`. Consulte a [documentação do nltk](https://perma.cc/S4EX-2DBT) para obter detalhes.
+
+Apenas uma das tarefas neste tutorial requer código dependente do idioma. Para dividir um texto em um conjunto de palavras em uma língua diferente do inglês, você precisará especificar o idioma apropriado como um parâmetro para o [tokenizador](https://perma.cc/NGM5-4MED) da biblioteca `nltk`, que usa o inglês como padrão. Isso será explicado no tutorial.
+
+Por fim, observe que algumas tarefas linguísticas, como [*part-of-speech tagging*](https://perma.cc/L9SU-PS9D), podem não ser suportadas pelo `nltk` em outros idiomas além do inglês. Este tutorial não cobre a aplicação de *part-of-speech tagging*. Se você precisar dela para os seus próprios projetos, consulte a [documentação do nltk](https://perma.cc/S4EX-2DBT) para obter orientações.
+
+# O *corpus* - Contextualização
+
+No [exemplo original deste tutorial em inglês](/en/lessons/introduction-to-stylometry-with-python), utilizaram-se os [papéis federalistas](https://perma.cc/DW5V-MH5W) como exemplo de aplicação de estilometria, usando as técnicas que serão apresentadas para inferir a autoria dos textos contestados dentro do conjunto de documentos que configura o *corpus*.[^7]
+Como na língua portuguesa não temos um conjunto de textos que possua estas mesmas características, no exemplo que apresentaremos traremos um total de 15 obras completas de 5 autores diferentes, três deles portugueses e dois brasileiros, todos romancistas do século XIX, disponibilizadas pelo [Projeto Gutenberg](https://perma.cc/5PRR-TM3D).
+Utilizaremos duas obras de cada autor para definir seus respectivos estilos e uma terceira para constituir o conjunto de testes, a fim de avaliarmos se as técnicas utilizadas realizarão a inferência correta de autoria através do grau de similaridade de cada obra deste conjunto com o estilo obtido de cada autor.
+
+Os autores e obras utilizadas são os seguintes:
+
+| Autor | Obra 1 | Obra 2 | Obra 3 |
+| --------- | --------- | --------- | --------- |
+| [Machado de **Assis**](https://perma.cc/6BMU-UKZL) (Brasil) | [Dom Casmurro](https://www.gutenberg.org/ebooks/55752) (**55752**) | [Memorias Posthumas de Braz Cubas](https://www.gutenberg.org/ebooks/54829) (**54829**) | [Quincas Borba](https://www.gutenberg.org/ebooks/55682) (**55682**) |
+| [José de **Alencar**](https://perma.cc/Y3Y2-VHJ5) (Brasil) | [Ubirajara](https://www.gutenberg.org/ebooks/38496) (**38496**) | [Cinco minutos](https://www.gutenberg.org/ebooks/44540) (**44540**) | [Como e porque sou romancista](https://www.gutenberg.org/ebooks/29040) (**29040**) |
+| [Camilo **Castelo Branco**](https://perma.cc/Q4AJ-VZBH) (Portugal) | [Carlota Angela](https://www.gutenberg.org/ebooks/26025) (**26025**) | [Amor de Salvação](https://www.gutenberg.org/ebooks/26988) (**26988**) | [Amor de Perdição: Memorias d'uma familia](https://www.gutenberg.org/ebooks/16425) (**16425**) |
+| [António Feliciano de **Castilho**](https://perma.cc/LZ9J-3H5Z) (Portugal) | [A Chave do Enigma](https://www.gutenberg.org/ebooks/32002) (**32002**) | [A Primavera](https://www.gutenberg.org/ebooks/65021) (**65021**) | [O presbyterio da montanha](https://www.gutenberg.org/ebooks/28127) (**28127**) |
+| [Manuel Pinheiro **Chagas**](https://perma.cc/8LU3-RADW) (Portugal) | [Historia alegre de Portugal](https://www.gutenberg.org/ebooks/29394) (**29394**) | [A Lenda da Meia-Noite](https://www.gutenberg.org/ebooks/23400) (**23400**) | [Astucias de Namorada, e Um melodrama em Santo Thyrso](https://www.gutenberg.org/ebooks/29342) (**29342**) |
+
+As partes destacadas do nome de cada autor indicam como eles serão referenciados neste tutorial a partir deste ponto. Para os códigos, utilizaremos o `EBook-No.` (número de referência da obra no Projeto Gutenberg), presente no nome dos ficheiros disponibilizados.
+
+# Nossos casos de teste
+
+Nesta lição, usaremos obras de romancistas brasileiros e portugueses do século XIX como um estudo de caso para demonstrar três abordagens estilométricas diferentes:
+
+1. Curvas características de composição de Mendenhall
+2. Método Qui-Quadrado de Kilgariff
+3. Método Delta de John Burrows
+
+Em todas as abordagens acima mencionadas, utilizaremos os documentos das colunas **Obra 1** e **Obra 2** para definir o estilo de cada autor. Os documentos da coluna **Obra 3** serão testados individualmente com cada um dos 5 autores para tentarmos inferir a autoria pela proximidade de estilo.
+
+# Preparando os dados para análise
+
+Antes de prosseguirmos com a análise estilométrica, precisamos carregar os ficheiros contendo todas as 15 obras em [estruturas de dados](https://perma.cc/P843-J4LB) na memória do computador.
+
+O primeiro passo neste processo é designar cada obra para o seu respectivo conjunto. Como cada obra está relacionada com o seu respectivo `EBook-No.`, podemos atribuir cada obra (valor) à chave do seu autor (ou a uma chave separada, se ela fizer parte da amostra de teste) usando um *dicionário* Python.
+O dicionário é um tipo de conjunto de dados composto de um número arbitrário de pares de chave-valor; neste caso, os nomes dos autores servirão como chaves (separados entre treino e teste), enquanto os `EBook-No.` das obras serão os valores associados a essas chaves.
+
+```python
+ids_obras = {
+    'Assis' : [55752, 54829],
+    'Alencar' : [38496, 44540],
+    'Castelo Branco' : [26025, 26988],
+    'Castilho' : [32002, 65021],
+    'Chagas' : [29394, 23400],
+    'Assis (teste)' : [55682],
+    'Alencar (teste)' : [29040],
+    'Castelo Branco (teste)' : [16425],
+    'Castilho (teste)' : [28127],
+    'Chagas (teste)' : [29342]
+}
+```
+
+Os dicionários Python são muito flexíveis. Por exemplo, podemos acessar um valor específico *indexando* o dicionário com uma de suas chaves, podemos varrer o dicionário inteiro fazendo um loop em sua lista de chaves, etc. Faremos amplo uso desta funcionalidade à medida que avançarmos.
+
+A seguir, como estamos interessados no vocabulário de cada autor, definiremos uma breve [função](https://perma.cc/P8CA-Y43Q) em Python que irá criar uma longa lista de palavras a partir de cada uma das obras atribuídas a um único autor. Isso será armazenado como uma [string](https://perma.cc/7DCC-M9AT).
+Abra o seu ambiente de desenvolvimento Python escolhido. Se você não sabe como fazer isso, leia "Configurar um ambiente de desenvolvimento integrado para Python" ([Windows](/pt/licoes/instalacao-windows), [Linux](/pt/licoes/instalacao-linux), [Mac](/pt/licoes/instalacao-mac)) antes de prosseguir.
+
+```python
+# Função que compila todos os ficheiros de texto de cada grupo em uma única string
+
+import re
+
+def ler_ficheiros_para_string(ids_ficheiros):
+    strings = []
+    for id_ficheiro in ids_ficheiros:
+        with open(f'dados/pg{id_ficheiro}.txt', 'r',
+                  encoding='utf-8') as f:
+            texto = f.read()
+        # Remover o cabeçalho e o rodapé de metadados do Projeto Gutenberg
+        texto = re.search(r"(START.*?\*\*\*)(.*)(\*\*\* END)",
+                          texto,
+                          re.DOTALL).group(2)
+        strings.append(texto)
+    return '\n'.join(strings)
+```
+
+Perceba que, dentro da função, temos também uma etapa de limpeza dos textos usando [expressões regulares](https://perma.cc/DT3K-XUBG). Isso foi necessário para este corpus específico, pois as obras publicadas no Projeto Gutenberg possuem uma estrutura de cabeçalho e rodapé de [metadados](https://perma.cc/E8P8-GKDR) que não pode ser considerada na análise estilométrica, uma vez que não foi redigida pelos autores analisados. A utilização de expressões regulares não faz parte do escopo deste tutorial, então limitaremo-nos a compreender que estamos utilizando a biblioteca `re` para capturar apenas o conjunto de caracteres entre os marcadores `*** START OF THIS PROJECT GUTENBERG [NOME DA OBRA] ***` e `*** END OF THIS PROJECT GUTENBERG [NOME DA OBRA] ***` presentes em cada documento do projeto. Para maiores dúvidas sobre a utilização de expressões regulares e da biblioteca `re`, consulte a [documentação](https://perma.cc/JFP3-B4P4).
+
+Na sequência, construímos uma nova estrutura de dados chamando repetidamente a função `ler_ficheiros_para_string()`, passando a ela uma lista diferente de documentos a cada vez. Armazenaremos os resultados em outro dicionário, este com os nomes dos autores/casos de teste como chaves e todo o texto dos respectivos documentos como valores. Para simplificar, iremos nos referir à string contendo uma lista de documentos como "corpus do autor".
+
+```python
+# Criar um dicionário com os corpora dos autores
+obras = {}
+for autor, ids_ficheiros in ids_obras.items():
+    obras[autor] = ler_ficheiros_para_string(ids_ficheiros)
+```
+
+Para nos certificarmos de que os ficheiros foram carregados corretamente, imprima os primeiros cem caracteres de cada entrada do dicionário na tela:
+
+```python
+for autor in obras:
+    print(obras[autor][:100])
+```
+
+Se esta operação de impressão exibir quaisquer trechos de texto no console, então a operação de leitura dos ficheiros funcionou conforme o esperado e você pode prosseguir para a análise estilométrica.
+
+Se os ficheiros não forem carregados, o motivo mais provável é que o seu diretório de trabalho atual não seja o diretório que contém a pasta `dados` criada ao descompactar o ficheiro da seção de Materiais requeridos acima; mudar o seu diretório de trabalho deve resolver o problema. Como você faz isso depende do seu ambiente de desenvolvimento Python.
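+
+Em um ambiente baseado no interpretador Python, por exemplo, isso pode ser feito com o módulo `os` da biblioteca padrão. O esboço abaixo usa um caminho hipotético, que deve ser substituído pelo local real da pasta no seu computador:
+
+```python
+import os
+
+print(os.getcwd())  # Mostra o diretório de trabalho atual
+
+# Caminho hipotético: ajuste para o diretório que contém a pasta 'dados'
+os.chdir('/caminho/para/a/licao')
+```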
+
+# Primeiro teste estilométrico: curvas características de composição de Mendenhall
+
+O pesquisador literário T. C. Mendenhall escreveu certa vez que a assinatura estilística de um autor pode ser encontrada contando a frequência com que usa palavras de tamanhos diferentes.[^8] Por exemplo, se contarmos os tamanhos de palavras em vários segmentos de 1.000 ou 5.000 palavras de qualquer romance e, em seguida, traçarmos um gráfico das distribuições de comprimento das palavras, as curvas pareceriam praticamente as mesmas, não importando que partes do romance tivéssemos escolhido. Na verdade, Mendenhall acreditava que se alguém contasse palavras suficientes selecionadas de várias partes da obra de toda a vida de um escritor (digamos, 100.000 ou mais), a "curva característica" de uso de comprimento de palavras do autor se tornaria tão precisa que seria constante ao longo de sua vida.
+
+Pelos padrões de hoje, contar o comprimento das palavras parece uma forma muito direta (e talvez simplista) de medir o estilo literário. O método de Mendenhall não leva em consideração as palavras do vocabulário de um autor, o que é obviamente problemático. Portanto, não devemos tratar as curvas características como uma fonte particularmente confiável de evidência estilométrica. No entanto, Mendenhall publicou a sua teoria há mais de cento e trinta anos e fez todos os cálculos à mão. É compreensível que ele tenha optado por trabalhar com uma estatística que, embora grosseira, fosse ao menos fácil de compilar. Em honra ao valor histórico de sua tentativa inicial de estilometria, e porque a curva característica produz resultados visuais interessantes que podem ser implementados rapidamente, usaremos o método de Mendenhall como um primeiro passo em nossa exploração das técnicas de atribuição de autoria.
+
+O trecho de código necessário para calcular e exibir as curvas características para os autores e os documentos de teste é o seguinte:
+
+```python
+# Carregar nltk e matplotlib
+import nltk
+nltk.download('punkt')
+import matplotlib.pyplot as plt
+
+obras_tokens = {}
+obras_distribuicao_comprimento = {}
+
+id_subplot = 1
+fig = plt.figure(figsize=(20,20))
+
+autores = list(obras.keys())
+
+for autor in autores:
+    # Transformar os corpora dos autores em listas de tokens de palavras
+    tokens = nltk.word_tokenize(obras[autor], language="portuguese")
+
+    # Filtrar pontuação
+    obras_tokens[autor] = ([token for token in tokens
+                            if any(c.isalpha() for c in token)])
+
+    # Obter a distribuição de comprimentos de tokens
+    token_comprimentos = [len(token) for token in obras_tokens[autor]]
+    obras_distribuicao_comprimento[autor] = nltk.FreqDist(token_comprimentos)
+
+    # Plotar a curva característica de composição
+    lista_chaves = []
+    lista_valores = []
+
+    for i in range(1,16):
+        lista_chaves.append(i)
+        lista_valores.append(obras_distribuicao_comprimento[autor][i])
+
+    lista_valores_normalizado = [value/max(lista_valores) for value in lista_valores]
+
+    plt.subplot(5, 5, id_subplot)
+    plt.plot(lista_chaves, lista_valores_normalizado)
+    plt.xticks(lista_chaves)
+    plt.title(autor)
+    id_subplot += 1
+
+plt.savefig("estilometria_comparacao.jpeg", dpi=300, bbox_inches='tight')
+plt.show()
+```
+
+Se você estiver trabalhando em um [Jupyter Notebook](https://jupyter.org/), adicione a expressão `%matplotlib inline` após a importação das bibliotecas; caso contrário, você pode não ver os gráficos em sua tela.
+Se você estiver trabalhando em um [Jupyter Lab](https://jupyterlab.readthedocs.io/en/stable/getting_started/installation.html), substitua esta expressão por `%matplotlib ipympl`.
+
+A primeira linha no trecho de código acima carrega o módulo *Natural Language Toolkit (nltk)*, que contém um número enorme de funções e recursos úteis para processamento de texto. Mal tocaremos em seus fundamentos nesta lição; se você decidir explorar mais a análise de texto em Python, recomendo fortemente que comece com [a documentação do nltk](https://www.nltk.org/).
+
+As próximas linhas configuram estruturas de dados que serão preenchidas pelo bloco de código dentro do loop `for`. Este loop faz os mesmos cálculos para todos os nossos "autores":
+
+* Invoca o método `word_tokenize()` do `nltk`, explicitando a linguagem do _corpus_ para português através do argumento `language="portuguese"`, e divide o _corpus_ em _tokens_, ou seja, palavras, números, pontuação, etc.;
+* Olha para esta lista de tokens e filtra as não-palavras;
+* Cria uma lista contendo os comprimentos de cada token de palavra restante;
+* Cria um objeto de _distribuição de frequência_ a partir dessa lista de comprimentos de palavra, basicamente contando quantas palavras de uma letra, palavras de duas letras, etc., existem no _corpus_ do autor, e em seguida realiza a normalização dessa distribuição, ou seja, ajusta todos os valores para um intervalo entre 0 e 1. Esta etapa é realizada para comparar de forma mais clara gráficos de distribuição de _corpora_ de tamanhos diferentes;
+* Plota um gráfico da distribuição de comprimentos de palavras no corpus, para todas as palavras de até 15 caracteres.
+
+Os resultados que obtemos são os seguintes:
+{% include figure.html filename="introducao-estilometria-python-01.jpeg" caption="Imagem 1: Comparação da curva de Mendenhall para cada corpus." %}
+
+Como podemos ver pelos gráficos, é possível notar diferenças (embora sutis) entre as 5 curvas características de cada autor (linha superior de gráficos). Ao compararmos os documentos de teste (linha inferior de gráficos) com os autores, podemos notar que as curvas características dos documentos de teste dos autores Assis, Castilho e Chagas se assemelham mais à curva dos seus respectivos autores do que à de qualquer outro, o que seriam inferências corretas. O documento de Alencar é o que mais diverge da curva característica do autor. Isso pode ocorrer pelo fato de o documento de teste ser uma autobiografia do autor, enquanto os documentos de treino são duas obras de ficção, o que poderia influenciar no seu estilo de escrita. Veremos nas próximas abordagens se conseguimos contornar esta situação. O documento de Castelo Branco também parece não ter se assemelhado à curva característica do autor.
+
+Para além desta análise meramente visual (que pode muitas vezes induzir ao erro), podemos ter um resultado quantitativo calculando a soma das distâncias entre os valores (normalizados) de frequência de cada documento de teste e os valores de frequência do *corpus* de cada possível autor. Por consequência, o autor que possuir a menor distância de frequência com o documento de teste seria o mais provável autor deste documento.
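+
+Em termos mais precisos, a distância que vamos calcular pode ser expressa pela equação abaixo (a notação é nossa, apenas para resumir o parágrafo anterior): sendo $f_t(i)$ a frequência normalizada das palavras de comprimento $i$ no documento de teste $t$, e $f_a(i)$ a frequência correspondente no corpus do autor $a$,
+
+$$D(t, a) = \sum_{i=1}^{15} \left| f_t(i) - f_a(i) \right|$$
+
+O autor candidato com o menor valor de $D(t, a)$ é apontado como o mais provável.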
Podemos implementar isso da seguinte forma:
+
+```python
+# Dividir a lista de corpus entre autores e obras destacadas
+autores = list(obras.keys())[:5]
+obras_destacadas = list(obras.keys())[5:]
+
+obras_distribuicao_comprimento_normalizado = {}
+
+# Normalizar a distribuição de comprimentos de tokens em um novo dicionário
+for index, obra in obras_distribuicao_comprimento.items():
+    obras_distribuicao_comprimento_normalizado[index] = {
+        k: v/max(obra.values())
+        for k, v in dict(obra).items()}
+
+# Calcular a soma da diferença da distribuição entre o documento de teste
+# e cada autor (de 1 até 15 caracteres); usamos .get(i, 0) para evitar um
+# KeyError caso algum comprimento não ocorra em um dos corpora
+for obra in obras_destacadas:
+    for autor in autores:
+        soma_diferenca = 0
+        for i in range(1,16):
+            diferenca = abs(obras_distribuicao_comprimento_normalizado[obra].get(i, 0) -
+                            obras_distribuicao_comprimento_normalizado[autor].get(i, 0))
+            soma_diferenca = soma_diferenca + diferenca
+        print('A soma da diferença do documento ' +
+              obra +
+              ' para o autor ' +
+              autor +
+              ' é ' +
+              str(soma_diferenca))
+    print('\n')
+```
+
+O resultado deste trecho serão 5 blocos, cada um comparando um documento de teste com os 5 possíveis autores. Abaixo, o exemplo de como o primeiro bloco deve parecer:
+
+```
+A soma da diferença do documento Assis (teste) para o autor Assis é 0.25782806530977137
+A soma da diferença do documento Assis (teste) para o autor Alencar é 0.5192643726222002
+A soma da diferença do documento Assis (teste) para o autor Castelo Branco é 0.7410205025846326
+A soma da diferença do documento Assis (teste) para o autor Castilho é 0.46876355973646266
+A soma da diferença do documento Assis (teste) para o autor Chagas é 0.3466043230715998
+```
+
+Vamos colocar os resultados dos 5 testes em uma [matriz de confusão](https://perma.cc/K42B-NQSR) (limitando a 4 casas decimais) para avaliarmos:
+
+| | Assis | Alencar | Castelo Branco | Castilho | Chagas |
+| --------- | --------- | --------- | --------- | --------- | --------- |
+| **Assis (teste)** | **0.2578** | 0.5192 | 0.7410 | 0.4687 | 0.3466 |
+| **Alencar (teste)** | 0.9744 | **0.9844** | 0.4313 | 0.6979 | 0.7897 |
+| **Castelo Branco (teste)** | 0.2812 | 0.4436 | **0.4761** | 0.2772 | 0.2803 |
+| **Castilho (teste)** | 0.4396 | 0.4624 | 0.4114 | **0.1394** | 0.3184 |
+| **Chagas (teste)** | 0.7746 | 0.5883 | 0.6636 | 0.6732 | **0.5888** |
+
+Os documentos de teste de Assis e Castilho possuem o menor valor com seus respectivos autores, o que indica a maior proximidade. Isso é condizente com a similaridade dos gráficos que vimos anteriormente. O documento de teste de Chagas teve um "empate técnico" entre o estilo do próprio autor (0.5888) e o de Alencar (0.5883). Tanto o documento de teste de Alencar quanto o de Castelo Branco ficaram com o maior valor em relação aos seus respectivos autores, logo a técnica não foi eficaz para estes dois autores.
+
+Se não tivéssemos informações adicionais para trabalhar, poderíamos inferir corretamente 50% das atribuições de autoria (2 acertos, 2 erros e um "empate", ou seja, 2 acertos nos 4 casos decididos), o que é um resultado considerável para uma técnica relativamente simples. Felizmente, a ciência estilométrica avançou muito desde a época de Mendenhall.
+
+# Segundo teste estilométrico: método qui-quadrado de Kilgarriff
+
+Em um artigo de 2001, Adam Kilgarriff[^9] recomenda o uso da estatística qui-quadrado para determinar a autoria.
Leitores familiarizados com métodos estatísticos podem se lembrar de que o qui-quadrado às vezes é usado para testar se um conjunto de observações (digamos, as intenções dos eleitores conforme declarado em uma pesquisa) segue uma certa [distribuição de probabilidade](https://perma.cc/668N-9GPD) ou padrão. Não é isso que buscamos aqui. Em vez disso, simplesmente usaremos a estatística para medir a "distância" entre os vocabulários empregados em dois conjuntos de textos. Quanto mais semelhantes os vocabulários, mais provável é que o mesmo autor tenha escrito os textos em ambos os conjuntos. Isso pressupõe que o vocabulário de uma pessoa e os seus padrões de uso das palavras são relativamente constantes.
+
+Veja como aplicar a estatística para atribuição de autoria:
+
+* Pegue os corpora associados a dois autores;
+* Junte-os em um único corpus maior;
+* Conte os tokens para cada uma das palavras que podem ser encontradas neste corpus maior;
+* Selecione as [`n`](https://perma.cc/D9ND-3C83) palavras mais comuns no corpus maior;
+* Calcule quantos tokens dessas `n` palavras mais comuns esperaríamos encontrar em cada um dos dois corpora originais se fossem do mesmo autor. Isso significa simplesmente dividir o número de tokens que observamos no corpus combinado em dois valores, com base nos tamanhos relativos das contribuições dos dois autores para o corpus comum (um exemplo numérico é apresentado logo após a nota abaixo);
+* Calcule uma distância qui-quadrada somando, sobre as `n` palavras mais comuns, os _quadrados das diferenças entre os números reais de tokens encontrados no corpus de cada autor e os números esperados_, divididos pelos números esperados. A Figura 2 mostra a equação para a estatística qui-quadrado, onde C(i) representa o número observado de tokens para o elemento 'i' e E(i), o número esperado para esse elemento.
+
+{% include figure.html filename="stylometry-python-6.jpg" caption="Imagem 2: Equação para a estatística qui-quadrado." %}
+
+Quanto menor o valor do qui-quadrado, mais semelhantes são os dois corpora. Portanto, calcularemos o qui-quadrado de cada documento de teste com os 5 possíveis autores: os menores valores representarão a possível autoria de cada documento (assim como vimos no primeiro exemplo).
+
+Nota: Independentemente do método estilométrico que usamos, a escolha de `n`, o número de palavras a levar em consideração, é uma espécie de arte sombria. Na literatura pesquisada por Stamatatos[^2], pesquisadores sugeriram usar entre 100 e 1.000 das palavras mais comuns; um projeto chegou a usar cada palavra que aparecia no corpus pelo menos duas vezes. Como diretriz, quanto maior o corpus, maior o número de palavras que podem ser usadas como elementos sem correr o risco de dar importância indevida a uma palavra que ocorra apenas algumas vezes. Nesta lição, usaremos um `n` relativamente grande para o método qui-quadrado e um menor para o próximo método. Mudar o valor de `n` certamente mudará um pouco os resultados numéricos; no entanto, se uma pequena modificação de `n` causar uma mudança na atribuição de autoria, isso é um sinal de que o teste que você está realizando não é capaz de fornecer evidências significativas sobre o seu caso de teste (após os primeiros resultados abaixo, apresentamos um esboço de como verificar essa sensibilidade).
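+
+Antes do código completo, vale concretizar o cálculo dos valores esperados com um exemplo numérico mínimo. Os números abaixo são hipotéticos, escolhidos apenas para ilustrar a aritmética descrita nos passos acima:
+
+```python
+# Exemplo numérico mínimo (valores hipotéticos) do cálculo dos valores
+# esperados e da contribuição de uma única palavra para o qui-quadrado
+count_conjunto = 500                     # ocorrências de uma palavra no corpus conjunto
+autor_tokens, obra_tokens = 6000, 4000   # tamanhos hipotéticos dos dois corpora
+proporcao_autor = autor_tokens / (autor_tokens + obra_tokens)   # 0.6
+
+esperado_autor = count_conjunto * proporcao_autor        # 300.0
+esperado_obra = count_conjunto * (1 - proporcao_autor)   # 200.0
+
+observado_autor, observado_obra = 320, 180               # contagens hipotéticas
+contribuicao = ((observado_autor - esperado_autor) ** 2 / esperado_autor +
+                (observado_obra - esperado_obra) ** 2 / esperado_obra)
+print(contribuicao)   # aproximadamente 3.33
+```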
+
+O seguinte trecho de código implementa o método de Kilgarriff, com as frequências das 500 palavras mais comuns no corpus conjunto sendo usadas no cálculo:
+
+```python
+# Converter os tokens para caracteres minúsculos para que a mesma palavra,
+# maiúscula ou não, conte como uma palavra; aplicamos a conversão também
+# aos documentos de teste, para que as contagens dos dois lados do
+# cálculo sejam comparáveis
+for conjunto in autores + obras_destacadas:
+    obras_tokens[conjunto] = (
+        [token.lower() for token in obras_tokens[conjunto]])
+
+# Calcular o qui-quadrado de cada documento de teste com cada um dos 5 autores
+for obra in obras_destacadas:
+    for autor in autores:
+
+        # Primeiro, construir um corpus conjunto e identificar
+        # as 500 palavras mais frequentes nele
+        corpus_conjunto = (obras_tokens[obra] +
+                           obras_tokens[autor])
+        freq_dist_conjunto = nltk.FreqDist(corpus_conjunto)
+        termos_comuns = list(freq_dist_conjunto.most_common(500))
+
+        # Que proporção do corpus conjunto é constituída pelos
+        # tokens do autor candidato?
+        autor_compartilhado = (len(obras_tokens[autor])
+                               / len(corpus_conjunto))
+
+        # Agora, vamos observar as 500 palavras mais frequentes no corpus conjunto
+        # e comparar o número de vezes que elas podem ser observadas
+        # ao que seria esperado se as obras do autor e o documento de teste
+        # fossem ambas amostras aleatórias do mesmo conjunto.
+        quiquadrado = 0
+        for word, count_conjunto in termos_comuns:
+
+            # Com que frequência vemos essa palavra comum?
+            autor_count = obras_tokens[autor].count(word)
+            obra_count = obras_tokens[obra].count(word)
+
+            # Com que frequência deveríamos vê-la?
+            autor_count_esperado = count_conjunto * autor_compartilhado
+            teste_count_esperado = count_conjunto * (1-autor_compartilhado)
+
+            # Adicionar a contribuição da palavra para a estatística qui-quadrado
+            quiquadrado += ((autor_count-autor_count_esperado) *
+                            (autor_count-autor_count_esperado) /
+                            autor_count_esperado)
+
+            quiquadrado += ((obra_count-teste_count_esperado) *
+                            (obra_count-teste_count_esperado)
+                            / teste_count_esperado)
+
+        print("A estatística de qui-quadrado do documento",
+              obra,
+              "para o candidato",
+              autor,
+              "é =",
+              quiquadrado)
+    print("\n")
+```
+
+Assim como no primeiro exemplo, o resultado serão 5 blocos de resultados, cada um para um documento de teste. O primeiro bloco se parecerá com isso:
+```
+A estatística de qui-quadrado do documento Assis (teste) para o candidato Assis é = 12266.387624251674
+A estatística de qui-quadrado do documento Assis (teste) para o candidato Alencar é = 13832.008019914058
+A estatística de qui-quadrado do documento Assis (teste) para o candidato Castelo Branco é = 15659.980573183348
+A estatística de qui-quadrado do documento Assis (teste) para o candidato Castilho é = 19458.24314684532
+A estatística de qui-quadrado do documento Assis (teste) para o candidato Chagas é = 13681.732446564287
+```
+
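+Como a nota anterior sugere, convém verificar se a atribuição resiste a pequenas mudanças no valor de `n`. Um esboço possível dessa verificação (assumindo as estruturas `obras_tokens`, `autores` e `obras_destacadas` já criadas acima, e que as chaves do dicionário `obras` correspondem aos nomes exibidos nos resultados, como 'Assis (teste)') é encapsular o cálculo em uma função que recebe `n` como parâmetro:
+
+```python
+import nltk
+
+def quiquadrado_para_n(obra, autor, n):
+    # Mesmo cálculo do trecho acima, parametrizado pelo número n
+    # de palavras mais comuns consideradas
+    corpus_conjunto = obras_tokens[obra] + obras_tokens[autor]
+    termos_comuns = nltk.FreqDist(corpus_conjunto).most_common(n)
+    autor_compartilhado = len(obras_tokens[autor]) / len(corpus_conjunto)
+
+    quiquadrado = 0
+    for palavra, count_conjunto in termos_comuns:
+        autor_count = obras_tokens[autor].count(palavra)
+        obra_count = obras_tokens[obra].count(palavra)
+        autor_esperado = count_conjunto * autor_compartilhado
+        obra_esperado = count_conjunto * (1 - autor_compartilhado)
+        quiquadrado += (autor_count - autor_esperado) ** 2 / autor_esperado
+        quiquadrado += (obra_count - obra_esperado) ** 2 / obra_esperado
+    return quiquadrado
+
+# Exemplo de uso: se o candidato com o menor valor mudar entre
+# valores vizinhos de n, trate a atribuição com cautela
+for n in (300, 500, 700):
+    for autor in autores:
+        print(n, autor, quiquadrado_para_n('Assis (teste)', autor, n))
+```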
+
No código do método qui-quadrado acima, convertemos os tokens em minúsculas para evitar que os tokens de palavras que começam com letra maiúscula (porque aparecem no início de uma frase) e os tokens minúsculos da mesma palavra sejam contados como duas palavras diferentes. Às vezes, isso pode causar alguns erros (por exemplo, quando um substantivo próprio e um substantivo comum são escritos da mesma forma, exceto pelas maiúsculas), mas geralmente esta técnica aumenta a precisão.
+
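+Um exemplo mínimo, com tokens inventados, do efeito (e do risco) dessa conversão:
+
+```python
+tokens = ["Amor", "amor", "Chagas", "chagas"]
+print([token.lower() for token in tokens])
+# ['amor', 'amor', 'chagas', 'chagas']: "Amor" no início de uma frase
+# passa a ser contado junto com "amor", como desejado, mas o sobrenome
+# "Chagas" também se confunde com o substantivo comum "chagas"
+```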
+
+Agora, vamos dar uma olhada na matriz de confusão dos resultados para esta técnica:
+
+| | Assis | Alencar | Castelo Branco | Castilho | Chagas |
+| --------- | --------- | --------- | --------- | --------- | --------- |
+| **Assis (teste)** | **12266** | 13832 | 15659 | 19458 | 13681 |
+| **Alencar (teste)** | 2550 | **3153** | 2581 | 2663 | 2765 |
+| **Castelo Branco (teste)** | 17294 | 12063 | **11187** | 18133 | 13954 |
+| **Castilho (teste)** | 11349 | 9203 | 8925 | **4531** | 7548 |
+| **Chagas (teste)** | 6683 | 5700 | 5836 | 6970 | **5332** |
+
+Como podemos observar, o teste de qui-quadrado obteve um resultado superior ao da curva característica de composição de Mendenhall. Assis e Castilho permanecem com a inferência correta de autoria. Chagas, que havia passado pelo "empate técnico" na curva de composição, com o qui-quadrado também recebe a inferência correta, com uma distância considerável para os demais possíveis autores. Dos autores que não haviam sido avaliados corretamente na curva de composição, Castelo Branco agora possui o menor valor de qui-quadrado com o seu documento de teste, outra inferência correta. Alencar, no entanto, segue com o maior valor entre os 5 possíveis autores. De qualquer forma, já passamos de 50% de acerto com a curva característica de composição para 80% com o método qui-quadrado!
+
+No entanto, o qui-quadrado ainda é um método pouco refinado. Por um lado, palavras que aparecem com muita frequência tendem a ter um peso desproporcional no cálculo final. Às vezes, isso é bom; outras vezes, diferenças sutis de estilo representadas pelas maneiras como os autores usam palavras mais incomuns passarão despercebidas.
+
+## Uma nota sobre classes gramaticais
+
+Em alguns casos e idiomas, pode ser útil aplicar a marcação de [Part-of-speech (classes gramaticais)](https://perma.cc/ER5P-CFQE) aos tokens de palavras antes de contá-los, de modo que a mesma palavra usada como duas classes gramaticais diferentes possa contar como dois elementos diferentes (por exemplo, o termo "mais" sendo usado como substantivo ou como advérbio de intensidade). Esta lição não usa marcação de classes gramaticais, mas ela poderia refinar os resultados em estudos de caso mais complexos.
+
+Se você precisar aplicar a marcação de classes gramaticais aos seus próprios dados, poderá fazer o download de marcadores para outros idiomas, trabalhar com uma ferramenta de terceiros como o [Tree Tagger](https://perma.cc/DG9G-S5T2), ou mesmo treinar o seu próprio marcador, mas essas técnicas estão muito além do escopo da lição atual.
+
+# Terceiro teste estilométrico: método Delta de John Burrows (avançado)
+
+Os primeiros dois métodos estilométricos foram fáceis de implementar. Este próximo, baseado na estatística *Delta* de John Burrows[^10], é consideravelmente mais complexo, tanto conceitualmente (a matemática é mais complicada) quanto computacionalmente (mais código é necessário). É, no entanto, um dos métodos estilométricos mais proeminentes em uso hoje.
+
+Assim como o qui-quadrado de Kilgarriff, o método Delta de Burrows é uma medida da "distância" entre um texto cuja autoria queremos averiguar e algum outro corpus. Ao contrário do qui-quadrado, no entanto, o método Delta é projetado para comparar um texto anônimo (ou conjunto de textos) com as assinaturas de vários autores diferentes ao mesmo tempo. Mais precisamente, o método Delta mede como o texto anônimo *e conjuntos de textos escritos por um número arbitrário de autores conhecidos* divergem da média de todos eles juntos.
Além disso, o método Delta atribui peso igual a todas as características que mede, evitando assim que palavras comuns sobrecarreguem os resultados, o que era um problema com o teste de qui-quadrado. Por todas essas razões, o método Delta de John Burrows é geralmente uma solução mais eficaz para a questão da autoria.
+
+O algoritmo original de Burrows pode ser resumido da seguinte forma:
+
+* Reúna um grande corpus composto por textos escritos por um número arbitrário de autores; digamos que o número de autores seja `x`;
+* Encontre as `n` palavras mais frequentes no corpus para usar como elementos;
+* Para cada um desses `n` elementos, calcule a participação do subcorpus de cada um dos `x` autores, como uma porcentagem do número total de palavras. Por exemplo, a palavra "ele" pode representar 4,72% das palavras no subcorpus do Autor A;
+* Em seguida, calcule a média e o desvio padrão desses `x` valores e use-os como a média oficial e o desvio padrão para esse elemento em todo o corpus. Em outras palavras, estaremos usando uma _média de médias_ em vez de calcular um único valor que represente a parcela de todo o corpus dada por cada palavra. Fazemos isso porque queremos evitar que um subcorpus maior tenha maior influência nos resultados a seu favor e defina a norma do corpus de tal forma que se espere que tudo se pareça com ele;
+* Para cada um dos `n` elementos e `x` subcorpora, calcule um [`z-score`](https://perma.cc/S2RH-LF9K) descrevendo o quão distante da norma do corpus está o uso desse elemento particular neste subcorpus específico. Para fazer isso, subtraia a "média das médias" de um dado elemento da frequência com que ele é encontrado no subcorpus e divida o resultado pelo seu desvio padrão. A Figura 3 mostra a equação de z-score para o elemento 'i', onde C(i) representa a frequência observada, a letra grega mu representa a média das médias e a letra grega sigma, o desvio padrão;
+
+{% include figure.html filename="stylometry-python-7.jpg" caption="Imagem 3: Equação para a estatística de z-score." %}
+
+* Em seguida, calcule os mesmos `z-scores` para cada elemento no texto para o qual queremos determinar a autoria;
+* Finalmente, calcule um *score delta* comparando o documento de teste com o subcorpus de cada candidato. Para fazer isso, tome a *média dos valores absolutos das diferenças entre os `z-scores` de cada elemento no documento de teste e os `z-scores` correspondentes no subcorpus do candidato* (leia duas vezes!). Isso dá peso igual a cada elemento, não importa a frequência com que as palavras ocorram nos textos; caso contrário, os 3 ou 4 principais elementos sobrecarregariam todo o resto. A Figura 4 mostra a equação para Delta, onde Z(c,i) é o `z-score` para o elemento 'i' no candidato 'c', e Z(t,i) é o `z-score` para o elemento 'i' no caso de teste;
+
+{% include figure.html filename="stylometry-python-8.jpg" caption="Imagem 4: Equação para a estatística Delta de John Burrows." %}
+
+* O candidato "vencedor", assim como nas duas outras técnicas que aplicamos, é o autor para o qual a pontuação delta entre o subcorpus do autor e o documento de teste é a mais baixa.
+
+Stefan Evert _et al._[^11] fornecem uma discussão aprofundada das variantes, refinamentos e complexidades do método, mas nos ateremos ao essencial para os propósitos desta lição.
Uma explicação diferente de Delta, escrita em espanhol, e uma aplicação a um corpus de romances espanhóis também podem ser encontradas em um artigo recente de José Calvo Tello.[^12]
+
+## Seleção de elementos
+
+Vamos combinar todos os subcorpora em um único corpus, para que o método Delta possa calcular um "padrão" de referência. Então, vamos selecionar um número de palavras para usar como elementos. Lembre-se de que usamos 500 palavras para calcular o qui-quadrado de Kilgarriff; desta vez, usaremos um conjunto menor de 30 palavras (a maioria, senão todas, palavras funcionais e verbos comuns) como nossos elementos.
+
+```python
+# Combinar todos os corpora, exceto os documentos de teste, em um único corpus
+corpus_completo = []
+for autor in autores:
+    corpus_completo += obras_tokens[autor]
+
+# Obter uma distribuição de frequência
+freq_dist_corpus_completo = list(nltk.FreqDist(corpus_completo).most_common(30))
+freq_dist_corpus_completo[:10]
+```
+
+Uma amostra das palavras mais frequentes e de suas respectivas ocorrências se parece com o seguinte:
+
+```
+[('a', 17619),
+ ('que', 17345),
+ ('de', 17033),
+ ('e', 15449),
+ ('o', 14283),
+ ('não', 7086),
+ ('do', 6019),
+ ('da', 5647),
+ ('os', 5299),
+ ('um', 4873)]
+```
+
+## Calculando elementos para cada subcorpus
+
+Vejamos as frequências de cada elemento no subcorpus de cada candidato, como uma proporção do número total de tokens no subcorpus. Vamos calcular esses valores e armazená-los em um dicionário de dicionários, uma maneira conveniente de construir um [array bidimensional](https://perma.cc/HR9K-24MG) em Python.
+
+```python
+# Criar uma lista com os elementos e a estrutura principal de dados
+features = [word for word, freq in freq_dist_corpus_completo]
+feature_freqs = {}
+
+for autor in autores:
+    # Criar um dicionário para os elementos de cada candidato
+    feature_freqs[autor] = {}
+
+    # Obter um valor auxiliar contendo o número de tokens no subcorpus do autor
+    geral = len(obras_tokens[autor])
+
+    # Calcular a presença de cada elemento no subcorpus
+    for feature in features:
+        presenca = obras_tokens[autor].count(feature)
+        feature_freqs[autor][feature] = presenca / geral
+```
+
+## Calculando médias de elementos e desvios-padrão
+
+Dadas as frequências de elementos para todos os subcorpora que acabamos de calcular, podemos encontrar uma "média das médias" e um desvio padrão para cada elemento. Armazenaremos esses valores em outro "dicionário de dicionários".
+
+```python
+import math
+
+# A estrutura de dados na qual iremos armazenar
+# as "estatísticas padrão do corpus"
+corpus_features = {}
+
+# Para cada elemento...
+for feature in features:
+    # Criar um subdicionário que conterá a média e o desvio padrão do elemento
+    corpus_features[feature] = {}
+
+    # Calcular a média das frequências expressas nos subcorpora
+    feature_average = 0
+    for autor in autores:
+        feature_average += feature_freqs[autor][feature]
+    feature_average /= len(autores)
+    corpus_features[feature]["Mean"] = feature_average
+
+    # Calcular o desvio padrão usando a fórmula básica para uma amostra
+    feature_stdev = 0
+    for autor in autores:
+        diff = feature_freqs[autor][feature] - corpus_features[feature]["Mean"]
+        feature_stdev += diff * diff
+    feature_stdev /= (len(autores) - 1)
+    feature_stdev = math.sqrt(feature_stdev)
+    corpus_features[feature]["StdDev"] = feature_stdev
+```
+
+## Calculando z-scores
+
+Em seguida, transformamos as frequências de elementos observadas nos subcorpora dos cinco candidatos em `z-scores`, que descrevem o quão distantes essas observações estão da "estatística padrão do corpus". Nada extravagante aqui: meramente aplicamos a definição do `z-score` para cada elemento e armazenamos os resultados em outro array bidimensional.
+
+```python
+feature_zscores = {}
+
+for autor in autores:
+    feature_zscores[autor] = {}
+
+    for feature in features:
+        # Definição do z-score = (value - mean) / stddev
+        # Usamos variáveis intermediárias para tornar o
+        # código mais fácil de ler
+        feature_val = feature_freqs[autor][feature]
+        feature_mean = corpus_features[feature]["Mean"]
+        feature_stdev = corpus_features[feature]["StdDev"]
+        feature_zscores[autor][feature] = ((feature_val-feature_mean) /
+                                           feature_stdev)
+```
+
+## Calculando elementos, z-scores e Delta para nosso caso de teste
+
+Em seguida, precisamos comparar os documentos de teste com o corpus. O seguinte trecho de código, que essencialmente recapitula tudo o que fizemos até agora, conta as frequências de cada um de nossos 30 elementos nos documentos de teste e calcula os `z-scores` correspondentes.
+Por fim, usamos a fórmula para Delta definida por Burrows para extrair uma única pontuação comparando cada documento de teste com cada um dos cinco "autores candidatos". Lembre-se: quanto menor a pontuação Delta, mais semelhante é a assinatura estilométrica do documento à do candidato.
+
+```python
+for obra in obras_destacadas:
+    # Tokenizar o documento de teste, explicitando novamente o idioma
+    testcase_tokens = nltk.word_tokenize(obras[obra], language="portuguese")
+
+    # Filtrar a pontuação e colocar os tokens em minúsculas
+    testcase_tokens = [token.lower() for token in testcase_tokens
+                       if any(c.isalpha() for c in token)]
+
+    # Calcular as frequências dos elementos do documento de teste
+    geral = len(testcase_tokens)
+    testcase_freqs = {}
+    for feature in features:
+        presenca = testcase_tokens.count(feature)
+        testcase_freqs[feature] = presenca / geral
+
+    # Calcular os z-scores dos elementos do documento de teste
+    testcase_zscores = {}
+    for feature in features:
+        feature_val = testcase_freqs[feature]
+        feature_mean = corpus_features[feature]["Mean"]
+        feature_stdev = corpus_features[feature]["StdDev"]
+        testcase_zscores[feature] = (feature_val - feature_mean) / feature_stdev
+
+    # Calcular Delta para cada autor
+    for autor in autores:
+        delta = 0
+        for feature in features:
+            delta += math.fabs((testcase_zscores[feature] -
+                                feature_zscores[autor][feature]))
+        delta /= len(features)
+        print("Delta score do documento",
+              obra,
+              "para o candidato",
+              autor,
+              "é =",
+              delta)
+    print("\n")
+```
+
+Como nas outras duas técnicas, o resultado serão 5 blocos de resultados, dando o valor de Delta de cada documento de teste para cada possível autor. O primeiro bloco se parecerá com isso:
+
+```
+Delta score do documento Assis (teste) para o candidato Assis é = 0.8715781237572774
+Delta score do documento Assis (teste) para o candidato Alencar é = 1.2624531605759595
+Delta score do documento Assis (teste) para o candidato Castelo Branco é = 1.2303968803032856
+Delta score do documento Assis (teste) para o candidato Castilho é = 1.6276770882853728
+Delta score do documento Assis (teste) para o candidato Chagas é = 1.0527125070730734
+```
+
+Vamos avaliar todos os valores de Delta na nossa matriz de confusão (reduzidos para 4 casas decimais):
+
+| | Assis | Alencar | Castelo Branco | Castilho | Chagas |
+| --------- | --------- | --------- | --------- | --------- | --------- |
+| **Assis (teste)** | **0.8715** | 1.2624 | 1.2303 | 1.6276 | 1.0527 |
+| **Alencar (teste)** | 1.9762 | **1.3355** | 1.3878 | 1.6425 | 1.5042 |
+| **Castelo Branco (teste)** | 1.0040 | 1.3208 | **0.8182** | 1.5202 | 1.2829 |
+| **Castilho (teste)** | 1.5705 | 1.2553 | 1.0970 | **0.4518** | 0.8176 |
+| **Chagas (teste)** | 1.1444 | 1.0169 | 0.9462 | 0.9864 | **0.7756** |
+
+Com o método Delta, pudemos inferir corretamente 100% da autoria dos documentos de teste! Alencar, que teve o pior valor nas duas outras técnicas, aqui aparece com o menor valor entre os 5 candidatos.
+Ao utilizarmos autores brasileiros e portugueses, tínhamos em mente também a possibilidade de que a comparação entre ficheiros de autores de uma mesma nacionalidade pudesse produzir valores mais próximos do que entre autores de nacionalidades distintas, em função de particularidades linguísticas, o que parece não ter sido o caso aqui. Por se tratarem de obras do século XIX, poderíamos buscar explicações para isso na maior similaridade entre as variantes da língua na época, na influência da Academia Portuguesa no Brasil, ou mesmo no letramento e nas influências dos autores. Uma nova análise com obras mais contemporâneas seria um excelente segundo passo, e fica como sugestão para o leitor.
+
+# Leituras adicionais e recursos
+
+## Estudos de caso interessantes
+
+Estilometria e/ou atribuição de autoria têm sido utilizadas em diversos contextos, empregando diversas técnicas.
Aqui estão alguns estudos de caso interessantes:
+
+* Javier de la Rosa e Juan Luis Suárez procuram o autor de um famoso romance espanhol do século XVI entre uma lista considerável de candidatos.[^13]
+* Maria Slautina e Mikhail Marusenko usam o reconhecimento de padrões em um conjunto de recursos sintáticos, gramaticais e lexicais, desde a contagem de palavras simples (com marcação de classe gramatical) a vários tipos de frases, a fim de estabelecer semelhanças estilísticas entre textos medievais.[^14]
+* Ellen Jordan, Hugh Craig e Alexis Antonia examinam o caso de periódicos britânicos do século XIX, nos quais os artigos geralmente não eram assinados, para determinar o autor de quatro resenhas de trabalhos de ou sobre as irmãs Brontë.[^15] Este estudo de caso aplica uma versão inicial de outro método desenvolvido por John Burrows, o método Zeta, que se concentra nas palavras favoritas de um autor em vez de palavras funcionais comuns.[^16]
+* Valérie Beaudoin e François Yvon analisaram 58 peças em verso dos dramaturgos franceses Corneille, Racine e Molière, descobrindo que os dois primeiros foram muito mais consistentes na maneira como estruturaram sua escrita do que o último.[^17]
+* Marcelo Luiz Brocardo, Issa Traore, Sherif Saad e Isaac Woungang aplicam [aprendizagem supervisionada](https://perma.cc/7TAQ-JECD) e [modelos n-gram](https://perma.cc/X34K-5R9X) para determinar a autoria de mensagens curtas com um grande número de autores em potencial, como e-mails e tweets.[^18]
+* Moshe Koppel e Yaron Winter propõem o "método do impostor", que tenta determinar se dois textos foram escritos pelo mesmo autor, inserindo-os em um conjunto de textos escritos por falsos candidatos.[^19] Justin Anthony Stover _et al._ aplicaram recentemente a técnica para determinar a autoria de um manuscrito do século II recém-descoberto.[^20]
+* Finalmente, uma equipe liderada por David I. Holmes estudou o caso peculiar de documentos escritos por um soldado da Guerra Civil ou pela sua viúva, que pode ter copiado intencionalmente o estilo de escrita dele.[^21]
+
+## Referências adicionais sobre autoria e estilometria
+
+A referência mais exaustiva em todos os assuntos relacionados à atribuição de autoria, incluindo a história do campo, seus fundamentos matemáticos e linguísticos e seus vários métodos, foi escrita por Patrick Juola em 2007.[^22] O Capítulo 7, em particular, mostra como a atribuição de autoria pode servir como um marcador para várias identidades de grupo (gênero, nacionalidade, dialeto, etc.), para mudanças na linguagem ao longo do tempo, e até mesmo para personalidade e saúde mental.
+
+Uma pesquisa mais curta pode ser encontrada em Moshe Koppel _et al._, que discutem casos em que há um único autor candidato cuja autoria deve ser confirmada, um grande número de candidatos para os quais apenas pequenas amostras de escrita estão disponíveis para treinar um algoritmo de aprendizado de máquina, ou nenhum candidato conhecido.[^23]
+
+O artigo de Stamatatos citado anteriormente[^2] também contém uma pesquisa qualitativa do campo.
+
+## Varia
+
+*Programming historians* que desejam explorar mais a estilometria podem fazer o download do pacote [Stylo](https://cran.r-project.org/web/packages/stylo/index.html),[^24] que se tornou um padrão _de facto_. Entre outras coisas, o pacote Stylo fornece uma implementação do método Delta, funcionalidade de extração de elementos e interfaces gráficas de usuário convenientes tanto para a manipulação de dados quanto para a produção de resultados visualmente atraentes.
Observe que o Stylo é escrito em [R](https://www.r-project.org/), o que significa que você precisará do R instalado no seu computador para executá-lo; mas, entre a interface gráfica do usuário e os tutoriais, pouco ou nenhum conhecimento prévio de programação em R deve ser necessário.
+
+Leitores fluentes em francês interessados em explorar as implicações [epistemológicas](https://perma.cc/6DFE-QTWV) das interações entre métodos quantitativos e qualitativos na análise do estilo de escrita devem ler Clémence Jacquot.[^25]
+
+Surpreendentemente, os dados obtidos por meio de [reconhecimento ótico de caracteres](https://perma.cc/R9U6-TRGE) (OCR) se mostraram adequados para fins de atribuição de autoria, mesmo quando sofrem de altas taxas de erro de OCR.[^26]
+
+Por fim, existe um [grupo Zotero](https://www.zotero.org/groups/643516/stylometry_bibliography/items) dedicado à estilometria, onde você pode encontrar muitas outras referências a métodos e estudos.
+
+# Agradecimentos
+
+Agradeço a Stéfan Sinclair e Andrew Piper, em cujos seminários na Universidade McGill este projeto começou. Também agradeço à minha orientadora de tese, Susan Dalton, cuja orientação é sempre inestimável.
+
+# Notas finais
+
+[^1]: Veja, por exemplo, Justin Rice, ["What Makes Hemingway Hemingway? A statistical analysis of the data behind Hemingway's style"](https://perma.cc/W8TR-UH6S).
+
+[^2]: Efstathios Stamatatos, “A Survey of Modern Authorship Attribution Methods,” _Journal of the American Society for Information Science and Technology_, vol. 60, no. 3 (December 2008), pp. 538–56, citation on p. 540, [https://doi.org/10.1002/asi.21001](https://doi.org/10.1002/asi.21001).
+
+[^3]: Jan Rybicki, “Vive La Différence: Tracing the (Authorial) Gender Signal by Multivariate Analysis of Word Frequencies,” _Digital Scholarship in the Humanities_, vol. 31, no. 4 (December 2016), pp. 746–61, [https://doi.org/10.1093/llc/fqv023](https://doi.org/10.1093/llc/fqv023). Sean G. Weidman e James O’Sullivan, “The Limits of Distinctive Words: Re-Evaluating Literature’s Gender Marker Debate,” _Digital Scholarship in the Humanities_, 2017, [https://doi.org/10.1093/llc/fqx017](https://doi.org/10.1093/llc/fqx017).
+
+[^4]: Ted Underwood, David Bamman, e Sabrina Lee, “The Transformation of Gender in English-Language Fiction”, _Cultural Analytics_, Feb. 13, 2018, [https://doi.org/10.22148/16.019](https://doi.org/10.22148/16.019).
+
+[^5]: Sven Meyer zu Eissen e Benno Stein, “Intrinsic Plagiarism Detection,” in _ECIR 2006_, edited by Mounia Lalmas, Andy MacFarlane, Stefan Rüger, Anastasios Tombros, Theodora Tsikrika, e Alexei Yavlinsky, Berlin, Heidelberg: Springer, 2006, pp. 565–69, [https://doi.org/10.1007/11735106_66](https://doi.org/10.1007/11735106_66).
+
+[^6]: Cynthia Whissell, “Traditional and Emotional Stylometric Analysis of the Songs of Beatles Paul McCartney and John Lennon,” _Computers and the Humanities_, vol. 30, no. 3 (1996), pp. 257–65.
+
+[^7]: Douglass Adair, "The Authorship of the Disputed Federalist Papers", _The William and Mary Quarterly_, vol. 1, no. 2 (April 1944), pp. 97-122.
+
+[^8]: T. C. Mendenhall, "The Characteristic Curves of Composition", _Science_, vol. 9, no. 214 (Mar. 11, 1887), pp. 237-249.
+
+[^9]: Adam Kilgarriff, "Comparing Corpora", _International Journal of Corpus Linguistics_, vol. 6, no. 1 (2001), pp. 97-133.
+
+[^10]: John Burrows, "'Delta': a Measure of Stylistic Difference and a Guide to Likely Authorship", _Literary and Linguistic Computing_, vol. 17, no. 3 (2002), pp. 267-287.
+
+[^11]: Stefan Evert et al., "Understanding and explaining Delta measures for authorship attribution", _Digital Scholarship in the Humanities_, vol. 32, no. suppl_2 (2017), pp. ii4-ii16.
+
+[^12]: José Calvo Tello, “Entendiendo Delta desde las Humanidades,” [_Caracteres_, vol. 5, no. 1 (May 27, 2016)](https://perma.cc/LNF3-QP8V), pp. 140-176.
+
+[^13]: Javier de la Rosa e Juan Luis Suárez, “The Life of Lazarillo de Tormes and of His Machine Learning Adversities,” _Lemir_, vol. 20 (2016), pp. 373-438.
+
+[^14]: Maria Slautina e Mikhaïl Marusenko, “L’émergence du style, The emergence of style,” _Les Cahiers du numérique_, vol. 10, no. 4 (November 2014), pp. 179–215, [https://doi.org/10.3166/LCN.10.4.179-215](https://doi.org/10.3166/LCN.10.4.179-215).
+
+[^15]: Ellen Jordan, Hugh Craig, e Alexis Antonia, “The Brontë Sisters and the ‘Christian Remembrancer’: A Pilot Study in the Use of the ‘Burrows Method’ to Identify the Authorship of Unsigned Articles in the Nineteenth-Century Periodical Press,” _Victorian Periodicals Review_, vol. 39, no. 1 (2006), pp. 21–45.
+
+[^16]: John Burrows, “All the Way Through: Testing for Authorship in Different Frequency Strata,” _Literary and Linguistic Computing_, vol. 22, no. 1 (April 2007), pp. 27–47, [https://doi.org/10.1093/llc/fqi067](https://doi.org/10.1093/llc/fqi067).
+
+[^17]: Valérie Beaudoin e François Yvon, “Contribution de La Métrique à La Stylométrie,” _JADT 2004: 7e Journées internationales d'Analyse statistique des Données Textuelles_, vol. 1, Louvain La Neuve, Presses Universitaires de Louvain, 2004, pp. 107–18.
+
+[^18]: Marcelo Luiz Brocardo, Issa Traore, Sherif Saad e Isaac Woungang, “Authorship Verification for Short Messages Using Stylometry,” _2013 International Conference on Computer, Information and Telecommunication Systems (CITS)_, 2013, [https://doi.org/10.1109/CITS.2013.6705711](https://doi.org/10.1109/CITS.2013.6705711).
+
+[^19]: Moshe Koppel e Yaron Winter, “Determining If Two Documents Are Written by the Same Author,” _Journal of the Association for Information Science and Technology_, vol. 65, no. 1 (October 2013), pp. 178–87, [https://doi.org/10.1002/asi.22954](https://doi.org/10.1002/asi.22954).
+
+[^20]: Justin Anthony Stover et al., "Computational authorship verification method attributes a new work to a major 2nd century African author", _Journal of the Association for Information Science and Technology_, vol. 67, no. 1 (2016), pp. 239–242.
+
+[^21]: David I. Holmes, Lesley J. Gordon, e Christine Wilson, "A widow and her soldier: Stylometry and the American Civil War", _Literary and Linguistic Computing_, vol. 16, no. 4 (2001), pp. 403–420.
+
+[^22]: Patrick Juola, “Authorship Attribution,” _Foundations and Trends in Information Retrieval_, vol. 1, no. 3 (2007), pp. 233–334, [https://doi.org/10.1561/1500000005](https://doi.org/10.1561/1500000005).
+
+[^23]: Moshe Koppel, Jonathan Schler, e Shlomo Argamon, “Computational Methods in Authorship Attribution,” _Journal of the Association for Information Science and Technology_, vol. 60, no. 1 (January 2009), pp. 9–26, [https://doi.org/10.1002/asi.v60:1](https://doi.org/10.1002/asi.v60:1).
+
+[^24]: Maciej Eder, Jan Rybicki, e Mike Kestemont, “Stylometry with R: A Package for Computational Text Analysis,” _The R Journal_, vol. 8, no. 1 (2016), pp. 107–21.
+
+[^25]: Clémence Jacquot, “Rêve d'une épiphanie du style: visibilité et saillance en stylistique et en stylométrie,” _Revue d’Histoire Littéraire de la France_, vol. 116, no. 3 (2016), pp. 619–39.
+ +[^26]: Patrick Juola, John Noecker Jr, e Michael Ryan, "Authorship Attribution and Optical Character Recognition Errors", _TAL_, vol. 53, no. 3 (2012), pp. 101–127. diff --git a/pt/licoes/introducao-instalacao-python.md b/pt/licoes/introducao-instalacao-python.md index 0fe53b40f3..7d83aa9471 100644 --- a/pt/licoes/introducao-instalacao-python.md +++ b/pt/licoes/introducao-instalacao-python.md @@ -1,86 +1,86 @@ ---- -title: Introdução e instalação do Python -slug: introducao-instalacao-python -layout: lesson -date: 2012-07-17 -translation_date: 2021-05-13 -authors: -- William J. Turkel -- Adam Crymble -reviewers: -- Jim Clifford -- Amanda Morton -editors: -- Miriam Posner -translator: -- Josir C. Gomes -translation-editor: -- Danielle Sanches -translation-reviewer: -- Bruno Martins -- Renato Rocha Souza -difficulty: 1 -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/323 -activity: transforming -topics: [python, get-ready] -abstract: "Essa primeira lição em nossa seção para tratar de Fontes Online -é preparada para que você e o seu computador estejam prontos para se iniciarem na programação. -Nós iremos nos concentrar em instalar os softwares necessários – todos livres e de boa reputação -– e finalmente nós iremos te auxiliar a experimentar os primeiros passos na programação -para que você tenha resultados imediatos." -next: nocoes-basicas-paginas-web-html -python_warning: false -original: introduction-and-installation -avatar_alt: Uma cobra enrolada -doi: 10.46430/phpt0004 ---- - -{% include toc.html %} - - - - - -## Objetivos da Lição - -Essa primeira lição na nossa seção sobre Fontes Online é preparada para que você e o seu computador estejam preparados para se iniciarem na programação. -Iremos focar na instalação do software necessário, livre e de boa reputação. Posteriormente, iremos auxiliar na experimentação com os primeiros passos na programação, para que você tenha resultados rápidos. - -Neste módulo de abertura, você irá instalar a [Linguagem de Programação Python][], o [analisador de HTML/XML Beautiful Soup][], e um editor de texto. - -Os ecrãns de exemplo mostrados aqui correspondem ao [Komodo Edit][], mas você pode utilizar qualquer editor de texto apto a trabalhar com o Python. Aqui está uma lista de outras opções: [Editores Python][]. Uma vez que tudo esteja instalado, você irá escrever os seus primeiros programas, "Olá Mundo" em Python e HTML. - -## A linguagem de programação Python - -A linguagem de programação que usaremos nesta série de lições é Python, uma linguagem livre e de código aberto. -A menos que seja observado o contrário, usaremos a versão **Python 3** daqui em diante. -A versão 2 não é mais suportada, mas ainda pode estar sendo usada em projetos ou lições mais antigas. - -[Python 3 tem algumas poucas diferenças na formatação](http://sebastianraschka.com/Articles/2014_python_2_3_key_diff.html) (pense em regras gramaticais). Assim, fique atento se você encontrar exemplos online que ainda utilizam o Python 2. Esses exemplos podem não funcionar nas versões atuais do Python. - - -## Faça backups do seu trabalho! - -Antes de fazer o download ou instalar qualquer novo software, é crucial que você faça backups do seu trabalho. Você deve fazer backup de todo o seu computador pelo menos uma vez por semana, e de preferência com uma frequência ainda menor. É também uma boa ideia fazer backups fora do seu ambiente local pois, dessa forma, fica salvaguardado caso algo aconteça com o seu computador, com a sua casa ou o seu escritório. 
Sites como o [Jungle Disk][] ou o [Dropbox][] fornecem opções de backup fáceis de usar e relativamente baratas.
-
-### Escolha o seu Sistema Operativo
-
-## Passo 1 – Instale e configure o Software
-
-Para que você consiga trabalhar nas técnicas aqui apresentadas, você precisará descarregar e instalar software disponível gratuitamente.
-Nós fornecemos instruções para o Mac, Windows e Linux. Uma vez que você tenha instalado o software no seu Sistema Operativo, siga para a lição '[Noções básicas de páginas web e HTML][]'. Se você encontrar dificuldades com as nossas instruções ou achar que algo não funciona na sua plataforma, por favor nos informe.
-
-- [Instalação do Python para Mac][]
-- [Instalação do Python para Windows][]
-- [Instalação do Python para Linux][]
-
- [Linguagem de Programação Python]: http://www.python.org/
- [Analisador de HTML/XML Beautiful Soup]: http://www.crummy.com/software/BeautifulSoup/
- [Komodo Edit]: http://www.activestate.com/komodo-edit
- [Editores Python]: http://wiki.python.org/moin/PythonEditors/
- [Jungle Disk]: https://www.jungledisk.com/
- [Dropbox]: https://www.dropbox.com/home
- [Noções básicas de páginas web e HTML]: nocoes-basicas-paginas-web-html
- [Instalação do Python para Mac]: instalacao-mac
- [Instalação do Python para Windows]: instalacao-windows
- [Instalação do Python para Linux]: instalacao-linux
+---
+title: Introdução e instalação do Python
+slug: introducao-instalacao-python
+layout: lesson
+date: 2012-07-17
+translation_date: 2021-05-13
+authors:
+- William J. Turkel
+- Adam Crymble
+reviewers:
+- Jim Clifford
+- Amanda Morton
+editors:
+- Miriam Posner
+translator:
+- Josir C. Gomes
+translation-editor:
+- Danielle Sanches
+translation-reviewer:
+- Bruno Martins
+- Renato Rocha Souza
+difficulty: 1
+review-ticket: https://github.com/programminghistorian/ph-submissions/issues/323
+activity: transforming
+topics: [python, get-ready]
+abstract: "Essa primeira lição em nossa seção para tratar de Fontes Online
+é preparada para que você e o seu computador estejam prontos para se iniciarem na programação.
+Nós iremos nos concentrar em instalar os softwares necessários – todos livres e de boa reputação
+– e finalmente nós iremos te auxiliar a experimentar os primeiros passos na programação
+para que você tenha resultados imediatos."
+next: nocoes-basicas-paginas-web-html
+python_warning: false
+original: introduction-and-installation
+avatar_alt: Uma cobra enrolada
+doi: 10.46430/phpt0004
+---
+
+{% include toc.html %}
+
+
+
+
+
+## Objetivos da Lição
+
+Essa primeira lição na nossa seção sobre Fontes Online é preparada para que você e o seu computador estejam preparados para se iniciarem na programação.
+Iremos focar na instalação do software necessário, livre e de boa reputação. Posteriormente, iremos auxiliar na experimentação com os primeiros passos na programação, para que você tenha resultados rápidos.
+
+Neste módulo de abertura, você irá instalar a [Linguagem de Programação Python][], o [analisador de HTML/XML Beautiful Soup][], e um editor de texto.
+
+Os ecrãs de exemplo mostrados aqui correspondem ao [Komodo Edit][], mas você pode utilizar qualquer editor de texto apto a trabalhar com o Python. Aqui está uma lista de outras opções: [Editores Python][]. Uma vez que tudo esteja instalado, você irá escrever os seus primeiros programas, "Olá Mundo" em Python e HTML.
+
+## A linguagem de programação Python
+
+A linguagem de programação que usaremos nesta série de lições é Python, uma linguagem livre e de código aberto.
+A menos que seja observado o contrário, usaremos a versão **Python 3** daqui em diante.
+A versão 2 não é mais suportada, mas ainda pode estar sendo usada em projetos ou lições mais antigas.
+
+[Python 3 tem algumas poucas diferenças na formatação](https://sebastianraschka.com/Articles/2014_python_2_3_key_diff.html) (pense em regras gramaticais). Assim, fique atento se você encontrar exemplos online que ainda utilizam o Python 2. Esses exemplos podem não funcionar nas versões atuais do Python.
+
+
+## Faça backups do seu trabalho!
+
+Antes de fazer o download ou instalar qualquer novo software, é crucial que você faça backups do seu trabalho. Você deve fazer backup de todo o seu computador pelo menos uma vez por semana, e de preferência com uma frequência ainda maior. É também uma boa ideia fazer backups fora do seu ambiente local pois, dessa forma, fica salvaguardado caso algo aconteça com o seu computador, com a sua casa ou o seu escritório. Sites como o [Jungle Disk][] ou o [Dropbox][] fornecem opções de backup fáceis de usar e relativamente baratas.
+
+### Escolha o seu Sistema Operativo
+
+## Passo 1 – Instale e configure o Software
+
+Para que você consiga trabalhar nas técnicas aqui apresentadas, você precisará descarregar e instalar software disponível gratuitamente.
+Nós fornecemos instruções para o Mac, Windows e Linux. Uma vez que você tenha instalado o software no seu Sistema Operativo, siga para a lição '[Noções básicas de páginas web e HTML][]'. Se você encontrar dificuldades com as nossas instruções ou achar que algo não funciona na sua plataforma, por favor nos informe.
+
+- [Instalação do Python para Mac][]
+- [Instalação do Python para Windows][]
+- [Instalação do Python para Linux][]
+
+ [Linguagem de Programação Python]: https://www.python.org/
+ [Analisador de HTML/XML Beautiful Soup]: https://www.crummy.com/software/BeautifulSoup/
+ [Komodo Edit]: https://www.activestate.com/komodo-edit
+ [Editores Python]: https://wiki.python.org/moin/PythonEditors/
+ [Jungle Disk]: https://www.jungledisk.com/
+ [Dropbox]: https://www.dropbox.com/home
+ [Noções básicas de páginas web e HTML]: nocoes-basicas-paginas-web-html
+ [Instalação do Python para Mac]: instalacao-mac
+ [Instalação do Python para Windows]: instalacao-windows
+ [Instalação do Python para Linux]: instalacao-linux
diff --git a/pt/licoes/introducao-jupyter-notebooks.md b/pt/licoes/introducao-jupyter-notebooks.md
index 0f90e9b02e..1fd526c07b 100644
--- a/pt/licoes/introducao-jupyter-notebooks.md
+++ b/pt/licoes/introducao-jupyter-notebooks.md
@@ -1,421 +1,421 @@
----
-title: "Introdução ao Jupyter Notebook"
-slug: introducao-jupyter-notebooks
-original: jupyter-notebooks
-layout: lesson
-collection: lessons
-date: 2019-12-08
-translation_date: 2023-06-02
-authors:
-- Quinn Dombrowski
-- Tassie Gniady
-- David Kloster
-reviewers:
-- Patrick Burns
-- Jeri Wieringa
-editors:
-- Brandon Walsh
-translator:
-- Vânia Rosa
-translation-editor:
-- Jimmy Medeiros
-translation-reviewer:
-- Juliana Marques
-- Caio Mello
-review-ticket: https://github.com/programminghistorian/ph-submissions/issues/431
-difficulty: 1
-activity: presenting
-topics: [python, website]
-abstract: Jupyter Notebook fornece um ambiente onde você pode trabalhar com facilidade o seu código na linguagem Python. Esta lição descreve como instalar o software Jupyter Notebook, como executar e criar ficheiros para o Jupyter Notebook.
-avatar_alt: O planeta Júpiter -doi: 10.46430/phpt0043 ---- - -{% include toc.html %} - -## Introdução - -Quando a computação é uma parte intrínseca de sua prática de pesquisa, como você publica um argumento acadêmico de forma que torne o código tão acessível e legível como a prosa que o acompanha? Na área das humanidades, a publicação de uma pesquisa assume principalmente a forma de prosa escrita, artigo ou monografia. Embora as editoras estejam cada vez mais abertas à inclusão de códigos suplementares ou outros materiais, tal arranjo inerentemente os relega a um estatuto secundário relativo ao texto escrito. - -E se você pudesse publicar sua pesquisa em um formato que desse um peso equilibrado entre a prosa e o código? A realidade das atuais diretrizes de publicação acadêmica significa que a separação forçosa do seu código e da argumentação pode ser uma necessidade, e sua reunificação pode ser impossível sem que se navegue por numerosos obstáculos. Atualmente o código é tipicamente publicado em separado no GitHub ou em outro repositório, caso no qual os leitores têm que procurar uma nota de rodapé no texto para descobrir quais scripts estão sendo referenciados, encontrar a URL do repositório, acessar a URL, procurar os scripts, baixá-los e também os ficheiro(s) de dados associados, e então executar os códigos. No entanto, se você tiver os direitos e permissões necessários para republicar o texto de sua pesquisa em outro formato, o Jupyter Notebook fornece um ambiente onde código e prosa podem ser justapostos e apresentados com igual peso e valor. - -Os Jupyter Notebooks têm visto uma adoção entusiástica na comunidade de ciência de dados, a ponto de cada vez mais substituir o Microsoft Word como um ambiente padrão de escrita da pesquisa. Dentro da literatura de humanidades digitais, pode-se encontrar referência a Jupyter Notebooks (separados do iPython, ou Python interativo, notebooks em 2014) desde 2015. - -Os Jupyter Notebooks também ganharam força nas humanidades digitais como uma ferramenta pedagógica. Diversos tutoriais do Programming Historian, como [Mineração de texto em Python através do leitor de recursos HTRC](/en/lessons/text-mining-with-extracted-features), e [Extraindo páginas ilustradas de bibliotecas digitais com python](/pt/licoes/extrair-paginas-ilustradas-com-python#jupyter-notebooks), assim como outros materiais pedagógicos para oficinas fazem referência à colocação de código em um Jupyter Notebook ou ao uso do Jupyter Notebook para orientar os estudantes, permitindo que eles remixem e editem o código livremente. O formato do notebook é ideal para o ensino, especialmente quando os estudantes têm diferentes níveis de proficiência técnica e de conforto com escrita e edição dos códigos. - -O objetivo dos Jupyter Notebooks é fornecer uma interface mais acessível para o código usado em pesquisa ou práticas pedagógicas com suporte digital. Ferramentas como os Jupyter Notebook são menos significativas para aprender ou ensinar no vácuo, porque os Jupyter Notebooks em si não fazem nada para promover diretamente a pesquisa ou a pedagogia. Antes de começar esta lição, pense no que você quer obter usando Jupyter Notebooks. Deseja organizar o fluxo de trabalho do seu projeto? Você quer trabalhar analisando seus dados, acompanhando as coisas que você tenta ao longo do caminho? Você quer que os leitores da sua pesquisa possam seguir os lados teóricos e técnicos do seu argumento sem alternar entre um PDF e uma pasta de scripts? 
Quer ministrar oficinas de programação mais acessíveis aos participantes com uma gama de conhecimentos técnicos? Você quer usar ou adaptar notebooks que outras pessoas escreveram? Tenha seu objetivo em mente enquanto você trabalha nesta lição. Dependendo de como você imagina usar Jupyter Notebooks, você pode ser capaz de pular seções que são mais aplicáveis em outro contexto. - -## Metas de lição - -Nesta lição você aprenderá: - -- O que são Jupyter Notebooks - -- Como instalar, configurar e usar o pacote de software do Jupyter Notebook - -- Quando os cadernos podem ser úteis em pesquisas e contextos pedagógicos - -Para esta lição, vamos trabalhar em um cenário de uso de Jupyter Notebooks para analisar dados e, em seguida, adaptar esse mesmo notebook e dados para uso em sala de aula. A aula também abordará temas mais avançados relacionados aos Jupyter Notebooks, tais como: - -- Usando Jupyter Notebook para linguagens de programação que não sejam Python - -- Convertendo o código Python existente em Jupyter Notebooks - -- Usando Jupyter Notebooks para ampliar a capacidade computacional em ambientes como clusters de computação de alto desempenho - -## Pré-requisitos - -Esta lição é adequada para iniciantes intrépidos, assumindo pouca experiência técnica anterior. - -Na verdade, o Jupyter Notebook é um ótimo recurso para pessoas que estão aprendendo a escrever código. - -Dependendo do notebook que você quer executar, você pode precisar [instalar alguns módulos Python com pip](/pt/licoes/instalacao-modulos-python-pip), que assume alguma familiaridade com a linha de comando (para [windows aqui](/en/lessons/intro-to-powershell), ou [Mac/Linux aqui](/en/lessons/intro-to-bash) (em inglês)). - -A lição é escrita usando o Jupyter Notebook 6.0, mas a interface do usuário e a funcionalidade do software tem sido bastante consistente entre as versões. - -## Computação Letrada - -A relação entre código legível por computador e texto legível por humanos ganhou visibilidade dentro da ciência da computação na década de 1970, quando Donald Knuth propôs o paradigma da "programação letrada" (ou “programação alfabetizada”). Em vez de organizar o código de acordo com os requisitos que privilegiam a execução do código pelo computador, a programação letrada trata um programa como literatura compreensível aos seres humanos, priorizando o próprio processo de pensamento do programador. A programação letrada projetada por Knuth assume a forma de prosa escrita, com código acionável por computador incorporado em macros (um formato abreviado para escrever código). Ferramentas de programação letrada são usadas para gerar duas saídas do programa letrado: código "emaranhado" que pode ser executado pelo computador e documentação formatada "tecida".[^1] - -Fernando Pérez, o criador do ambiente de programação iPython que acabou se tornando o Projeto Jupyter, cunhou o termo computação letrada para o modelo usado pelos Jupyter Notebooks: - -> Um ambiente de computação letrado é aquele que permite aos usuários não apenas executar comandos, mas também armazenar os resultados desses comandos em um formato de documento literário, juntamente com figuras e com texto em formato livre que pode incluir expressões matemáticas formatadas. 
Na prática, ele pode ser visto como uma mistura de um ambiente de linha de comando, como o shell Unix, com um processador de texto, uma vez que os documentos resultantes podem ser lidos como texto, mas contêm blocos de código que foram executados pelo sistema computacional subjacente.[^2] - -Jupyter não é nem o primeiro e nem o único exemplo de cadernos computacionais. Já na década de 1980, interfaces de notebook estavam disponíveis através de softwares como Wolfram Mathematica e MATLAB. Em 2013, Stéfan Sinclair e Geoffrey Rockwell propuseram "cadernos Voyant" baseados no modelo de Mathematica, que exporia algumas das suposições que sustentam as [Ferramentas Voyant](https://perma.cc/9M5K-JWU7) e as tornaram configuráveis pelo usuário.[^3] Eles desenvolveram ainda esse conceito em [A Arte da Análise de Texto Literário Cadernos Spyral](https://perma.cc/53HW-GGSJ). - - -Jupyter ganhou força em muitos campos como um ambiente de código aberto compatível com inúmeras linguagens de programação. O nome Jupyter é uma referência às três linguagens principais suportadas pelo projeto (Julia, Python e R), mas [núcleos estão disponíveis que tornam o Jupyter compatível com dezenas de idiomas](https://perma.cc/B448-XMJQ), incluindo Ruby, PHP, Javascript, SQL e Node.js. Pode não fazer sentido implementar projetos em todas essas línguas usando Jupyter Notebooks (por exemplo, Omeka não permitirá que você instale um plugin escrito como um Jupyter Notebook), mas o ambiente Jupyter ainda pode ser valioso para documentar códigos, ensinar linguagens de programação e fornecer aos alunos um espaço onde eles podem facilmente experimentar com exemplos fornecidos. - - -## Instalando o Jupyter Notebooks - -Desde o final de 2019, existem dois grandes ambientes que você pode usar para executar Jupyter Notebooks: O Jupyter Notebook (não confundir com os próprios ficheiro(s) do Jupyter Notebook, que possuem uma extensão `.ipynb`), e o mais novo Jupyter Lab. O Jupyter Notebook é amplamente usado e bem documentado, e fornece um navegador simples de ficheiro(s), juntamente com o ambiente para criar, editar e executar os notebooks. Jupyter Lab é mais complexo, com um ambiente de usuário mais parecido com um Ambiente de Desenvolvimento Integrado (discutido em tutoriais anteriores do Programming Historian para [Windows](/pt/licoes/instalacao-windows), [Mac](/pt/licoes/instalacao-mac) e [Linux](/pt/licoes/instalacao-linux)). Embora o Jupyter Lab seja feito para, eventualmente, substituir o Jupyter Notebook, não há indicação de que o Jupyter Notebook deixará de ser suportado tão cedo. Devido à sua simplicidade comparativa e facilidade de uso para iniciantes, este tutorial usa o Jupyter Notebook como o software para executar ficheiro(s) de notebook. Ambos os pacotes de software estão incluídos na Anaconda, descrita abaixo. É mais fácil usar a Anaconda para instalar o Jupyter Notebook, mas se você já tem Python instalado em seu sistema e não quer lidar com o grande pacote Anaconda, você pode executar `pip3 install jupyter` (para Python 3). - - -## Anaconda - -Anaconda é uma distribuição gratuita de código aberto de Python e R que vem com mais de 1.400 pacotes, o gerenciador de pacotes Conda para instalação de pacotes adicionais, e o navegador Anaconda, que permite gerenciar ambientes (por exemplo, você pode instalar diferentes conjuntos de pacotes para diferentes projetos, para que eles não causem conflitos uns para os outros) usando uma interface gráfica. 
Após a instalação da Anaconda, você pode usar o navegador Anaconda para instalar novos pacotes (ou `conda install` através da linha de comando), mas muitos pacotes estão disponíveis apenas através de pip (ou seja, usando `pip install` através da linha de comando ou em seu Jupyter Notebook). - -Para a maioria dos propósitos, você deve optar pela versão Python 3 do Anaconda, mas alguns códigos ainda podem ser escritos em Python 2. Nesta lição, você usará Python 3. O instalador Anaconda tem mais de 500 MB, e após a instalação pode levar mais de 3 GB de espaço no disco rígido, por isso certifique-se de que você tem espaço suficiente no computador e uma conexão de rede rápida antes de começar. - -
    -Se o espaço do disco rígido é uma preocupação, você pode empacotar um notebook para que ele possa ser executado usando recursos gratuitos de computação em nuvem, em vez de fazer com que os usuários instalem o Anaconda. Isso pode ser especialmente útil em situações de oficina. Veja a seção abaixo. -
    - -Para baixar e instalar a Anaconda, acesse o [site da Anaconda](https://www.anaconda.com/data-science-platform). Certifique-se de ter clicado no ícone do seu sistema operacional (que deve alterar o texto Anaconda [número da versão] para [sistema operacional selecionado], de forma a indicar o seu sistema operacional) e, em seguida, clique no botão Baixar na caixa para a versão atual do Python 3. Se você estiver no Windows, deve baixar um ficheiro `.exe`; em Mac, é `.pkg`; no Linux, é `.sh`. - -Abra normalmente o ficheiro para instalar o software em seu sistema operacional. Mais detalhes de instalação estão disponíveis nos [documentos da Anaconda](https://docs.anaconda.com/anaconda/install/), incluindo como instalar a Anaconda através da linha de comando em cada sistema operacional. Se o computador não conseguir abrir o ficheiro que você baixou, certifique-se de selecionar o sistema operacional correto antes de baixar o instalador. No Windows, não deixe de escolher a opção de "Adicionar Anaconda à PATH Variable" durante o processo de instalação, ou você não poderá lançar Jupyter Notebook a partir da linha de comando. - -## Usando Jupyter Notebook para pesquisa - -Esta lição descreve como você pode inicialmente escrever um Jupyter Notebook para análise de dados como parte de um projeto de pesquisa e, em seguida, adaptá-lo para uso em sala de aula. Embora este exemplo em particular seja extraído de estudos de fãs, ele se concentra na conversão de datas, que é amplamente necessária na análise de dados históricos e literários. - -## Abrindo o Jupyter Notebook - -Supondo que você já tenha instalado a Anaconda como descrito acima, você pode abrir o Anaconda Navigator como qualquer outro aplicativo de software (você pode fechar o prompt sobre a criação de uma conta na nuvem do Anaconda; você não precisa de uma conta para trabalhar com o Anaconda). Na tela inicial, você deve ver um conjunto de ícones e breves sinopses sobre cada aplicativo incluído no Anaconda. - -Clique no botão "Iniciar" sob o ícone do Jupyter Notebook. - -{% include figure.html filename="tr-pt-introducao-jupyter-notebooks-1.png" alt="Imagem com captura de tela do interface do Anaconda Navigator" caption="Figura 1. Interface do Anaconda Navigator" %} - -Se você preferir usar a linha de comando em vez do navegador Anaconda, uma vez que você tenha o Anaconda instalado, você deve ser capaz de abrir uma nova janela Terminal (Mac) ou Command Prompt (Win) e executar `jupyter notebook` para iniciar o navegador web com o aplicativo Jupyter Notebook. Se você estiver usando a linha de comando para iniciar o Jupyter Notebook, preste atenção no diretório em que você está quando o iniciar. Essa pasta se torna o diretório doméstico que aparecerá imediatamente na interface Jupyter Notebook, conforme descrito abaixo. - -As duas abordagens abrirão uma nova janela ou guia no seu navegador padrão com a interface Jupyter Notebook. O Jupyter Notebook é baseado no navegador: você só interage com ele através do seu navegador, mesmo quando o Jupyter Notebook está sendo executado no seu próprio computador. - -
Se você estiver usando notebooks que importam pacotes Python com dependências de versões específicas de outros pacotes, você deve configurar um ambiente para usar com esses notebooks, para não ter que lidar com conflitos de versão (por exemplo, se um notebook requer a versão 1.0 de um pacote e outro requer a versão 2.0). [A documentação do Anaconda Navigator para gerenciar ambientes](https://perma.cc/E9TC-YMCU) (ou, se preferir usar a linha de comando, a [documentação do Conda](https://perma.cc/KHB8-U3CT)) fornece instruções passo a passo para criar, atualizar e ativar um ambiente. Para lançar o Jupyter Notebook dentro de um ambiente específico, você precisa primeiro ativar esse ambiente, como no esboço abaixo.
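Na linha de comando, a sequência típica se parece com o esboço a seguir (o nome `ambiente-notebooks` e a versão do Python são hipotéticos; ajuste-os ao seu projeto):

```
conda create -n ambiente-notebooks python=3.7 jupyter
conda activate ambiente-notebooks
jupyter notebook
```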
## Navegando na interface do Jupyter Notebook

A interface do gerenciador de ficheiros do Jupyter Notebook é a principal maneira de abrir um ficheiro Jupyter Notebook (`.ipynb`). Se você tentar abri-lo em um editor de texto simples, o notebook será exibido como um ficheiro JSON, não com blocos interativos de código. Para visualizar um notebook através da interface Jupyter, você tem que abrir o Jupyter Notebook primeiro (que será exibido em uma janela do navegador) e abrir o ficheiro de dentro do Jupyter Notebook. Infelizmente, não há como definir o Jupyter Notebook como o aplicativo padrão para abrir ficheiros `.ipynb` quando você clica duas vezes neles.

Quando você lança o Jupyter Notebook a partir do Anaconda Navigator, ele exibe automaticamente o diretório doméstico. Este é geralmente o diretório com seu nome de usuário em um Mac (/Users/seu-nome-de-usuário). Em um PC, geralmente é `C:\`. Se você abrir o Jupyter Notebook a partir da linha de comando, ele exibirá o conteúdo da pasta em que você estava quando o lançou (usando a linha de comando, você também pode lançar diretamente um notebook específico, por exemplo, `jupyter notebook example.ipynb`).

Para evitar desordenar esta pasta, você pode fazer uma nova pasta dentro deste diretório para seus notebooks. Você pode fazer isso na sua interface usual de gerenciamento de ficheiros (Finder no Mac, ou File Explorer no Windows), ou dentro do próprio Jupyter Notebook, já que o Jupyter Notebook, assim como o Google Drive, fornece uma interface de gerenciamento de ficheiros dentro de um navegador, bem como uma interface de menu e de barra de ferramentas para a criação de ficheiros. Para adicionar uma nova pasta no Jupyter Notebook, clique em "Novo" no canto superior direito e escolha "Pasta". Isso criará uma nova pasta chamada "Pasta Sem Título". Para alterar o nome, clique na caixa de seleção à esquerda da "Pasta Sem Título" e, em seguida, clique no botão "Renomear" que aparece na guia "Ficheiros". Nomeie a pasta como *notebooks*. Clique nela para abrir essa pasta.

## Upload dos dados do exemplo

O ficheiro CSV de exemplo para esta lição é um extrato de metadados de fan fiction de Harry Potter coletados do site de fanfic italiano https://efpfanfic.net, depois limpos usando uma combinação de [expressões regulares](/en/lessons/understanding-regular-expressions) e [OpenRefine](/pt/licoes/limpar-dados-openrefine). O CSV tem três colunas: a classificação da história (similar a uma classificação de filme), a data em que foi originalmente publicada e a data mais recente de atualização. As opções de classificação são verde (verde), giallo (amarelo), arancione (laranja) e rosso (vermelho). As datas de publicação e de atualização são criadas automaticamente quando a história é publicada no site ou atualizada, então você pode considerá-las consistentes.

Baixe o [ficheiro CSV](/assets/jupyter-notebooks/ph-jupyter-notebook-example.csv).

Dentro do navegador de ficheiros do Jupyter Notebook, você deve estar dentro do diretório *notebooks* que acabou de criar. No canto superior direito, clique no botão "Carregar" e carregue o ficheiro CSV de exemplo. Será mais fácil de acessar se estiver no mesmo diretório do Jupyter Notebook que você criará na próxima etapa, a fim de converter as datas.

{% include figure.html filename="tr-pt-introducao-jupyter-notebooks-2.png" alt="Imagem com captura de tela sobre o upload de ficheiros na interface Jupyter Notebook" caption="Figura 2. Upload de ficheiros na interface Jupyter Notebook" %}
Observe que esta não é a única maneira de fazer os ficheiros aparecerem no gerenciador de ficheiros do Jupyter Notebook. A pasta *notebooks* que você criou é um diretório regular em seu computador e, assim, você também pode usar sua interface usual de gerenciamento de ficheiros (por exemplo, Finder no Mac, ou File Explorer no Windows) para colocar ficheiros `.ipynb` e/ou de dados neste diretório. Os Jupyter Notebooks usam a localização do próprio ficheiro do notebook (o ficheiro `.ipynb`) como o caminho de partida padrão. Para oficinas e cursos, pode fazer sentido criar uma pasta onde você pode armazenar o notebook, qualquer imagem anexada e os dados com os quais você vai trabalhar, todos juntos. Se nem tudo estiver na mesma pasta, você terá que incluir o caminho ao referenciá-lo ou usar código Python dentro do notebook para alterar o diretório de trabalho.

## Criando um novo notebook

Dentro da pasta *notebooks*, crie um novo Jupyter Notebook para converter as datas para o seu projeto de pesquisa. Clique no botão "Novo" no canto superior direito da interface do gerenciador de ficheiros do Jupyter Notebook. Se você acabou de instalar a Anaconda como descrito acima, sua única opção será criar um Jupyter Notebook usando o _kernel_ Python 3 (o componente de backend que realmente executa o código escrito no notebook), mas vamos discutir abaixo como adicionar kernels para outras linguagens de programação. Clique em "Python 3", e o Jupyter Notebook abrirá uma nova guia com a interface para os próprios Jupyter Notebooks. Por padrão, o notebook será chamado de "Sem título"; você pode clicar nesse texto na parte superior da tela para renomeá-lo.

{% include figure.html filename="tr-pt-introducao-jupyter-notebooks-3.png" alt="Imagem com captura de tela da interface do Jupyter Notebook para criar novo ficheiro" caption="Figura 3. Criando um novo Jupyter Notebook" %}

## Trabalhando em Jupyter Notebooks

Um notebook é composto de células: caixas que contêm código ou texto legível por humanos. Cada célula tem um tipo, que pode ser selecionado nas opções do menu suspenso (drop-down). A opção padrão é "Code"; as caixas de texto legível por humanos devem usar o tipo "Markdown" e precisarão ser escritas usando as convenções de formatação do Markdown. Para saber mais sobre Markdown, veja a lição do Programming Historian “[Introdução ao Markdown](/pt/licoes/introducao-ao-markdown)”.

Quando você cria um novo Jupyter Notebook, a primeira célula será uma célula de código. No topo da interface do Jupyter Notebook está uma barra de ferramentas com funções que se aplicam à célula selecionada no momento. A primeira opção do menu suspenso é, por padrão, "Code". Clique nesse menu e selecione "Markdown" (você também pode usar um atalho de teclado, _esc + m_, para alterar a célula atual para Markdown, e _esc + y_ muda de volta para uma célula de código). Vamos começar este caderno com um título e uma breve explicação do que o caderno está fazendo. No momento, isso é apenas para sua própria memória e referência; você não quer investir muito em prosa e formatação nesta fase do projeto, quando não sabe se vai acabar usando este código como parte de seu projeto final ou se vai usar uma ferramenta ou método diferente. Mas ainda pode ser útil incluir algumas células Markdown com notas para ajudá-lo a reconstruir seu processo.

Cole o seguinte na primeira célula.
Se a primeira linha não aparecer com uma fonte grande (como um cabeçalho), certifique-se de ter selecionado "Markdown" no menu suspenso na parte superior.

```
# Fanfic date conversion
Converting published & updated dates for Italian fanfic into days of the week.
```

{% include figure.html filename="tr-pt-introducao-jupyter-notebooks-4.png" alt="Imagem com captura de tela da interface do Jupyter Notebook para editar Markdown" caption="Figura 4. Editando a célula Markdown em um Jupyter Notebook" %}

Quando você está editando uma célula, pode usar _Ctrl + Z_ (Win) ou _Command + Z_ (Mac) para desfazer as alterações que fez. Cada célula mantém seu próprio histórico de edição; mesmo que você passe para uma célula diferente e faça edições lá, pode posteriormente clicar de volta na primeira célula e desfazer suas alterações anteriores, sem perder as alterações realizadas na segunda célula.

Para deixar o modo de edição e "executar" esta célula (para uma célula Markdown, isso apenas renderiza o texto formatado e move o cursor mais para baixo no notebook), você pode clicar no botão de execução na barra de ferramentas ou pressionar Ctrl+Enter (Ctrl+Return no Mac). Se você quiser retomar a edição mais tarde, pode clicar duas vezes na célula ou selecioná-la (o que mostrará uma linha azul vertical à esquerda) clicando nela uma vez e, em seguida, pressionando a tecla Enter (Win) ou Return (Mac). Se você quiser executar sua célula atual e adicionar uma nova célula (por padrão, uma célula de código) imediatamente abaixo dela, pode pressionar Alt+Enter (Option+Enter no Mac).

Em seguida, você precisa descobrir como fazer a conversão. A busca por termos relevantes pode levá-lo a essa [discussão do StackOverflow](https://perma.cc/JG6H-KZAZ), e a primeira resposta envolve o uso do módulo Python datetime. Como primeiro passo, você precisa importar datetime, usando uma célula de código. Você também sabe que o seu ficheiro de entrada é um CSV, então você deve importar o módulo csv também.

Para adicionar uma nova célula, clique no botão + (mais) na barra de ferramentas (ou use o atalho do teclado _esc + b_). Isso criará uma nova célula de código abaixo da célula que está atualmente selecionada. Crie uma nova célula de código e cole o código a seguir para importar os dois módulos Python:

```
import datetime
import csv
```

Pensando desde já na possibilidade de compartilhar este notebook ou parte dele, pode ser útil dividir as importações de módulos em células individuais e colocar o código em si em outra célula, para que você possa incluir uma célula Markdown que explique o que cada uma delas está fazendo.

Ambos os pacotes que você está importando para este notebook já estão instalados como parte da Anaconda, mas existem muitos pacotes de nicho relevantes para a pesquisa (por exemplo, o [Classical Language Toolkit, CLTK](https://perma.cc/Q9Q8-9TNZ), para fazer análise de texto em línguas históricas) que não estão incluídos na Anaconda nem estão disponíveis através do _instalador conda_. Se você precisa de um pacote como esse, você tem que instalá-lo usando _pip_. Instalar pacotes de dentro do Jupyter Notebook pode ser um pouco complicado, porque pode haver diferenças entre o kernel Jupyter que o notebook está usando e outras versões do Python que você pode ter instalado no seu computador.
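Uma forma rápida de conferir qual instalação do Python o kernel do seu notebook está usando é executar uma célula como esta (um esboço; o caminho impresso varia conforme o sistema):

```
import sys

# mostra o executável do Python usado pelo kernel e a versão correspondente
print(sys.executable)
print(sys.version)
```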
Você pode encontrar uma longa discussão técnica sobre esses problemas neste [post de blog](https://perma.cc/N6M6-ZF5G).

Se você está trabalhando em um notebook que deseja compartilhar e ele inclui pacotes menos comuns, você pode incluir uma célula Markdown instruindo os usuários a instalar os pacotes com antecedência usando conda ou pip, ou você pode usar:

```
import sys
!conda install --yes --prefix {sys.prefix} YourModuleNameHere
```

para instalar algo a partir do notebook usando conda; a sintaxe `!` indica que o código está executando algo da linha de comando, em vez do kernel Jupyter. Ou, se o pacote não estiver disponível no conda (muitos pacotes de nicho relevantes para a pesquisa não estão), você pode usar `pip`:

```
import sys
!{sys.executable} -m pip install YourModuleNameHere
```

Se você não tinha instalado o Python no computador antes de instalar a Anaconda para esta lição, talvez seja necessário adicionar o pacote pip para poder usá-lo para instalar outros pacotes. Você pode adicioná-lo através da GUI (interface gráfica do usuário) do Anaconda Navigator, ou executar `conda install pip` a partir da linha de comando.

Voltando ao nosso exemplo, adicione em seguida uma nova célula de código e cole o seguinte código (certificando-se de manter a indentação):

```
with open('ph-jupyter-notebook-example.csv') as f:
    csv_reader = csv.reader(f, delimiter=',')
    for row in csv_reader:
        datetime.datetime.strptime(row[1], '%d/%m/%Y').strftime('%A')
        print(row)
```

Clicar no botão 'play' na barra de ferramentas quando você tem uma célula de código selecionada executa o código dentro da célula (se você tentar executar este código depois de executar as declarações de importação, verá um erro: "ValueError: time data ‘1/7/18’ does not match format ‘%d/%m/%Y’". Não se preocupe, vamos depurar isso a seguir).

Depois de executar uma célula de código, um número aparecerá entre colchetes à esquerda da célula. Este número indica a ordem em que a célula foi executada. Se você voltar e executar a célula novamente, o número é atualizado.

Se um número não aparecer imediatamente ao lado da célula, você verá um asterisco entre os colchetes. Isso significa que a célula de código não terminou de executar. Isso é comum para código de computação intensiva (por exemplo, processamento de linguagem natural) ou para tarefas de longa duração, como extração de conteúdo da web. Sempre que uma célula de código está sendo executada, o favicon na guia do navegador do notebook muda para uma ampulheta. Se você quiser mudar de guia e fazer outra coisa enquanto o código estiver em execução, saberá que a execução foi concluída quando a ampulheta mudar de volta para o ícone do notebook.

{% include figure.html filename="tr-pt-introducao-jupyter-notebooks-5.png" alt="Imagem com captura de tela sobre a execução de código no Jupyter Notebook" caption="Figura 5. Executando uma célula de código em um Jupyter Notebook" %}

<div class="alert alert-warning">
O Jupyter Notebook funciona melhor se você executar as células sequencialmente. Às vezes, você pode obter erros ou saídas incorretas se executar as células fora de ordem ou tentar editar e executar iterativamente diferentes partes do notebook. Se você fez muitas alterações, executou blocos de código de forma não linear e descobriu que está recebendo uma saída estranha, você pode redefinir o Jupyter Notebook clicando em Kernel no menu e escolhendo "Restart & Clear Output". Mesmo que você não tenha notado nada de estranho, é uma boa ideia usar "Restart & Clear Output" e executar o código novamente assim que terminar de escrevê-lo, para ter certeza de que o resultado está correto.
</div>

Depois de executar a segunda célula de código, você verá um erro. Para descobrir o que está acontecendo, você pode consultar a [documentação do módulo datetime](https://perma.cc/S92Z-3QVM), que explica cada uma das diferentes opções de formatação. Lá, você verá que o código `%d` espera o dia com dois dígitos (ou seja, dias de um dígito devem ser prefixados com um 0). Olhando para os dados do exemplo, os meses (listados em segundo lugar nesta ordem de data) já recebem um zero à esquerda quando têm apenas um dígito, mas não os dias. Observe também que, segundo a documentação, `%Y` corresponde ao ano com quatro dígitos, enquanto as datas do ficheiro usam apenas dois (o código para anos de dois dígitos seria `%y`), outra incompatibilidade entre o formato declarado e os dados. Você tem duas opções: você pode tentar alterar os dados, ou pode tentar alterar seu código.
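Antes de decidir, pode ajudar testar o `strptime` isoladamente, em uma célula à parte. O esboço abaixo usa datas hipotéticas no mesmo formato do ficheiro de exemplo:

```
import datetime

# com dia, mês e ano completos, o formato '%d/%m/%Y' funciona
print(datetime.datetime.strptime('01/07/2018', '%d/%m/%Y').strftime('%A'))  # Sunday

# a data tal como aparece no ficheiro de exemplo provoca o erro visto acima
try:
    datetime.datetime.strptime('1/7/18', '%d/%m/%Y')
except ValueError as erro:
    print(erro)
```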
Digamos que você queira tentar uma abordagem diferente, mas quer deixar registrado o que fez até agora, no caso de querer revisitar esse código e, talvez, usá-lo depois de alterar os dados. Para lembrar do que aconteceu, adicione uma célula Markdown acima da sua segunda célula de código. Clique na primeira célula de código e, em seguida, clique no botão + (mais) na barra de ferramentas. (Se você clicar no botão + depois de executar a última célula de código, a nova célula aparecerá na parte inferior do notebook; você pode movê-la para onde quiser clicando no botão de seta para cima.) Certifique-se de que está no modo Markdown e cole o seguinte texto:

```
### Não funciona, precisa de datas precedidas por zero
[documentação do datetime](https://docs.python.org/2/library/datetime.html?highlight=strftime#strftime-and-strptime-behavior).
Modificar o ficheiro de origem?
```

Lendo um pouco mais na [discussão do StackOverflow](https://perma.cc/EN55-P57H), há outra abordagem que usa uma biblioteca diferente, dateutil, que parece ser mais flexível com os tipos de datas que aceita. Volte para a célula usada para importar módulos e edite-a para adicionar a nova importação (em qualquer lugar dessa célula, desde que cada declaração de importação esteja em sua própria linha):

```
import dateutil.parser
```

(Importar o submódulo explicitamente, com `import dateutil.parser` em vez de apenas `import dateutil`, garante o acesso a `dateutil.parser` mesmo nas versões da biblioteca que não o carregam automaticamente.)

Re-execute essa célula de código; note que o número ao lado da célula muda na segunda vez que você a executar.

Agora crie uma nova célula Markdown na parte inferior do notebook e cole:

```
#### tentando dateutil para analisar datas, conforme https://stackoverflow.com/a/16115575
```

Abaixo dela, adicione uma nova célula de código com o seguinte código (prestando atenção ao espaçamento, de modo que o código seja indentado assim como você vê abaixo):

```
with open('ph-jupyter-notebook-example.csv') as f:
    csv_reader = csv.reader(f, delimiter=',')
    for row in csv_reader:
        parseddate = dateutil.parser.parse(row[1])
        print(parseddate)
```

Execute a célula com o código que você acabou de adicionar. Pode levar mais tempo; continue esperando até que o asterisco ao lado da célula de código se transforme em um número. O resultado deve mostrar a lista de datas de publicação, formatadas de forma diferente, com hifens em vez de barras e com a adição das horas, minutos e segundos (como zeros, porque as datas registradas não incluem esses dados). À primeira vista, parece que funcionou mas, se você comparar com mais atenção com o ficheiro de origem, verá que o módulo dateutil não está sendo consistente em como analisa as datas.
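Você pode isolar a inconsistência testando o analisador com duas datas hipotéticas no formato dia/mês/ano (um esboço ilustrativo):

```
import dateutil.parser

# '1/7/18' é ambígua: por padrão, o dateutil assume que o mês vem primeiro
print(dateutil.parser.parse('1/7/18'))   # 2018-01-07 00:00:00 (lida como 7 de janeiro)

# '13/7/18' não é ambígua: 13 não pode ser um mês
print(dateutil.parser.parse('13/7/18'))  # 2018-07-13 00:00:00 (lida corretamente)
```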
Datas em que o valor do dia é maior que 12 estão sendo analisadas corretamente (o analisador sabe que um valor maior que 12 não pode ser um mês), mas, quando o valor do dia é 12 ou menos, a data está sendo interpretada com o mês primeiro. A primeira linha do ficheiro de origem tem a data 1/7/18, que é entendida como "2018-01-07 00:00:00". Na documentação do dateutil, você descobrirá que pode [especificar `dayfirst=True`](https://perma.cc/W54E-SP5Z) para corrigir isso. Edite a última célula de código e altere a penúltima linha para:

```
parseddate = dateutil.parser.parse(row[1], dayfirst=True)
```

Quando você executar a célula novamente, verá que todas as datas foram analisadas corretamente.

Analisar a data é apenas o primeiro passo: você ainda precisa usar o módulo datetime para converter as datas em dias da semana.

Exclua a última linha do bloco de código e substitua-a pelo seguinte (certificando-se de que ambas as linhas tenham o mesmo nível de recuo da última linha anterior):

```
dayofweek = datetime.date.strftime(parseddate, '%A')
print(dayofweek)
```

Execute o bloco de código novamente. Isso deve lhe dar uma lista de dias da semana.

Agora que você tem código para analisar e reformatar uma data, precisa fazê-lo para ambas as datas em cada linha do seu ficheiro de origem. Como você sabe que tem código funcionando na célula de código atual, se não se sentir muito confortável com Python, pode querer copiar essa célula antes de fazer modificações. Selecione a célula que deseja copiar e clique no botão copiar na barra de ferramentas; o botão de colar irá colar a célula abaixo de qualquer célula atualmente selecionada. Fazer uma cópia permite que você altere o código livremente, sabendo que sempre pode voltar facilmente para uma versão que funciona.

Se você não quiser resolver isso por conta própria, pode copiar e colar o código a seguir em uma nova célula de código ou substituir a célula de código atual:

```
#identifica o ficheiro fonte a ser aberto, chamando-o de f
with open('ph-jupyter-notebook-example.csv') as f:
    #cria um ficheiro de saída (referido como "out" no notebook) para ser gravado
    with open('ph-jupyter-notebook-example-dayofweek.csv', 'w') as out:
        #define "csv_reader" como executando a função csv.reader no ficheiro
        csv_reader = csv.reader(f, delimiter=',')
        #define "csv_writer" como executando a função csv.writer para "out" (o ficheiro de saída)
        csv_writer = csv.writer(out)
        #para cada linha que está sendo lida pelo csv_reader...
        for row in csv_reader:
            #cria uma lista chamada "values" com o conteúdo da linha
            values = list(row)
            #define "rating" como a primeira coisa na lista
            #a contagem em Python começa com 0, não 1
            rating = values[0]
            #define "parseddatepub" como a segunda coisa (1, porque começamos com 0) na lista,
            #convertida em um formato de data padrão usando dateutil.parser;
            #ao analisar essas datas, o analisador deve saber
            #que o primeiro valor na sequência é o dia
            parseddatepub = dateutil.parser.parse(values[1], dayfirst=True)
            #o mesmo que acima para a data de atualização, a terceira coisa (2) na lista
            parseddateupdate = dateutil.parser.parse(values[2], dayfirst=True)
            #define "dayofweekpub" como parseddatepub (definida acima), convertida para o dia da semana
            #%A é usado para obter o dia da semana
            #pode ver outros formatos aqui: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
            dayofweekpub = datetime.date.strftime(parseddatepub, '%A')
            #a mesma coisa para a data de atualização
            dayofweekupdate = datetime.date.strftime(parseddateupdate, '%A')
            #cria uma lista com a classificação e as novas datas formatadas
            updatedvalues = [rating, dayofweekpub, dayofweekupdate]
            #escreve os valores desta linha no ficheiro CSV de saída
            csv_writer.writerow(updatedvalues)
            print(updatedvalues)
```

Depois de executar este código, você terá um novo ficheiro, `ph-jupyter-notebook-example-dayofweek.csv`, com seus dados no formato que você precisa para a análise.

Agora que você tem um código que funciona para converter as datas do formato que você tem para o formato que precisa, pode limpar as falsas partidas e as notas para si mesmo. Você vai querer manter a primeira célula de código, com as declarações de importação, e a primeira célula Markdown, com o título e a descrição, mas deve excluir as outras células de código e Markdown que não fazem parte do seu código final. Para excluir uma célula, clique nela e, em seguida, clique no botão de tesoura na barra de ferramentas. Se você excluir uma célula por engano, pode clicar em Editar no menu e escolher "Desfazer excluir células".

## Salvando, exportando e publicando Jupyter Notebooks

O Jupyter salva automaticamente seu trabalho de forma periódica, criando "pontos de verificação" (checkpoints). Se algo der errado com seu notebook, você pode reverter para um ponto de verificação anterior indo em "File", em seguida "Revert to Checkpoint", e escolhendo um horário. Dito isso, ainda é importante salvar seu notebook (usando o botão de salvar), porque, se você fechar e desligar o kernel do notebook (incluindo reiniciar o kernel), os pontos de verificação serão perdidos.

Você também pode baixar o notebook (_File > Download as_) em vários formatos de ficheiro diferentes. Baixar no formato Notebook (`.ipynb`) é útil se você quiser compartilhar seu código em seu formato completo de notebook. Você também pode baixá-lo como código na linguagem em que seu notebook estiver (por exemplo, `.r` se em R, `.py` se em Python ou `.js` se em JavaScript), como um ficheiro `.html`, como um ficheiro Markdown (`.md`) ou como um PDF via LaTeX. Se você baixá-lo como código, as células Markdown se tornam comentários. Se quiser converter um ficheiro `.ipynb` para outro formato depois de baixá-lo, você pode usar a ferramenta [nbconvert](https://perma.cc/6J73-KCK5), como no esboço a seguir.
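A partir da linha de comando, o nbconvert pode, por exemplo, gerar uma versão HTML ou um script Python de um notebook (o nome `meu-notebook.ipynb` é hipotético):

```
jupyter nbconvert --to html meu-notebook.ipynb
jupyter nbconvert --to script meu-notebook.ipynb
```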
Se você está trabalhando em um projeto de pesquisa, pode usar um Jupyter Notebook, ou uma série de notebooks, ao longo do caminho para acompanhar seu fluxo de trabalho. Alguns estudiosos postam esses cadernos no GitHub, juntamente com slides ou PDFs de pôsteres e dados de origem (ou metadados, se os direitos autorais permitirem), para acompanhar apresentações e palestras. O GitHub renderiza versões não interativas de ficheiros de notebook, para que possam ser visualizados dentro de um repositório. Alternativamente, você pode colar a URL de um repositório do GitHub que tem notebooks Jupyter no [nbviewer](https://nbviewer.jupyter.org/), o que às vezes oferece uma visualização mais rápida e confiável. Você pode querer incluir uma célula Markdown com uma citação recomendada para o seu Jupyter Notebook e uma referência ao repositório do GitHub onde ele está armazenado, especialmente se o seu notebook inclui código que outros possam reutilizar para análises semelhantes.

O código que você acabou de desenvolver como parte desta lição pertence a algum lugar no meio de um projeto real. Se você estiver usando notebooks para documentar seu fluxo de trabalho, pode optar por adicionar a nova célula de código a um notebook existente, em vez de baixá-la como um notebook separado e autônomo. Os Jupyter Notebooks podem ser particularmente úteis para documentar fluxos de trabalho de projetos quando você está trabalhando com colaboradores que só podem estar envolvidos por um curto período de tempo (como estagiários de graduação no período de férias escolares). Com colaboradores de curto prazo, é importante ajudá-los a entender e começar a usar os fluxos de trabalho do projeto sem muito tempo de preparação, e os Jupyter Notebooks podem descrever esses fluxos de trabalho passo a passo, explicar onde e como os ficheiros são armazenados e indicar tutoriais externos e materiais de treinamento para ajudar os colaboradores menos familiarizados com os fundamentos técnicos do projeto a começar. Por exemplo, dois projetos que usaram Jupyter Notebooks para publicar fluxos de trabalho são o Projeto Realismo Socialista, de Sarah McEleney, e a [“mineração de texto da literatura infantil inglesa 1789-1914 para a representação de insetos e outros rastejantes assustadores”](https://perma.cc/JD8N-P79G).

À medida que seu projeto progride, se você estiver publicando através de canais de acesso aberto e se seus conjuntos de dados podem ser compartilhados livremente, os Jupyter Notebooks podem fornecer um formato ideal para tornar o código que sustenta seu argumento acadêmico visível, testável e reutilizável. Embora os periódicos e editoras possam não aceitar Jupyter Notebooks como formato de submissão, você pode desenvolver uma "versão" do seu artigo que inclua o texto completo (como células Markdown), com células de código integradas ao fluxo da narrativa acadêmica como uma ilustração imediatamente acessível da análise que você está descrevendo. Você também pode incluir as células de código que compõem os fluxos de trabalho de preparação de dados como um apêndice, seja no mesmo notebook, seja em um separado. Integrar o código ao texto de um artigo acadêmico torna muito mais provável que os leitores realmente se envolvam com o código, já que podem simplesmente executá-lo dentro do mesmo caderno onde estão lendo o argumento. Alguns estudiosos, particularmente na Europa, também postam seus cadernos no [Zenodo](https://zenodo.org/), um repositório para dados de pesquisa, independentemente do país de origem, financiador ou disciplina. O Zenodo suporta conjuntos de dados de até 50 GB (em comparação com o limite de 100 MB por ficheiro no GitHub) e fornece DOIs para o material carregado, incluindo notebooks. Alguns estudiosos combinam o arquivamento no Zenodo, pela sustentabilidade, com a publicação no GitHub, pela facilidade de descoberta, incluindo o DOI do Zenodo no ficheiro readme.md do repositório do GitHub que contém os notebooks. Como exemplo, o caderno de workshop ["Análise de Dados Aplicada", de Giovanni Colavizza e Matteo Romanello para o DHOxSS 2019](https://perma.cc/6S7H-LQEA), é publicado no GitHub, mas inclui [um DOI do Zenodo](https://doi.org/10.5281/zenodo.3352830).

Embora a argumentação e o código totalmente integrados ainda sejam difíceis de encontrar, devido à falta de um local para publicar esse tipo de trabalho, os estudiosos começaram a usar Jupyter Notebooks como um passo incremental, mais interativo, em direção a publicações computacionais dinâmicas. José Calvo tem um exemplo de um [caderno acompanhando um artigo sobre estilometria](https://perma.cc/Y9CK-CFK8) (em espanhol), e Jed Dobson publicou um [conjunto de cadernos](https://perma.cc/UDA3-467P) para acompanhar seu livro _Critical Digital Humanities: The Search for a Methodology_, que aborda diretamente os Jupyter Notebooks como objetos acadêmicos (p. 39-41).

## Usando Jupyter Notebook para ensinar

O Jupyter Notebook é uma ótima ferramenta para ensinar programação, ou para ensinar conceitos como modelagem de tópicos ou vetores de palavras que envolvem programação. A capacidade de fornecer instruções e explicações como Markdown permite que os educadores forneçam notas detalhadas sobre o código, alternando células Markdown e células de código, de modo que o texto em Markdown explique o código na célula logo abaixo. Isso é útil para oficinas práticas, pois as instruções e o código podem ser escritos com antecedência. Isso permite que os participantes abram o notebook, baixem um conjunto de dados e executem o código como está. Se você espera ministrar uma oficina onde os alunos terão diferentes níveis de familiaridade com a programação, pode configurar o notebook com tarefas suplementares para os alunos que se sentem confortáveis em modificar o código. Ao mesmo tempo, mesmo os alunos que hesitam em tocar no código ainda poderão alcançar o resultado principal da oficina apenas executando células de código pré-escritas.

Como outra abordagem, você também pode usar Jupyter Notebooks para escrever código à medida que o desenvolve. Em tal oficina, os alunos podem começar com um caderno em branco e escrever o código junto com você. As células ajudam a segmentar o código conforme você o escreve, em vez de usar um editor de texto ou IDE (Ambiente de Desenvolvimento Integrado), que não quebra o código de forma tão clara e pode causar confusão, especialmente ao ensinar iniciantes.

Você pode usar Jupyter Notebooks para tarefas em sala de aula dando instruções em Markdown e fazendo com que os alunos escrevam código em uma célula em branco com base nas instruções. Dessa forma, você pode criar uma tarefa de programação interativa que ensina aos alunos não apenas a sintaxe e o vocabulário de uma linguagem de programação, mas que também pode explicar as melhores práticas de programação em geral.

Se você já está usando Jupyter Notebooks para documentar o fluxo de trabalho do seu projeto, pode ser capaz de reformular esses cadernos de pesquisa para uso em sala de aula, como uma maneira de trazer sua pesquisa para a sala de aula.
Este [exemplo de caderno pedagógico](/assets/jupyter-notebooks/ph-jupyter-notebook-example.ipynb) é um híbrido de algumas das abordagens pedagógicas descritas acima. A primeira seção do caderno destina-se a estudantes que têm pouca ou nenhuma experiência anterior executando código; o principal resultado de aprendizado é comparar o tempo necessário para converter manualmente formatos de dados com o tempo de fazê-lo com código. Você poderia usar este caderno em uma sessão prática de laboratório em uma introdução às humanidades digitais ou à história digital, onde todos os alunos instalam a Anaconda e aprendem o básico do Jupyter Notebook. Se a turma tem uma mistura de alunos sem formação técnica e alunos com exposição prévia ao Python, você pode orientar os alunos com experiência de programação a trabalhar em conjunto, em grupos de dois ou três, para propor soluções para os prompts na segunda parte do notebook. Tenha em mente que, se você usar uma tarefa de classe como esta como uma forma de fazer com que alunos de ciência da computação escrevam código que ajude seu projeto de pesquisa, eles devem ser creditados como colaboradores e reconhecidos em publicações subsequentes vindas do projeto.[^4]

Existem muitos cursos e workshops de 'Introdução ao Python' nas humanidades digitais que utilizam Jupyter Notebooks (incluindo [Introdução ao Python e Desenvolvimento web com Python para as Humanidades](https://perma.cc/ANL2-K7SM), de Thibault Clérice, traduzido do material de Matthew Munson). O Jupyter Notebook também é comumente usado em oficinas de análise de texto, como a [oficina de vetores de palavras na DH 2018](https://perma.cc/5UZ9-25XW), ministrada por Eun Seo Jo, Javier de la Rosa e Scott Bailey.

Ensinar com Jupyter Notebooks nem sempre tem que envolver o processo demorado de baixar e instalar a Anaconda, especialmente se você está imaginando ter apenas uma ou duas lições que envolvem notebooks. Se suas atividades em sala de aula com Jupyter Notebooks envolvem o uso de dados de exemplo que você já preparou e se você já escreveu pelo menos parte do código, pode ser interessante explorar a execução de Jupyter Notebooks usando recursos gratuitos de computação em nuvem, desde que seus alunos tenham a garantia de conectividade confiável com a internet em sala de aula. Rodar notebooks na nuvem também fornece um ambiente consistente para todos os alunos, poupando você de ter que lidar com diferenças entre Windows e Mac ou de fornecer uma alternativa para estudantes cujos laptops não têm espaço ou memória para executar a Anaconda efetivamente.

Como as opções estão evoluindo rapidamente, é melhor usar seu mecanismo de busca favorito para encontrar uma lista atualizada de opções de computação em nuvem para Jupyter Notebooks. Um projeto que tem tido adoção particularmente forte entre usuários acadêmicos de notebooks é o [MyBinder](https://mybinder.org/). Ele recebe um repositório do GitHub que contém ficheiros `.ipynb` do Jupyter, os dados relacionados (imagens incorporadas, conjuntos de dados que você deseja usar nos notebooks etc.) e as informações sobre os pacotes e dependências necessários (em um ficheiro `requirements.txt` ou `environment.yml`), e o torna inicializável em um servidor de nuvem. Uma vez que o MyBinder esteja configurado para o seu repositório do GitHub, você pode adicionar um "selo" (badge) do Binder ao readme do repositório. Quem estiver vendo o repositório pode lançar o notebook diretamente do navegador, sem ter que baixar ou instalar nada.
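Por exemplo, para um notebook como o desta lição, um `requirements.txt` mínimo poderia conter apenas a linha abaixo (um esboço; liste, uma por linha, as bibliotecas que o seu notebook realmente importa e que não fazem parte da biblioteca padrão do Python):

```
python-dateutil
```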
Como os dados que o notebook precisa acessar devem ser incluídos no repositório, isso não funcionará para todas as situações (por exemplo, se os dados não podem ser redistribuídos legalmente no GitHub, se excedem o tamanho máximo de ficheiro do GitHub e não podem ser baixados de outro lugar como parte da configuração do ambiente Binder, ou se você quiser que as pessoas usem o notebook com seus próprios dados), mas é uma ótima opção para oficinas ou aulas onde todos estão trabalhando com os mesmos dados compartilháveis.

Se você quiser começar a explorar opções de nuvem, Shawn Graham criou [alguns modelos para configurar notebooks Python e R Jupyter para uso no Binder](https://perma.cc/T25E-BFH4).

Finalmente, se você precisa manter seus notebooks fora da nuvem (por exemplo, devido a dados sensíveis ou de outra forma restritos), mas quer fornecer um ambiente consistente para todos os seus alunos, pode explorar o [JupyterHub](https://perma.cc/8EH7-N22K), que tem sido adotado como infraestrutura técnica central por um número crescente de programas de ciência de dados.

## Convertendo códigos Python

Mesmo que você goste da ideia de usar Jupyter Notebooks, qualquer conversão de formato requer trabalho adicional. Se você já tem seu código escrito como scripts Python, a conversão para Jupyter Notebooks é bastante simples. Você pode copiar e colar o código do seu ficheiro `.py` em uma única célula de código de um novo notebook e, em seguida, dividir a célula de código em segmentos e adicionar células Markdown conforme necessário.

Alternativamente, pode ser mais fácil segmentar à medida que você transfere o código, copiando um segmento de cada vez em uma nova célula de código. Qualquer um dos métodos funciona; é uma questão de preferência pessoal.

Há também ferramentas como o [pacote 'p2j'](https://perma.cc/5YUE-YBH7), que convertem automaticamente código Python existente em notebooks Jupyter, seguindo um conjunto documentado de convenções (por exemplo, transformando comentários em células Markdown).

## Cadernos Jupyter para outras linguagens de programação

Os Jupyter Notebooks permitem que você use muitas linguagens de programação diferentes, incluindo R, Julia, JavaScript, PHP ou Ruby. Uma lista atual de linguagens disponíveis pode ser encontrada na página do [Jupyter Kernels](https://perma.cc/B448-XMJQ) no GitHub.

Enquanto o Python é suportado por padrão quando você instala o Jupyter Notebook através da Anaconda, as outras linguagens de programação precisam ter seus kernels instalados antes que possam ser executadas no Jupyter Notebook. As instruções de instalação são diferentes para cada kernel, por isso é melhor encontrar e seguir as instruções para a sua linguagem preferida. Pelo menos para R, isso é relativamente simples. A página do Jupyter Kernels no GitHub tem links para instruções para todos os kernels de linguagem disponíveis.

Uma vez que você tenha instalado o kernel da linguagem desejada, você pode executar cadernos escritos nessa linguagem de programação ou criar seus próprios cadernos que a executem. Cada linguagem com um kernel instalado em seu computador estará disponível como uma opção quando você criar um novo notebook, como descrito acima (para listar os kernels já instalados, veja o comando ao final desta seção).

Como exemplo de um notebook R, [veja esta adaptação para Jupyter do código R de Andrew Piper de "Enumerações"](https://perma.cc/656B-U9SB).
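Para conferir quais kernels a sua instalação do Jupyter já reconhece, você pode executar o comando abaixo no terminal (a saída exata varia conforme o sistema e os kernels instalados):

```
jupyter kernelspec list
```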
## Dimensionando a computação com Jupyter Notebooks

Especialmente se você é novo na programação em Python, apenas conseguir que qualquer coisa funcione já pode parecer uma vitória. No entanto, se você começar a trabalhar com conjuntos de dados maiores, poderá descobrir que algumas das “soluções” iniciais encontradas (como usar `readlines()` para ler um ficheiro de texto linha por linha) se tornam computacionalmente ineficientes, a ponto de causar problemas. Uma maneira de começar a entender as ineficiências do seu código é adicionar `%%timeit` ao topo de uma célula. O notebook escolherá um número de iterações para executar o código, dependendo da complexidade da tarefa, e imprimirá o número de iterações e o tempo médio. Fazer várias iterações, em vez de apenas uma, pode ser útil para compensar pequenos atrasos no nível do sistema (por exemplo, se seu laptop estiver momentaneamente sobrecarregado com outros processos). Para medir uma única linha, você pode colocar `%timeit` na frente dela. Tenha cuidado com operações como a ordenação de listas: a primeira iteração, que ordena a lista, leva muito mais tempo do que as seguintes, executadas quando a lista já está em ordem. Em casos como esse, em que não faz sentido medir várias iterações, ou para tarefas de longa duração, em que pequenos atrasos do sistema não terão impacto significativo, você pode usar `%%time` no topo de uma célula, ou `%time` na frente de uma linha, para medir o tempo que uma única execução leva. Esses comandos fazem parte de uma família de “comandos mágicos” integrados disponíveis nos Jupyter Notebooks; veja a [documentação do Jupyter](https://perma.cc/ED9F-DNDA) para mais detalhes. Um esboço de uso aparece ao final desta seção.

Ter alguma ideia de quanto tempo seu código levará para executar é um pré-requisito para escalar o trabalho para clusters de computação, como os clusters de computação de alto desempenho (HPC) financiados centralmente e disponíveis em muitas instituições. A esmagadora maioria dos pesquisadores que usam esses recursos está nas ciências duras, mas geralmente qualquer membro do corpo docente pode solicitar acesso. É possível que você também tenha acesso a recursos de HPC regionais ou nacionais. Esses recursos de computação podem acelerar significativamente grandes trabalhos computacionais, especialmente tarefas como modelagem 3D, que podem tirar proveito de nós computacionais com poderosas unidades de processamento gráfico (GPUs). Aprender a usar clusters HPC é um tópico suficientemente grande para sua própria lição, mas os Jupyter Notebooks podem permitir que você pegue um atalho. Alguns grupos de computação para pesquisa oferecem maneiras mais fáceis de os pesquisadores executarem Jupyter Notebooks usando recursos de cluster HPC, e você pode encontrar [vários guias e exemplos de uso geral](https://perma.cc/A5R4-9ZD7) para fazê-lo. Se você conseguir acesso a recursos de HPC, vale a pena entrar em contato com a equipe de TI voltada à computação para pesquisa e perguntar como executar Jupyter Notebooks ali, caso não encontre documentação a respeito no site da sua instituição. A equipe de TI que trabalha majoritariamente com pesquisa pode se comunicar de forma mais brusca do que você está acostumado, mas não deixe que isso o desanime: a maioria quer genuinamente ajudar e, como a diversidade da base de usuários é importante para suas medidas de atuação na universidade, costuma ter interesse em apoiar pesquisadores das humanidades.
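Retomando os comandos mágicos de medição de tempo mencionados acima, uma célula como esta (um esboço; a operação medida é arbitrária) informa o tempo médio por iteração:

```
%%timeit
# soma os números de 0 a 99999; o %%timeit repete a célula várias vezes e informa a média
total = 0
for n in range(100000):
    total += n
```

Para medir uma única linha, a forma equivalente seria `%timeit sum(range(100000))`.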
## Conclusão

Desde a experimentação com código até a documentação de fluxos de trabalho, da pedagogia à publicação acadêmica, o Jupyter Notebook é uma ferramenta flexível e multiuso que pode apoiar a pesquisa digital em diversos contextos. Mesmo que você não tenha certeza de como exatamente vai usá-los, é bastante fácil instalar o software do Jupyter Notebook, baixar e explorar notebooks existentes ou experimentar alguns dos seus próprios. Os Jupyter Notebooks têm grande potencial para aproximar as facetas crítica e computacional da pesquisa em humanidades digitais. Para concluir, uma citação de _Critical Digital Humanities: The Search for a Methodology_, de Jed Dobson:

> Notebooks são teoria - não apenas código como teoria, mas teoria como engajamento reflexivo com o trabalho teórico e as implicações do próprio código. As normas disciplinares, incluindo enquadramento contextual, teoria e autocrítica, precisam acompanhar, complementar e informar qualquer crítica computacional. Revelar o máximo possível do código, dos dados e dos métodos é essencial para permitir a conversa disciplinar em curso. Compilá-los juntos em um único objeto, que pode ser exportado, compartilhado, examinado e executado por outros, produz um tipo dinâmico de teorização que é modular, mas firmemente ligado ao seu objeto.[^5]

## Links

- Uma lista crescente de notebooks [Jupyter para DH](https://perma.cc/V5JX-VPP8), em múltiplas linguagens humanas e de programação. Obrigado a todos que enviaram sugestões no Twitter; referências adicionais são bem-vindas.

- Uma descrição técnica detalhada da [instalação de pacotes Python a partir do Jupyter](https://perma.cc/N6M6-ZF5G).

## Agradecimentos

- Obrigado a Stéfan Sinclair pelas referências a discussões anteriores sobre o uso de notebooks em humanidades digitais.

- Obrigado a Rachel Midura por sugerir o uso de Jupyter Notebooks para colaboração.

[^1]: Knuth, Donald. 1992. _Literate Programming_. Stanford, Califórnia: Centro para o Estudo da Linguagem e da Informação.

[^2]: Millman, KJ e Fernando Perez. 2014. “Developing open source scientific practice”. In _Implementing Reproducible Research_, ed. Victoria Stodden, Friedrich Leisch e Roger D. Peng. [https://osf.io/h9gsd/](https://perma.cc/M8R7-9JTL)

[^3]: Sinclair, Stéfan e Geoffrey Rockwell. 2013. “Voyant Notebooks: Literate Programming and Programming Literacy”. _Journal of Digital Humanities_, Vol. 2, No. 3, Summer 2013. [http://journalofdigitalhumanities.org/2-3/voyant-notebooks-literate-programming-and-programming-literacy/](https://perma.cc/R253-BP2B)

[^4]: Haley Di Pressi, Stephanie Gorman, Miriam Posner, Raphael Sasayama e Tori Schmitt, com contribuições de Roderic Crooks, Megan Driscoll, Amy Earhart, Spencer Keralis, Tiffany Naiman e Todd Presner. “A Student Collaborator’s Bill of Rights”. [https://humtech.ucla.edu/news/a-student-collaborators-bill-of-rights/](https://perma.cc/A8G2-BBL9)

[^5]: Dobson, James. 2019. _Critical Digital Humanities: The Search for a Methodology_. Urbana-Champaign: University of Illinois Press. p. 40.
+--- +title: "Introdução ao Jupyter Notebook" +slug: introducao-jupyter-notebooks +original: jupyter-notebooks +layout: lesson +collection: lessons +date: 2019-12-08 +translation_date: 2023-06-02 +authors: +- Quinn Dombrowski +- Tassie Gniady +- David Kloster +reviewers: +- Patrick Burns +- Jeri Wieringa +editors: +- Brandon Walsh +translator: +- Vânia Rosa +translation-editor: +- Jimmy Medeiros +translation-reviewer: +- Juliana Marques +- Caio Mello +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/431 +difficulty: 1 +activity: presenting +topics: [python, website] +abstract: Jupyter Notebook fornece um ambiente onde você pode trabalhar com facilidade o seu código na linguagem Python. Esta lição descreve como instalar o software Jupyter Notebook, como executar e criar ficheiros para o Jupyter Notebook. +avatar_alt: O planeta Júpiter +doi: 10.46430/phpt0043 +--- + +{% include toc.html %} + +## Introdução + +Quando a computação é uma parte intrínseca de sua prática de pesquisa, como você publica um argumento acadêmico de forma que torne o código tão acessível e legível como a prosa que o acompanha? Na área das humanidades, a publicação de uma pesquisa assume principalmente a forma de prosa escrita, artigo ou monografia. Embora as editoras estejam cada vez mais abertas à inclusão de códigos suplementares ou outros materiais, tal arranjo inerentemente os relega a um estatuto secundário relativo ao texto escrito. + +E se você pudesse publicar sua pesquisa em um formato que desse um peso equilibrado entre a prosa e o código? A realidade das atuais diretrizes de publicação acadêmica significa que a separação forçosa do seu código e da argumentação pode ser uma necessidade, e sua reunificação pode ser impossível sem que se navegue por numerosos obstáculos. Atualmente o código é tipicamente publicado em separado no GitHub ou em outro repositório, caso no qual os leitores têm que procurar uma nota de rodapé no texto para descobrir quais scripts estão sendo referenciados, encontrar a URL do repositório, acessar a URL, procurar os scripts, baixá-los e também os ficheiro(s) de dados associados, e então executar os códigos. No entanto, se você tiver os direitos e permissões necessários para republicar o texto de sua pesquisa em outro formato, o Jupyter Notebook fornece um ambiente onde código e prosa podem ser justapostos e apresentados com igual peso e valor. + +Os Jupyter Notebooks têm visto uma adoção entusiástica na comunidade de ciência de dados, a ponto de cada vez mais substituir o Microsoft Word como um ambiente padrão de escrita da pesquisa. Dentro da literatura de humanidades digitais, pode-se encontrar referência a Jupyter Notebooks (separados do iPython, ou Python interativo, notebooks em 2014) desde 2015. + +Os Jupyter Notebooks também ganharam força nas humanidades digitais como uma ferramenta pedagógica. Diversos tutoriais do Programming Historian, como [Mineração de texto em Python através do leitor de recursos HTRC](/en/lessons/text-mining-with-extracted-features), e [Extraindo páginas ilustradas de bibliotecas digitais com python](/pt/licoes/extrair-paginas-ilustradas-com-python#jupyter-notebooks), assim como outros materiais pedagógicos para oficinas fazem referência à colocação de código em um Jupyter Notebook ou ao uso do Jupyter Notebook para orientar os estudantes, permitindo que eles remixem e editem o código livremente. 
O formato do notebook é ideal para o ensino, especialmente quando os estudantes têm diferentes níveis de proficiência técnica e de conforto com a escrita e a edição de código.

O objetivo dos Jupyter Notebooks é fornecer uma interface mais acessível para o código usado em pesquisa ou em práticas pedagógicas com suporte digital. Ferramentas como o Jupyter Notebook são menos significativas quando aprendidas ou ensinadas no vácuo, porque os Jupyter Notebooks em si não fazem nada para promover diretamente a pesquisa ou a pedagogia. Antes de começar esta lição, pense no que você quer obter usando Jupyter Notebooks. Deseja organizar o fluxo de trabalho do seu projeto? Você quer analisar seus dados, registrando o que você tenta ao longo do caminho? Você quer que os leitores da sua pesquisa possam seguir os lados teóricos e técnicos do seu argumento sem alternar entre um PDF e uma pasta de scripts? Quer ministrar oficinas de programação mais acessíveis a participantes com uma gama de conhecimentos técnicos? Você quer usar ou adaptar notebooks que outras pessoas escreveram? Tenha seu objetivo em mente enquanto trabalha nesta lição. Dependendo de como você imagina usar Jupyter Notebooks, você pode pular seções que são mais aplicáveis em outro contexto.

## Metas da lição

Nesta lição você aprenderá:

- O que são Jupyter Notebooks

- Como instalar, configurar e usar o pacote de software do Jupyter Notebook

- Quando os cadernos podem ser úteis em contextos de pesquisa e de ensino

Para esta lição, vamos trabalhar em um cenário de uso de Jupyter Notebooks para analisar dados e, em seguida, adaptar esse mesmo notebook e dados para uso em sala de aula. A aula também abordará temas mais avançados relacionados aos Jupyter Notebooks, tais como:

- Usando Jupyter Notebooks para linguagens de programação que não sejam Python

- Convertendo código Python existente em Jupyter Notebooks

- Usando Jupyter Notebooks para ampliar a capacidade computacional em ambientes como clusters de computação de alto desempenho

## Pré-requisitos

Esta lição é adequada para iniciantes intrépidos, assumindo pouca experiência técnica anterior.

Na verdade, o Jupyter Notebook é um ótimo recurso para pessoas que estão aprendendo a escrever código.

Dependendo do notebook que você quer executar, você pode precisar [instalar alguns módulos Python com pip](/pt/licoes/instalacao-modulos-python-pip), o que assume alguma familiaridade com a linha de comando (para [Windows aqui](/en/lessons/intro-to-powershell), ou [Mac/Linux aqui](/en/lessons/intro-to-bash) (em inglês)).

A lição foi escrita usando o Jupyter Notebook 6.0, mas a interface do usuário e a funcionalidade do software têm sido bastante consistentes entre as versões.

## Computação Letrada

A relação entre código legível por computador e texto legível por humanos ganhou visibilidade dentro da ciência da computação na década de 1970, quando Donald Knuth propôs o paradigma da "programação letrada" (ou “programação alfabetizada”). Em vez de organizar o código de acordo com requisitos que privilegiam a execução do código pelo computador, a programação letrada trata um programa como literatura compreensível aos seres humanos, priorizando o próprio processo de pensamento do programador. A programação letrada, tal como projetada por Knuth, assume a forma de prosa escrita, com código acionável por computador incorporado em macros (um formato abreviado para escrever código).
Ferramentas de programação letrada são usadas para gerar duas saídas do programa letrado: código "emaranhado" que pode ser executado pelo computador e documentação formatada "tecida".[^1]

Fernando Pérez, o criador do ambiente de programação iPython que acabou se tornando o Projeto Jupyter, cunhou o termo computação letrada para o modelo usado pelos Jupyter Notebooks:

> Um ambiente de computação letrado é aquele que permite aos usuários não apenas executar comandos, mas também armazenar os resultados desses comandos em um formato de documento literário, juntamente com figuras e com texto em formato livre que pode incluir expressões matemáticas formatadas. Na prática, ele pode ser visto como uma mistura de um ambiente de linha de comando, como o shell Unix, com um processador de texto, uma vez que os documentos resultantes podem ser lidos como texto, mas contêm blocos de código que foram executados pelo sistema computacional subjacente.[^2]

Jupyter não é nem o primeiro e nem o único exemplo de cadernos computacionais. Já na década de 1980, interfaces de notebook estavam disponíveis através de softwares como Wolfram Mathematica e MATLAB. Em 2013, Stéfan Sinclair e Geoffrey Rockwell propuseram "cadernos Voyant" baseados no modelo do Mathematica, que exporiam algumas das suposições que sustentam as [Ferramentas Voyant](https://perma.cc/9M5K-JWU7) e as tornariam configuráveis pelo usuário.[^3] Eles desenvolveram esse conceito ainda mais nos [cadernos Spyral de A Arte da Análise de Texto Literário](https://perma.cc/53HW-GGSJ).

Jupyter ganhou força em muitos campos como um ambiente de código aberto compatível com inúmeras linguagens de programação. O nome Jupyter é uma referência às três linguagens principais suportadas pelo projeto (Julia, Python e R), mas [há núcleos (kernels) disponíveis que tornam o Jupyter compatível com dezenas de linguagens](https://perma.cc/B448-XMJQ), incluindo Ruby, PHP, Javascript, SQL e Node.js. Pode não fazer sentido implementar projetos em todas essas linguagens usando Jupyter Notebooks (por exemplo, Omeka não permitirá que você instale um plugin escrito como um Jupyter Notebook), mas o ambiente Jupyter ainda pode ser valioso para documentar código, ensinar linguagens de programação e fornecer aos alunos um espaço onde eles podem facilmente experimentar com exemplos fornecidos.

## Instalando o Jupyter Notebook

Desde o final de 2019, existem dois grandes ambientes que você pode usar para executar Jupyter Notebooks: o Jupyter Notebook (não confundir com os próprios ficheiros do Jupyter Notebook, que possuem a extensão `.ipynb`) e o mais novo Jupyter Lab. O Jupyter Notebook é amplamente usado e bem documentado, e fornece um navegador simples de ficheiros, juntamente com o ambiente para criar, editar e executar os notebooks. O Jupyter Lab é mais complexo, com uma interface de usuário mais parecida com um Ambiente de Desenvolvimento Integrado (discutido em tutoriais anteriores do Programming Historian para [Windows](/pt/licoes/instalacao-windows), [Mac](/pt/licoes/instalacao-mac) e [Linux](/pt/licoes/instalacao-linux)). Embora o Jupyter Lab seja feito para, eventualmente, substituir o Jupyter Notebook, não há indicação de que o Jupyter Notebook deixará de ser suportado tão cedo. Devido à sua simplicidade comparativa e facilidade de uso para iniciantes, este tutorial usa o Jupyter Notebook como o software para executar ficheiros de notebook. Ambos os pacotes de software estão incluídos na Anaconda, descrita abaixo.
É mais fácil usar a Anaconda para instalar o Jupyter Notebook, mas se você já tem Python instalado em seu sistema e não quer lidar com o grande pacote Anaconda, você pode executar `pip3 install jupyter` (para Python 3).
+
+
+## Anaconda
+
+Anaconda é uma distribuição gratuita e de código aberto de Python e R que vem com mais de 1.400 pacotes, o gerenciador de pacotes Conda para instalação de pacotes adicionais, e o Anaconda Navigator, que permite gerenciar ambientes (por exemplo, você pode instalar diferentes conjuntos de pacotes para diferentes projetos, para que eles não causem conflitos uns com os outros) usando uma interface gráfica. Após a instalação da Anaconda, você pode usar o Anaconda Navigator para instalar novos pacotes (ou `conda install` através da linha de comando), mas muitos pacotes estão disponíveis apenas através de pip (ou seja, usando `pip install` através da linha de comando ou em seu Jupyter Notebook).
+
+Para a maioria dos propósitos, você deve optar pela versão Python 3 da Anaconda, mas alguns códigos legados ainda podem estar escritos em Python 2. Nesta lição, você usará Python 3. O instalador da Anaconda tem mais de 500 MB e, após a instalação, pode ocupar mais de 3 GB de espaço no disco rígido, por isso certifique-se de que você tem espaço suficiente no computador e uma conexão de rede rápida antes de começar.
+
    +Se o espaço do disco rígido é uma preocupação, você pode empacotar um notebook para que ele possa ser executado usando recursos gratuitos de computação em nuvem, em vez de fazer com que os usuários instalem o Anaconda. Isso pode ser especialmente útil em situações de oficina. Veja a seção abaixo. +
    +
+Para baixar e instalar a Anaconda, acesse o [site da Anaconda](https://www.anaconda.com/data-science-platform). Certifique-se de ter clicado no ícone do seu sistema operacional (que deve alterar o texto Anaconda [número da versão] para [sistema operacional selecionado], de forma a indicar o seu sistema operacional) e, em seguida, clique no botão Baixar na caixa para a versão atual do Python 3. Se você estiver no Windows, deve baixar um ficheiro `.exe`; em Mac, é `.pkg`; no Linux, é `.sh`.
+
+Abra normalmente o ficheiro para instalar o software em seu sistema operacional. Mais detalhes de instalação estão disponíveis nos [documentos da Anaconda](https://docs.anaconda.com/anaconda/install/), incluindo como instalar a Anaconda através da linha de comando em cada sistema operacional. Se o computador não conseguir abrir o ficheiro que você baixou, certifique-se de selecionar o sistema operacional correto antes de baixar o instalador. No Windows, não deixe de escolher a opção "Adicionar Anaconda à variável PATH" durante o processo de instalação, ou você não poderá lançar o Jupyter Notebook a partir da linha de comando.
+
+## Usando Jupyter Notebook para pesquisa
+
+Esta lição descreve como você pode inicialmente escrever um Jupyter Notebook para análise de dados como parte de um projeto de pesquisa e, em seguida, adaptá-lo para uso em sala de aula. Embora este exemplo em particular seja extraído de estudos de fãs, ele se concentra na conversão de datas, que é amplamente necessária na análise de dados históricos e literários.
+
+## Abrindo o Jupyter Notebook
+
+Supondo que você já tenha instalado a Anaconda como descrito acima, você pode abrir o Anaconda Navigator como qualquer outro aplicativo de software (você pode fechar o prompt sobre a criação de uma conta na nuvem da Anaconda; você não precisa de uma conta para trabalhar com a Anaconda). Na tela inicial, você deve ver um conjunto de ícones e breves sinopses sobre cada aplicativo incluído na Anaconda.
+
+Clique no botão "Iniciar" sob o ícone do Jupyter Notebook.
+
+{% include figure.html filename="tr-pt-introducao-jupyter-notebooks-1.png" alt="Imagem com captura de tela da interface do Anaconda Navigator" caption="Figura 1. Interface do Anaconda Navigator" %}
+
+Se você preferir usar a linha de comando em vez do Anaconda Navigator, uma vez que tenha a Anaconda instalada, você deve ser capaz de abrir uma nova janela do Terminal (Mac) ou Command Prompt (Win) e executar `jupyter notebook` para iniciar o navegador web com o aplicativo Jupyter Notebook. Se você estiver usando a linha de comando para iniciar o Jupyter Notebook, preste atenção no diretório em que você está quando o iniciar. Essa pasta se torna o diretório doméstico que aparecerá imediatamente na interface do Jupyter Notebook, conforme descrito abaixo.
+
+As duas abordagens abrirão uma nova janela ou guia no seu navegador padrão com a interface do Jupyter Notebook. O Jupyter Notebook é baseado no navegador: você só interage com ele através do seu navegador, mesmo quando o Jupyter Notebook está sendo executado no seu próprio computador.
+
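+Se quiser confirmar, de dentro de um notebook já aberto, a partir de qual diretório ele está sendo executado, segue um esboço mínimo em Python (apenas ilustrativo; `os.getcwd` faz parte da biblioteca padrão):
+
+```
+import os
+
+# mostra o diretório de trabalho atual do notebook, que corresponde
+# à pasta a partir da qual o Jupyter Notebook foi lançado
+print(os.getcwd())
+```
+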
    Se você estiver usando notebooks que importam pacotes Python que têm dependências de versões específicas de outros pacotes, você deve configurar um ambiente para usar com esses notebooks, para que você não lide com conflitos de versão (por exemplo, se um notebook requer a versão 1.0 de um pacote e outro requer a versão 2.0). [A documentação do Anaconda Navigator para gerenciar ambientes](https://perma.cc/E9TC-YMCU) (ou, se preferir usar a linha de comando, a [documentação do Conda](https://perma.cc/KHB8-U3CT)) fornece instruções passo a passo para criar, atualizar e ativar um ambiente. Para lançar o Jupyter Notebook dentro de um ambiente específico, você precisa primeiro ativar esse ambiente.
    +
+## Navegando na interface do Jupyter Notebook
+
+A interface do gerenciador de ficheiro(s) do Jupyter Notebook é a principal maneira de abrir um ficheiro Jupyter Notebook (`.ipynb`). Se você tentar abri-lo em um editor de texto simples, o notebook será exibido como um ficheiro JSON, não com blocos interativos de código. Para visualizar um notebook através da interface Jupyter, você tem que abrir o Jupyter Notebook primeiro (que será exibido em uma janela do navegador) e abrir o ficheiro de dentro do Jupyter Notebook. Infelizmente, não há como definir o Jupyter Notebook como o aplicativo de software padrão para abrir ficheiros `.ipynb` quando você clica duas vezes neles.
+
+Quando você lança o Jupyter Notebook a partir do Anaconda Navigator, ele exibe automaticamente o diretório doméstico. Este é geralmente o diretório com seu nome de usuário em um Mac (/Users/seu nome de usuário). Em um PC, geralmente é `C:\`. Se você abrir o Jupyter Notebook a partir da linha de comando, ele exibirá o conteúdo da pasta em que você estava quando o lançou (usando a linha de comando, você também pode lançar diretamente um notebook específico, por exemplo, `jupyter notebook example.ipynb`).
+
+Para evitar desordenar esta pasta, você pode fazer uma nova pasta dentro deste diretório para seus notebooks. Você pode fazer isso na sua interface usual de gerenciamento de ficheiro(s) (Finder no Mac, ou File Explorer no Windows), ou dentro do próprio Jupyter Notebook, já que o Jupyter Notebook, assim como o Google Drive, fornece uma interface de gerenciamento de ficheiro(s) dentro de um navegador, bem como uma interface de menu e de barra de ferramentas para a criação de ficheiro(s). Para adicionar uma nova pasta no Jupyter Notebook, clique em "Novo" no canto superior direito e escolha "Pasta". Isso criará uma nova pasta chamada "Pasta Sem Título". Para alterar o nome, clique na caixa de seleção à esquerda da "Pasta Sem Título" e, em seguida, clique no botão "Renomear" que aparece na guia "ficheiro(s)". Nomeie a pasta como "notebooks". Clique nela para abrir essa pasta.
+
+## Upload dos dados do exemplo
+O ficheiro CSV de exemplo para esta lição é um extrato de metadados de fan fiction de Harry Potter coletados do site de fanfic italiano https://efpfanfic.net, depois limpos usando uma combinação de [expressões regulares](/en/lessons/understanding-regular-expressions) e [OpenRefine](/pt/licoes/limpar-dados-openrefine). O CSV tem três colunas: a classificação da história (similar a uma classificação de filme), a data em que foi originalmente publicada e a data da atualização mais recente. As opções de classificação são verde (verde), giallo (amarelo), arancione (laranja) e rosso (vermelho). As datas de publicação e de atualização são criadas automaticamente quando a história é postada no site ou atualizada, então você pode considerá-las consistentes.
+
+Baixe o [ficheiro CSV](/assets/jupyter-notebooks/ph-jupyter-notebook-example.csv).
+
+Dentro do navegador de ficheiro(s) do Jupyter Notebook, você deve estar dentro do diretório de notebooks que acabou de criar. No canto superior direito, clique no botão "Carregar" e carregue o ficheiro CSV de amostra. Será mais fácil de acessar se estiver no mesmo diretório do Jupyter Notebook que você criará na próxima etapa a fim de converter as datas.
+
+{% include figure.html filename="tr-pt-introducao-jupyter-notebooks-2.png" alt="Imagem com captura de tela sobre o upload de ficheiros na interface Jupyter Notebook" caption="Figura 2. Upload de ficheiro(s) na interface Jupyter Notebook" %}
+
+Observe que esta não é a única maneira de fazer os ficheiro(s) aparecerem no gerenciador de ficheiro(s) do Jupyter Notebook. A pasta de notebooks que você criou é um diretório regular em seu computador, e assim você também pode usar sua interface usual de gerenciamento de ficheiro(s) (por exemplo, Finder no Mac, ou File Explorer no Windows) para colocar ficheiro(s) `.ipynb` e/ou de dados neste diretório. Os Jupyter Notebooks usam a localização do próprio ficheiro do notebook (o ficheiro `.ipynb`) como o caminho de partida padrão. Para oficinas e cursos, pode fazer sentido criar uma pasta onde você pode armazenar o notebook, qualquer imagem anexada e os dados com os quais você vai trabalhar, todos juntos. Se nem tudo estiver na mesma pasta, você terá que incluir o caminho ao referenciar cada item ou usar código Python dentro do notebook para alterar o diretório de trabalho.
+
+## Criando um novo notebook
+
+Dentro da pasta de notebooks, crie um novo Jupyter Notebook para converter as datas para o seu projeto de pesquisa. Clique no botão "Novo" no canto superior direito da interface do gerenciador de ficheiro(s) do Jupyter Notebook. Se você acabou de instalar a Anaconda como descrito acima, sua única opção será criar um Jupyter Notebook usando o _kernel_ Python 3 (o componente de backend que realmente executa o código escrito no notebook), mas vamos discutir abaixo como adicionar kernels para outras linguagens de programação. Clique em "Python 3", e o Jupyter Notebook abrirá uma nova guia com a interface para os próprios Jupyter Notebooks. Por padrão, o notebook será chamado de "Sem título"; você pode clicar nesse texto na parte superior da tela para renomeá-lo.
+
+{% include figure.html filename="tr-pt-introducao-jupyter-notebooks-3.png" alt="Imagem com captura de tela da interface do Jupyter Notebook para criar novo ficheiro" caption="Figura 3. Criando um novo Jupyter Notebook" %}
+
+## Trabalhando em Jupyter Notebooks
+
+Um notebook é composto de células: caixas que contêm código ou texto legível por humanos. Cada célula tem um tipo, que pode ser selecionado a partir das opções do menu suspenso (drop-down). A opção padrão é "Code"; as caixas de texto legível por humanos devem usar o tipo "Markdown" e precisarão ser escritas usando as convenções de formatação do Markdown. Para saber mais sobre Markdown, veja a lição do Programming Historian “[Introdução ao Markdown](/pt/licoes/introducao-ao-markdown)”.
+
+Quando você cria um novo Jupyter Notebook, a primeira célula será uma célula de código. No topo da interface do Jupyter Notebook está uma barra de ferramentas com funções que se aplicam à célula selecionada atualmente. A primeira opção do menu suspenso é, por padrão, "Code". Clique nesse menu e selecione "Markdown" (você também pode usar um atalho de teclado, _esc + m_, para alterar a célula atual para Markdown, e _esc + y_ muda de volta para uma célula de código). Vamos começar este caderno com um título e uma breve explicação do que o caderno está fazendo. No momento, isso é apenas para sua própria memória e referência; você não quer investir muito em prosa e formatação nesta fase do projeto, quando não sabe se vai acabar usando este código como parte de seu projeto final ou se vai usar uma ferramenta ou método diferente. Mas ainda pode ser útil incluir algumas células Markdown com notas para ajudá-lo a reconstruir seu processo.
+
+Cole o seguinte na primeira célula.
Se a primeira linha não aparecer com uma fonte grande (como um cabeçalho), certifique-se de ter selecionado "Markdown" no menu suspenso na parte superior.
+
+
+```
+# Fanfic date conversion
+Converting published & updated dates for Italian fanfic into days of the week.
+```
+
+{% include figure.html filename="tr-pt-introducao-jupyter-notebooks-4.png" alt="Imagem com captura de tela da interface do Jupyter Notebook para editar Markdown" caption="Figura 4. Editando a célula Markdown em um Jupyter Notebook" %}
+
+Quando você está editando uma célula, você pode usar _Ctrl + Z_ (Win) ou _Command + Z_ (Mac) para desfazer as alterações que você fez. Cada célula mantém seu próprio histórico de edição; mesmo que você passe para uma célula diferente e faça edições lá, você pode posteriormente clicar de volta na primeira célula e desfazer suas alterações anteriores lá, sem perder as alterações realizadas na segunda célula.
+
+Para deixar o modo de edição e "executar" esta célula (para uma célula Markdown, isso não faz nada, apenas move o cursor mais para baixo no notebook), você pode clicar na barra de ferramentas ou pressionar Ctrl+Enter (Ctrl+Return no Mac). Se você quiser retomar a edição mais tarde, você pode clicar duas vezes nela ou selecionar a célula (que mostrará uma linha azul vertical à esquerda uma vez selecionada) clicando-a uma vez e, em seguida, pressionando a tecla Enter (Win) ou Return (Mac). Para deixar o modo de edição, você pode clicar na barra de ferramentas ou pressionar Ctrl+Enter (Ctrl+Return no Mac). Se você quiser executar sua célula atual e adicionar uma nova célula (por padrão, uma célula de código) imediatamente abaixo dela, você pode pressionar Alt+Enter (Option+Enter no Mac).
+
+Em seguida, você precisa descobrir como fazer a conversão. A busca por termos relevantes pode levá-lo a essa [discussão do StackOverflow](https://perma.cc/JG6H-KZAZ), e a primeira resposta envolve o uso do módulo Python datetime. Como primeiro passo, você precisa importar datetime, usando uma célula de código. Você também sabe que o seu ficheiro de entrada é um CSV, então você deve importar o módulo csv também.
+
+Para adicionar uma nova célula, clique no botão + (mais) na barra de ferramentas (ou use o atalho do teclado _esc + b_). Isso criará uma nova célula de código abaixo da célula que está atualmente selecionada. Crie uma nova célula de código e cole o código a seguir para importar os módulos Python:
+
+
+```
+import datetime
+import csv
+
+```
+
+Pensando desde já na possibilidade de compartilhar este notebook ou parte dele, pode ser útil dividir as importações de módulos em células individuais, e colocar o código em si em outra célula, para que você possa incluir uma célula Markdown que explique o que cada uma delas está fazendo.
+
+Ambos os pacotes que você está importando para este notebook já estão instalados como parte da Anaconda, mas existem muitos pacotes de nicho relevantes para a pesquisa (por exemplo, o [Classical Languages Toolkit, CLTK](https://perma.cc/Q9Q8-9TNZ), para fazer análise de texto em línguas históricas) que não estão incluídos com a Anaconda e não estão disponíveis através do _instalador conda_. Se você precisa de um pacote como esse, você tem que instalá-lo usando _pip_. Instalar pacotes de dentro do Jupyter Notebook pode ser um pouco complicado, porque pode haver diferenças entre o kernel Jupyter que o notebook está usando e outras versões do Python que você pode ter instalado no seu computador.
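+Para diagnosticar esse tipo de situação, segue um esboço mínimo (apenas ilustrativo) que mostra qual instalação do Python o kernel do notebook está realmente usando; os caminhos exibidos variam conforme o sistema e o ambiente:
+
+```
+import sys
+
+# mostra o executável e a versão do Python usados pelo kernel deste notebook;
+# se forem diferentes do Python da sua linha de comando, um pacote instalado
+# com pip fora do notebook pode não estar visível aqui
+print(sys.executable)
+print(sys.version)
+```
+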
Você pode encontrar uma longa discussão técnica sobre esses problemas neste [post de blog](https://perma.cc/N6M6-ZF5G).
+
+Se você está trabalhando em um notebook que deseja compartilhar e ele inclui pacotes menos comuns, você pode incluir uma célula Markdown instruindo os usuários a instalar os pacotes com antecedência usando conda ou pip, ou você pode usar:
+
+```
+import sys
+!conda install --yes --prefix {sys.prefix} YourModuleNameHere
+
+```
+
+para instalar algo a partir do notebook usando conda; a sintaxe `!` indica que o código está executando algo da linha de comando, em vez do kernel Jupyter. Ou, se o pacote não estiver disponível no conda (muitos pacotes de nicho relevantes para a pesquisa não estão), você pode usar `pip`:
+
+```
+import sys
+
+!{sys.executable} -m pip install YourModuleNameHere
+
+```
+
+Se você não tinha o Python instalado no computador antes de instalar a Anaconda para esta lição, talvez seja necessário adicionar o pacote pip para poder usá-lo para instalar outros pacotes. Você pode adicioná-lo através da GUI (interface gráfica do usuário) do Anaconda Navigator, ou executar `conda install pip` a partir da linha de comando.
+
+Voltando ao nosso exemplo, em seguida adicione uma nova célula de código e cole o seguinte código (certifique-se de que incluiu os espaçamentos):
+
+```
+with open('ph-jupyter-notebook-example.csv') as f:
+    csv_reader = csv.reader(f, delimiter=',')
+    for row in csv_reader:
+        datetime.datetime.strptime(row[1], '%d/%m/%Y').strftime('%A')
+        print(row)
+```
+
+Clicar no botão 'play' na barra de ferramentas quando você tem uma célula de código selecionada executa o código dentro da célula (se você tentar executar este código depois de executar as declarações de importação, verá um erro: "ValueError: time data ‘1/7/18’ does not match format ‘%d/%m/%Y’". Não se preocupe, vamos depurar isso a seguir).
+
+Depois de executar uma célula de código, um número aparecerá entre colchetes à esquerda da célula. Este número indica a ordem em que a célula foi executada. Se você voltar e executar a célula novamente, o número é atualizado.
+
+Se um número não aparecer imediatamente ao lado da célula, você verá um asterisco entre os colchetes. Isso significa que a célula de código não terminou de executar. Isso é comum para códigos de computação intensiva (por exemplo, processamento de linguagem natural) ou tarefas de longa duração, como extração de conteúdo na web. Sempre que uma célula de código está sendo executada, o favicon na guia do navegador do notebook muda para uma ampulheta. Se você quiser mudar de guia e fazer outra coisa enquanto o código estiver em execução, saberá que a execução foi concluída quando a ampulheta mudar de volta para o ícone do notebook.
+
+
+{% include figure.html filename="tr-pt-introducao-jupyter-notebooks-5.png" alt="Imagem com captura de tela sobre a execução de código no Jupyter Notebook" caption="Figura 5. Executando uma célula de código em um Jupyter Notebook" %}
+
+O Jupyter Notebook funciona melhor se você executar as células sequencialmente. Às vezes, você pode obter erros ou saídas incorretas se executar as células fora de ordem ou tentar editar e executar iterativamente diferentes partes do notebook. Se você fez muitas alterações, executou blocos de código de forma não linear e está recebendo uma saída estranha, você pode redefinir o Jupyter Notebook clicando em _Kernel_ no menu e escolhendo _Restart & Clear Output_. Mesmo que você não tenha notado nada de estranho, é uma boa ideia usar o _Restart & Clear Output_ uma vez que tenha terminado de escrever o código, para ter certeza de que o resultado está correto.
+
+Depois de executar a segunda célula de código, você verá um erro. Para descobrir o que está acontecendo, você pode consultar a [documentação do datetime](https://perma.cc/S92Z-3QVM), que explica cada uma das diferentes opções de formatação. Lá, você verá que a única opção de valores para “dia” assume o uso de dois dígitos (ou seja, dias de um dígito são prefixados com um 0). Olhando para os dados do exemplo, os meses (listados em segundo lugar nesta ordem de data) já são acrescidos de zero quando têm apenas um dígito, mas não os dias. Você tem duas opções: você pode tentar alterar os dados, ou você pode tentar alterar seu código.
+
+Digamos que você queira tentar uma abordagem diferente, mas quer deixar registrado o que você fez até agora, no caso de querer revisitar esse código e talvez usá-lo depois de alterar os dados. Para lembrar do que aconteceu, adicione uma célula Markdown acima da sua segunda célula de código. Clique na primeira célula de código e clique no botão mais na barra de ferramentas. (Se você clicar no botão de adição na barra de ferramentas depois de executar a última célula de código, a nova célula aparecerá na parte inferior do notebook; você pode movê-la para onde quiser clicando no botão de seta para cima.) Certifique-se de que está no modo Markdown e cole o seguinte texto:
+
+```
+### Não funciona, precisa de datas precedidas por zero
+[documentação do datetime](https://docs.python.org/2/library/datetime.html?highlight=strftime#strftime-and-strptime-behavior).
+Modificar o ficheiro de origem?
+
+```
+
+Lendo ainda mais na [discussão do StackOverflow](https://perma.cc/EN55-P57H), há outra abordagem que usa uma biblioteca diferente, dateutil, que parece ser mais flexível com os tipos de datas que aceita. Volte para a célula usada para importar módulos e edite-a para adicionar a nova biblioteca (em qualquer lugar dessa célula, desde que cada declaração de importação esteja em sua própria linha):
+
+```
+import dateutil.parser
+
+```
+
+Re-execute essa célula de código; note que o número ao lado da célula muda na segunda vez que você a executa.
+
+Agora crie uma nova célula Markdown na parte inferior do notebook e cole:
+
+
+```
+#### tentando dateutil para analisar datas, conforme https://stackoverflow.com/a/16115575
+
+```
+
+Abaixo dela, adicione uma nova célula de código com o seguinte código (prestando atenção ao espaçamento, de modo que o código seja indentado assim como você vê abaixo):
+
+```
+with open('ph-jupyter-notebook-example.csv') as f:
+    csv_reader = csv.reader(f, delimiter=',')
+    for row in csv_reader:
+        parseddate = dateutil.parser.parse(row[1])
+        print(parseddate)
+```
+
+Execute a célula com o código que você acabou de adicionar. Pode levar mais tempo; continue esperando até que o asterisco ao lado da célula de código se transforme em um número. O resultado deve mostrar a lista de datas de publicação, formatadas de forma diferente, com hífens em vez de barras e com a adição das horas, minutos e segundos (como zeros, porque as datas registradas não incluem esses dados). À primeira vista, parece que funcionou, mas se você comparar mais de perto com o ficheiro de origem, verá que o módulo dateutil não está sendo consistente em como analisa as datas.
Datas em que o valor do dia é maior que 12 estão sendo analisadas corretamente (ele sabe que um valor maior que 12 não pode ser um mês), mas quando o valor do dia é 12 ou menos, a data está sendo interpretada com o mês primeiro. A primeira linha do ficheiro de origem tem a data 1/7/18, que é entendida como "2018-01-07 00:00:00". Na documentação do dateutil, você descobrirá que pode [especificar `dayfirst=True`](https://perma.cc/W54E-SP5Z) para corrigir isso. Edite a última célula de código e altere a penúltima linha para:
+
+```
+parseddate = dateutil.parser.parse(row[1], dayfirst=True)
+```
+
+Quando você executar a linha novamente, verá que todas as datas foram analisadas corretamente.
+
+Analisar a data é apenas o primeiro passo – você ainda precisa usar o módulo datetime para converter as datas em dias da semana.
+
+Exclua a última linha do bloco de código e substitua-a pelo seguinte (certificando-se de que você tenha o mesmo nível de recuo da última linha anterior, para ambas as linhas):
+
+```
+dayofweek = datetime.date.strftime(parseddate, '%A')
+
+print(dayofweek)
+
+```
+
+Execute o bloco de código novamente. Isso deve lhe dar uma lista de dias da semana.
+
+Agora que você tem código para analisar e reformatar uma data, você precisa fazê-lo para ambas as datas em cada linha do seu ficheiro de origem. Como você sabe que tem código funcionando na célula de código atual, se não se sentir muito confortável com Python, você pode querer copiar a célula de código atual antes de fazer modificações. Selecione a célula que deseja copiar e clique no botão copiar na barra de ferramentas; o botão de colar irá colar a célula abaixo de qualquer célula atualmente selecionada. Fazer uma cópia permite que você faça livremente alterações no código, sabendo que sempre pode voltar facilmente para uma versão que funciona.
+
+Se você não quiser resolver isso por conta própria, pode copiar e colar esse código em uma nova célula de código ou substituir a célula de código atual:
+
+```
+#identifica o ficheiro fonte a ser aberto, chama-o f
+with open('ph-jupyter-notebook-example.csv') as f:
+    #cria um ficheiro de saída (referido como "out" no notebook) para ser gravado
+    with open('ph-jupyter-notebook-example-dayofweek.csv', 'w') as out:
+        #define "csv_reader" como executando a função csv.reader no ficheiro
+        csv_reader = csv.reader(f, delimiter=',')
+        #define "csv_writer" como executando a função csv.writer para "out" (o ficheiro de saída)
+        csv_writer = csv.writer(out)
+        #para cada linha que está sendo lida pelo csv_reader...
+        for row in csv_reader:
+            #cria uma lista chamada "values" com o conteúdo da linha
+            values = list(row)
+            #define "rating" como a primeira coisa na lista
+            #contagem em Python começa com 0, não 1
+            rating = values[0]
+            #define "parseddatepub" como a segunda coisa (1, porque começamos com 0) na lista,
+            #convertida em um formato de data padrão usando dateutil.parser
+            #e, quando essas datas são analisadas, o analisador deve saber
+            #que o primeiro valor na sequência é o dia
+            parseddatepub = dateutil.parser.parse(values[1], dayfirst=True)
+            #mesmo que acima para a data atualizada, a terceira coisa (2) na lista
+            parseddateupdate = dateutil.parser.parse(values[2], dayfirst=True)
+            #define "dayofweekpub" como parseddatepub (definido acima), convertido para o dia da semana
+            #%A é usado para mudar para o dia da semana
+            #pode ver outros formatos aqui: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
+            dayofweekpub = datetime.date.strftime(parseddatepub, '%A')
+            #mesma coisa para a data de atualização
+            dayofweekupdate = datetime.date.strftime(parseddateupdate, '%A')
+            #cria uma lista com a classificação e as novas datas formatadas
+            updatedvalues = [rating, dayofweekpub, dayofweekupdate]
+            #escreve os valores atualizados como uma nova linha no ficheiro de saída
+            csv_writer.writerow(updatedvalues)
+            print(updatedvalues)
+```
+
+Depois de executar este código, você terá um novo ficheiro, ph-jupyter-notebook-example-dayofweek.csv, com seus dados no formato que você precisa para a análise.
+
+Agora que você tem um código que funciona para converter as datas do formato que você tem para o formato que você precisa, pode limpar os falsos começos e as notas para si mesmo. Você vai querer manter a primeira célula de código, com as declarações de importação, e a primeira célula Markdown, com o título e a descrição, mas deve excluir outras células de código e Markdown que não sejam o seu código final. Para excluir uma célula, clique nela e clique no botão de tesoura na barra de ferramentas. Se você excluir uma célula por engano, pode clicar em Editar no menu e escolher "Desfazer excluir células".
+
+## Salvando, exportando e publicando Jupyter Notebooks
+
+O Jupyter salva automaticamente seu trabalho de forma periódica, criando "pontos de verificação" (checkpoints). Se algo der errado com seu notebook, você pode reverter para um ponto de verificação anterior indo em "File", em seguida "Revert to Checkpoint", e escolhendo um horário. Dito isto, ainda é importante salvar seu notebook (usando o botão de salvar), porque, se você fechar e desligar o kernel do notebook (incluindo reiniciar o kernel), os pontos de verificação serão perdidos.
+
+Você também pode baixar o notebook (_File > Download as_) em vários formatos de ficheiro diferentes. Baixar no formato Notebook (`.ipynb`) é útil se você quiser compartilhar seu código em seu formato completo de notebook. Você também pode baixá-lo como código na linguagem em que seu notebook estiver (por exemplo, `.r` se em R, `.py` se em Python ou `.js` se em JavaScript), como um ficheiro `.html`, como um ficheiro Markdown (`.md`) ou como um PDF via LaTeX. Se você baixá-lo como código, as células Markdown se tornam comentários (se você quiser converter um ficheiro `.ipynb` para outro formato depois de baixá-lo, pode usar a ferramenta [nbconvert](https://perma.cc/6J73-KCK5)).
+
+Se você está trabalhando em um projeto de pesquisa, você pode usar um Jupyter Notebook, ou uma série de notebooks, ao longo do caminho para acompanhar seu fluxo de trabalho.
Alguns estudiosos postam esses cadernos no GitHub, juntamente com slides ou PDFs de pôsteres e dados de origem (ou metadados, se os direitos autorais permitirem), para acompanhar apresentações e palestras. O GitHub renderiza versões não interativas de ficheiro(s) de notebook, para que possam ser visualizados dentro de um repositório. Alternativamente, você pode colar a URL de um repositório do GitHub que tem notebooks Jupyter em [nbviewer](https://nbviewer.jupyter.org/), o que às vezes pode ser uma visualização mais rápida e confiável. Você pode querer incluir uma célula Markdown com uma citação recomendada para o seu Jupyter Notebook e uma referência para o repositório do GitHub onde ele está armazenado, especialmente se o seu notebook inclui código que outros possam reutilizar para análises semelhantes.
+
+O código que você acabou de desenvolver como parte desta lição pertence a algum lugar no meio de um projeto real. Se você estiver usando notebooks para documentar seu fluxo de trabalho, você pode optar por adicionar a nova célula de código a um notebook existente, em vez de baixá-lo como um notebook separado e autônomo. Os Jupyter Notebooks podem ser particularmente úteis para documentar fluxos de trabalho de projetos quando você está trabalhando com colaboradores que só podem estar envolvidos por um curto período de tempo (como estagiários de graduação no período de férias escolares). Com colaboradores de curto prazo, é importante ajudá-los a entender e começar a usar os fluxos de trabalho do projeto sem muito tempo de iniciação, e os Jupyter Notebooks podem definir esses fluxos de trabalho passo a passo, explicar onde e como os ficheiro(s) são armazenados e fornecer indicações de tutoriais externos e materiais de treinamento, para ajudar colaboradores menos familiarizados com os fundamentos técnicos do projeto a começar. Por exemplo, dois projetos que usaram Jupyter Notebooks para publicar fluxos de trabalho são o Projeto Realismo Socialista de Sarah McEleney e a [“mineração de texto da literatura infantil inglesa 1789-1914 para a representação de insetos e outros rastejantes assustadores”](https://perma.cc/JD8N-P79G).
+
+À medida que seu projeto progride, se você estiver publicando através de canais de acesso aberto e se seus conjuntos de dados podem ser compartilhados livremente, os Jupyter Notebooks podem fornecer um formato ideal para tornar o código que sustenta seu argumento acadêmico visível, testável e reutilizável. Embora os periódicos e publicações possam não aceitar os Jupyter Notebooks como um formato de submissão, você pode desenvolver uma "versão" do seu artigo que inclui o texto completo (como células Markdown), com células de código integradas ao fluxo da narrativa acadêmica, como uma ilustração imediatamente acessível da análise que você está descrevendo. Você também pode incluir as células de código que compõem os fluxos de trabalho de preparação de dados como um apêndice, seja no mesmo notebook ou em um separado. Integrar o código com o texto de um artigo acadêmico torna muito mais provável que os leitores realmente se envolvam com o código, já que eles podem simplesmente executá-lo dentro do mesmo caderno onde estão lendo o argumento. Alguns estudiosos, particularmente na Europa, também postam seus cadernos no [Zenodo](https://zenodo.org/), um arquivo para dados de pesquisa, independentemente do país de origem, financiador ou disciplina. O Zenodo suporta conjuntos de dados de até 50 GB (vs.
o limite de tamanho de ficheiro de 100 MB no GitHub) e fornece DOIs para o material carregado, incluindo notebooks. Alguns estudiosos combinam o arquivamento no Zenodo, para sustentabilidade, com a publicação no GitHub, para facilitar que os cadernos sejam encontrados, incluindo o DOI do Zenodo como parte do ficheiro readme.md no repositório do GitHub que contém os notebooks. Como exemplo, o caderno de workshop ["Análise de Dados Aplicada", de Giovanni Colavizza e Matteo Romanello para o DHOxSS 2019](https://perma.cc/6S7H-LQEA), é publicado no GitHub, mas inclui [um DOI do Zenodo](https://doi.org/10.5281/zenodo.3352830).
+
+Embora a argumentação e o código totalmente integrados ainda sejam difíceis de encontrar, devido à falta de um local para publicar esse tipo de trabalho, os estudiosos começaram a usar os Jupyter Notebooks como um passo incremental em direção a publicações computacionais dinâmicas e mais interativas. José Calvo tem um exemplo de um [caderno acompanhando um artigo sobre estilometria](https://perma.cc/Y9CK-CFK8) (em espanhol), e Jed Dobson publicou um [conjunto de cadernos](https://perma.cc/UDA3-467P) para acompanhar seu livro _Critical Digital Humanities: The Search for a Methodology_, que aborda diretamente os Jupyter Notebooks como objetos acadêmicos (p. 39-41).
+
+## Usando Jupyter Notebook para ensinar
+
+O Jupyter Notebook é uma ótima ferramenta para ensinar programação, ou para ensinar conceitos que envolvem programação, como modelagem de tópicos ou vetores de palavras. A capacidade de fornecer instruções e explicações como Markdown permite que os educadores forneçam notas detalhadas sobre o código alternando células Markdown e células de código, de modo que o texto em Markdown explique o código na célula logo abaixo. Isso é útil para oficinas práticas, pois as instruções e o código podem ser escritos com antecedência. Isso permite que os participantes abram o notebook, baixem um conjunto de dados e executem o código conforme está. Se você espera ministrar uma oficina onde os alunos terão diferentes níveis de familiaridade com a programação, você pode configurar o notebook para ter tarefas suplementares para os alunos que se sentem confortáveis em modificar o código. Ao mesmo tempo, mesmo os alunos que hesitam em tocar no código ainda poderão alcançar o resultado principal da oficina apenas executando células de código pré-escritas.
+
+Como outra abordagem, você também pode usar Jupyter Notebooks para escrever código à medida que o desenvolve. Em tal oficina, os alunos podem começar com um caderno em branco e escrever o código junto com você. As células ajudam a segmentar o código conforme você o escreve, diferentemente de um editor de texto ou IDE (Ambiente de Desenvolvimento Integrado), que não divide o código de forma tão clara e pode causar confusão, especialmente ao ensinar iniciantes.
+
+Você pode usar Jupyter Notebooks para tarefas em sala de aula dando instruções em Markdown e fazendo com que os alunos escrevam código em uma célula em branco com base nas instruções. Dessa forma, você pode criar uma tarefa de programação interativa que ensina aos alunos não apenas a sintaxe e o vocabulário de uma linguagem de programação, mas também pode explicar as melhores práticas de programação em geral.
+
+Se você já está usando Jupyter Notebooks para documentar o fluxo de trabalho do seu projeto, você pode ser capaz de reformular esses cadernos de pesquisa para uso em sala de aula, como uma maneira de trazer sua pesquisa para a sala de aula.
Este [exemplo de caderno pedagógico](/assets/jupyter-notebooks/ph-jupyter-notebook-example.ipynb) é um híbrido de algumas das abordagens pedagógicas descritas acima. A primeira seção do caderno destina-se a estudantes que têm pouca ou nenhuma experiência anterior executando código; o principal resultado de aprendizagem é comparar o tempo necessário para converter formatos de dados manualmente com o tempo de fazê-lo com código. Você poderia usar este caderno para uma sessão prática de laboratório em uma introdução às humanidades digitais ou à história digital, onde todos os alunos instalam a Anaconda e aprendem o básico do Jupyter Notebook. Se a turma tem uma mistura de alunos sem formação técnica e alunos com exposição prévia ao Python, você pode orientar os alunos com experiência de programação a trabalhar em conjunto, em grupos de dois ou três, para propor soluções para os prompts na segunda parte do notebook. Tenha em mente que, se você usar uma atividade de sala de aula como esta como uma forma de fazer com que alunos de ciência da computação escrevam código que ajude seu projeto de pesquisa, eles devem ser creditados como colaboradores e reconhecidos em publicações subsequentes vindas do projeto.[^4]
+
+Existem muitos cursos e workshops de 'Introdução ao Python' nas Humanidades Digitais que utilizam Jupyter Notebook (incluindo [Introdução ao Python e Desenvolvimento web com Python para as Humanidades](https://perma.cc/ANL2-K7SM), de Thibault Clérice, traduzido de material de Matthew Munson). O Jupyter Notebook também é comumente usado em oficinas de análise de texto, como a [oficina de vetores de palavras na DH 2018](https://perma.cc/5UZ9-25XW), ministrada por Eun Seo Jo, Javier de la Rosa e Scott Bailey.
+
+Ensinar com Jupyter Notebooks nem sempre tem que envolver o processo demorado de baixar e instalar a Anaconda, especialmente se você está planejando apenas uma ou duas aulas que envolvem notebooks. Se suas atividades em sala de aula com Jupyter Notebooks envolvem o uso de dados de exemplo que você já preparou e se você já escreveu pelo menos parte do código, você pode querer explorar a execução de Jupyter Notebooks usando recursos gratuitos de computação em nuvem, desde que seus alunos tenham a garantia de conectividade confiável com a internet em sala de aula. Rodar notebooks na nuvem também fornece um ambiente consistente para todos os alunos, poupando você de ter que lidar com diferenças entre Windows e Mac ou de fornecer uma alternativa para estudantes cujos laptops não têm espaço ou memória para executar a Anaconda efetivamente.
+
+Como as opções estão evoluindo rapidamente, é melhor usar seu mecanismo de busca favorito para encontrar uma lista atualizada de opções de computação em nuvem para Jupyter Notebooks. Um projeto que tem tido adoção particular entre usuários acadêmicos de notebooks é o [MyBinder](https://mybinder.org/). O MyBinder recebe um repositório do GitHub que contém os ficheiro(s) de notebook (`.ipynb`), os dados relacionados (imagens incorporadas, conjuntos de dados que você deseja usar nos notebooks etc.) e as informações sobre pacotes e dependências necessários (em um ficheiro `requirements.txt` ou `environment.yml`), e o torna inicializável usando um servidor de nuvem. Uma vez que o MyBinder esteja configurado para o seu repositório GitHub, você pode adicionar um "crachá" do Binder ao ficheiro readme do repositório. Quem estiver vendo o repositório pode lançar o notebook diretamente do navegador, sem ter que baixar ou instalar nada.
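+A título de ilustração, um `requirements.txt` mínimo para um repositório Binder baseado no exemplo desta lição poderia conter uma única linha (esboço hipotético: no PyPI, o módulo dateutil é distribuído como `python-dateutil`; os módulos `datetime` e `csv` fazem parte da biblioteca padrão do Python e não precisam ser listados):
+
+```
+# requirements.txt (exemplo hipotético para o MyBinder)
+python-dateutil
+```
+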
+
+Como os dados que o notebook precisa acessar devem ser incluídos no repositório, isso não funcionará para todas as situações (por exemplo, se os dados não puderem ser redistribuídos legalmente no GitHub, excederem o tamanho máximo de ficheiro do GitHub e não puderem ser baixados de outros lugares como parte da configuração do ambiente Binder, ou se você quiser que as pessoas usem o notebook com seus próprios dados), mas é uma ótima opção para oficinas ou aulas onde todos estão trabalhando com os mesmos dados compartilháveis.
+
+Se você quiser começar a explorar opções de nuvem, Shawn Graham criou [alguns modelos para configurar notebooks Jupyter em Python e R para uso no Binder](https://perma.cc/T25E-BFH4).
+
+Finalmente, se você precisa manter seus notebooks fora da nuvem (por exemplo, devido a dados sensíveis ou de outra forma restritos), mas quiser fornecer um ambiente consistente para todos os seus alunos, você pode explorar o [JupyterHub](https://perma.cc/8EH7-N22K), que tem sido adotado como infraestrutura técnica central para um número crescente de programas de ciência de dados.
+
+## Convertendo código Python existente
+
+Mesmo que você goste da ideia de usar Jupyter Notebooks, qualquer conversão de formato requer trabalho adicional. Se você já tem seu código escrito como scripts Python, a conversão para Jupyter Notebooks é bastante simples. Você pode copiar e colar o código do seu ficheiro `.py` em uma única célula de código de um novo notebook e, em seguida, dividir a célula de código em segmentos e adicionar células Markdown conforme necessário.
+
+Alternativamente, pode ser mais fácil segmentar à medida que você transfere o código, copiando um segmento de cada vez em uma nova célula de código. Qualquer método funciona e é uma questão de preferência pessoal.
+
+Há também ferramentas como o [pacote 'p2j'](https://perma.cc/5YUE-YBH7), que convertem automaticamente código Python existente em notebooks Jupyter, seguindo um conjunto documentado de convenções (por exemplo, transformando comentários em células Markdown).
+
+## Cadernos Jupyter para outras linguagens de programação
+
+Os Jupyter Notebooks permitem que você use muitas linguagens de programação diferentes, incluindo R, Julia, JavaScript, PHP ou Ruby. Uma lista atual das linguagens disponíveis pode ser encontrada na página [Jupyter Kernels](https://perma.cc/B448-XMJQ) no GitHub.
+
+Embora o Python seja suportado por padrão quando você instala o Jupyter Notebook através da Anaconda, as outras linguagens de programação precisam ter seus núcleos (kernels) instalados antes que possam ser executadas no Jupyter Notebook. As instruções de instalação são diferentes para cada kernel de linguagem, por isso é melhor encontrar e seguir as instruções para a sua linguagem preferida. Pelo menos para R, isso é relativamente simples. A página Jupyter Kernels no GitHub tem links para instruções para todos os kernels de linguagem disponíveis.
+
+Uma vez que você tenha instalado o kernel para a linguagem desejada, você pode executar cadernos escritos nessa linguagem de programação ou criar seus próprios cadernos que executam essa linguagem. Cada linguagem com um kernel instalado em seu computador estará disponível como uma opção quando você criar um novo notebook, como descrito acima.
+
+Como exemplo de um notebook em R, [veja esta adaptação para Jupyter do código R de Andrew Piper de "Enumerações"](https://perma.cc/656B-U9SB).
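+Para conferir quais kernels já estão registrados na sua instalação, segue um esboço de célula de notebook usando a sintaxe `!` apresentada anteriormente (o comando `jupyter kernelspec list` faz parte do próprio Jupyter):
+
+```
+# lista os kernels de linguagem atualmente disponíveis para o Jupyter;
+# a sintaxe ! executa o comando na linha de comando, fora do kernel
+!jupyter kernelspec list
+```
+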
+
+## Dimensionando a computação com Jupyter Notebooks
+
+Especialmente se você é novo em programação em Python, apenas conseguir que qualquer coisa funcione pode parecer uma vitória. No entanto, se você começar a trabalhar com conjuntos de dados maiores, poderá descobrir que algumas das “soluções” iniciais encontradas (como usar `readlines()` para ler um ficheiro de texto linha por linha) se tornam computacionalmente ineficientes, a ponto de causar problemas. Uma maneira de começar a entender as ineficiências em seu código é adicionar `%%timeit` ao topo de uma célula. O notebook escolherá um número de iterações para executar o código, dependendo da complexidade da tarefa, e imprimirá o número de iterações e o tempo médio. Fazer várias iterações, em vez de apenas uma, pode ser útil para compensar pequenos atrasos no nível do sistema (por exemplo, se seu laptop estiver momentaneamente sobrecarregado com outros processos). Se você quiser medir uma única linha, em vez da célula inteira, pode colocar `%timeit` na frente da linha. Tenha cuidado com isso: ordenar uma lista leva muito mais tempo na primeira iteração do que na segunda, depois que a lista já está em ordem. Em casos como a ordenação de listas, em que não faz sentido medir várias iterações, ou para tarefas de longa duração, onde pequenos atrasos no sistema não terão um impacto significativo, você pode usar `%%time` no topo de uma célula ou `%time` na frente de uma linha, que medem o tempo que uma única execução leva. Esses comandos fazem parte de uma família de “comandos mágicos” integrados disponíveis em Jupyter Notebooks. Veja a [documentação do Jupyter](https://perma.cc/ED9F-DNDA) para mais detalhes (há também um pequeno esboço de exemplo ao final desta seção).
+
+Ter uma noção de quanto tempo seu código provavelmente levará para executar é um pré-requisito para escalar a computação para recursos maiores, como os clusters de computação de alto desempenho (HPC) financiados centralmente, disponíveis em muitas instituições. A esmagadora maioria dos pesquisadores que usam esses recursos está nas ciências exatas e naturais, mas geralmente qualquer membro do corpo docente pode solicitar acesso. É possível que você também tenha acesso a recursos de HPC regionais ou nacionais. Esses recursos de computação podem acelerar significativamente grandes trabalhos de computação, especialmente tarefas como modelagem 3D, que podem tirar proveito de nós computacionais com poderosas unidades de processamento gráfico (GPUs). Aprender a usar clusters HPC é um tópico suficientemente grande para uma lição própria, mas os Jupyter Notebooks podem permitir que você pegue um atalho. Alguns grupos de computação de pesquisa oferecem maneiras mais fáceis para os pesquisadores executarem Jupyter Notebooks usando recursos de cluster HPC, e você pode encontrar [vários guias e exemplos de uso geral](https://perma.cc/A5R4-9ZD7) para fazê-lo. Se você conseguir acesso a recursos de HPC, vale a pena contatar a equipe de TI responsável pela computação de pesquisa e perguntar como você pode executar Jupyter Notebooks no cluster, caso não encontre documentação a respeito no site da sua instituição. A equipe de TI que trabalha principalmente com pesquisadores das ciências pode se comunicar de forma mais brusca do que você está acostumado, mas não deixe que isso o desanime: a maioria dos grupos de computação de pesquisa quer que humanistas usem seus recursos, porque a diversidade disciplinar da sua base de usuários é importante para suas métricas na universidade.
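+Como ilustração dos comandos mágicos discutidos acima, segue um esboço mínimo de célula de notebook (a soma abaixo é apenas uma tarefa hipotética; substitua pelo código que você quer medir):
+
+```
+%%timeit
+# mede o tempo médio de execução da célula inteira ao longo de várias iterações
+total = 0
+for n in range(100000):
+    total += n
+```
+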
+
+## Conclusão
+Desde a experimentação de código até a documentação de fluxos de trabalho, da pedagogia à publicação acadêmica, o Jupyter Notebook é uma ferramenta flexível e multiuso que pode apoiar a pesquisa digital em diversos contextos. Mesmo que você não tenha certeza de como exatamente vai usá-los, é bastante fácil instalar o software Jupyter Notebook e baixar e explorar notebooks existentes, ou experimentar alguns dos seus próprios. Os Jupyter Notebooks têm grande potencial para fazer a ponte entre as facetas críticas e computacionais da pesquisa em humanidades digitais. Para concluir, uma citação de _Critical Digital Humanities: The Search for a Methodology_, de Jed Dobson:
+
+>Notebooks são teoria - não apenas código como teoria, mas teoria como engajamento reflexivo com o trabalho teórico e as implicações do próprio código. As normas disciplinares, incluindo enquadramento contextual, teoria e autocrítica, precisam acompanhar, complementar e informar qualquer crítica computacional. Revelar o máximo possível do código, dos dados e dos métodos é essencial para permitir a conversa disciplinar em curso. Compilá-los juntos em um único objeto, que pode ser exportado, compartilhado, examinado e executado por outros, produz um tipo dinâmico de teorização que é modular, mas firmemente ligado ao seu objeto.[^5]
+
+## Links
+- Uma lista crescente de notebooks [Jupyter para DH](https://perma.cc/V5JX-VPP8), em múltiplas linguagens humanas e de programação. Obrigado a todos que enviaram sugestões no Twitter; referências adicionais são bem-vindas.
+- Uma descrição técnica detalhada da [instalação de pacotes Python a partir do Jupyter](https://perma.cc/N6M6-ZF5G).
+
+## Agradecimentos
+- Obrigado a Stéfan Sinclair pelas referências a discussões anteriores sobre o uso de notebooks em humanidades digitais.
+
+- Obrigado a Rachel Midura por sugerir o uso de Jupyter Notebooks para colaboração.
+
+[^1]: Knuth, Donald. 1992. _Literate Programming_. Stanford, Califórnia: Centro para o Estudo da Linguagem e da Informação.
+
+[^2]: Millman, KJ e Fernando Pérez. 2014. “Developing open source scientific practice”. In _Implementing Reproducible Research_, ed. Victoria Stodden, Friedrich Leisch e Roger D. Peng. [https://osf.io/h9gsd/](https://perma.cc/M8R7-9JTL)
+
+[^3]: Sinclair, Stéfan e Geoffrey Rockwell. 2013. “Voyant Notebooks: Literate Programming and Programming Literacy”. _Journal of Digital Humanities_, Vol. 2, No. 3, Summer 2013. [https://journalofdigitalhumanities.org/2-3/voyant-notebooks-literate-programming-and-programming-literacy/](https://perma.cc/R253-BP2B)
+
+[^4]: Haley Di Pressi, Stephanie Gorman, Miriam Posner, Raphael Sasayama e Tori Schmitt, com contribuições de Roderic Crooks, Megan Driscoll, Amy Earhart, Spencer Keralis, Tiffany Naiman e Todd Presner. “A Student Collaborator’s Bill of Rights”. [https://humtech.ucla.edu/news/a-student-collaborators-bill-of-rights/](https://perma.cc/A8G2-BBL9)
+
+[^5]: Dobson, James. 2019. _Critical Digital Humanities: The Search for a Methodology_. Urbana-Champaign: University of Illinois Press. p. 40.
diff --git a/pt/licoes/introducao-linha-comando-bash.md b/pt/licoes/introducao-linha-comando-bash.md index c5e80e0d16..1cf314c8dd 100644 --- a/pt/licoes/introducao-linha-comando-bash.md +++ b/pt/licoes/introducao-linha-comando-bash.md @@ -38,7 +38,7 @@ Muitas das lições do *Programming Historian* exigem que você insira comandos {% include figure.html filename="en-or-intro-to-bash-01.png" alt="Screenshot mostrando interface gráfica de um computador" caption="Figura 1. GUI do computador de Ian Milligan" %} -Interfaces de linha de comando possuem vantagens para usuários de computador que precisam de maior precisão em seu trabalho – tal como historiadores(as) digitais. Elas permitem maior detalhamento quando executando alguns programas, ao passo que você pode adicionar modificações para especificar exatamente como deseja que o programa seja executado. Além do mais, elas podem ser facilmente automatizadas através de [scripts](http://www.tldp.org/LDP/Bash-Beginners-Guide/html/chap_01.html), que são basicamente conjuntos de comandos baseados em texto. +Interfaces de linha de comando possuem vantagens para usuários de computador que precisam de maior precisão em seu trabalho – tal como historiadores(as) digitais. Elas permitem maior detalhamento quando executando alguns programas, ao passo que você pode adicionar modificações para especificar exatamente como deseja que o programa seja executado. Além do mais, elas podem ser facilmente automatizadas através de [scripts](https://www.tldp.org/LDP/Bash-Beginners-Guide/html/chap_01.html), que são basicamente conjuntos de comandos baseados em texto. Existem duas interfaces de linha de comando principais, ou "shells", que muitos historiadores e historiadoras digitais utilizam. No macOS[^1] e muitas distribuições Linux, o shell é conhecido como `bash`, ou o "bourne-again shell" (shell renascido). Para usuários(as) de sistemas baseados no Windows, a interface de linha de comando é por norma baseada em `MS-DOS`, que utiliza comandos e [sintaxe](https://perma.cc/WPA6-LJG8) distinta, mas que comumente pode desempenhar tarefas similares. Essa lição oferece uma introdução básica ao terminal `bash`, e usuários Windows podem acompanhá-la instalando shells populares como [Cygwin](https://www.cygwin.com/) ou Git Bash (veja abaixo). @@ -60,7 +60,7 @@ Quando você o executa, verá esta janela. {% include figure.html filename="en-or-intro-to-bash-03.png" alt="Screenshot de uma tela vazia do Terminal" caption="Figura 3. Uma tela vazia do terminal em nosso macOS" %} -Você pode querer alterar a aparência padrão de seu terminal, pois os olhos podem se cansar ao olhar repetidamente para um texto preto em fundo branco. Na aplicação padrão do macOS, você pode abrir o menu **Settings** nas **Preferences** no Terminal. Clique na guia **Settings** e altere-a para um novo esquema de cores. Pessoalmente, preferimos algo com um pouco menos de contraste entre o fundo e o texto, já que você estará olhando para esta tela por muito tempo. "Novel" é agradável, assim como o popular conjunto de paleta de cores "[Solarized]"(http://ethanschoonover.com/solarized). Para usuários Windows, um efeito similar pode ser alcançado utilizando a aba **Properties** do Git Bash. Para alcançá-la, clique com o botão direito do mouse em qualquer lugar na barra superior e selecione **Properties**. +Você pode querer alterar a aparência padrão de seu terminal, pois os olhos podem se cansar ao olhar repetidamente para um texto preto em fundo branco. 
Na aplicação padrão do macOS, você pode abrir o menu **Settings** nas **Preferences** do Terminal. Clique na guia **Settings** e altere-a para um novo esquema de cores. Pessoalmente, preferimos algo com um pouco menos de contraste entre o fundo e o texto, já que você estará olhando para esta tela por muito tempo. "Novel" é agradável, assim como a popular paleta de cores [Solarized](https://ethanschoonover.com/solarized). Para usuários Windows, um efeito similar pode ser alcançado utilizando a aba **Properties** do Git Bash. Para acessá-la, clique com o botão direito do mouse em qualquer lugar na barra superior e selecione **Properties**.

{% include figure.html filename="en-or-intro-to-bash-04.png" alt="Screenshot da tela de configurações do Terminal do macOS" caption="Figura 4. A tela de configurações da Aplicação Shell Terminal do macOS" %}
@@ -198,7 +198,7 @@
Esse comando cria um diretório com o nome, como você pode imaginar, `ProgHist-Text`.

Mas espere! Há um truque para tornar as coisas um pouco mais rápidas. Vá para o diretório anterior (`cd ..` - o que o levará de volta para a área de trabalho). Para navegar até o diretório `ProgHist-Text`, você poderia digitar `cd ProgHist-Text`. Alternativamente, você poderia digitar `cd Prog` e depois pressionar a tecla Tab. Você notará que a interface completa automaticamente a linha para `cd ProgHist-Text`. Pressionar a tecla Tab a qualquer momento no shell irá tentar completar a linha com base nos ficheiros ou subdiretórios no diretório atual. No entanto, é sensível a maiúsculas e minúsculas. No exemplo anterior, `cd prog` não seria autocompletado para `ProgHist-Text`. Quando dois ou mais ficheiros têm os mesmos caracteres, o preenchimento automático completará a linha apenas até o primeiro ponto de diferença. Encorajamos o uso desse método ao longo da lição para ver como ele funciona.

-Agora você precisa encontrar um ficheiro de texto simples para nos ajudar com o exemplo. Porque não usar um livro que sabemos ser longo, tal como o épico *Guerra e Paz* (em inglês), de Leon Tolstoy? O ficheiro de texto está disponível no [Projeto Gutenberg](http://www.gutenberg.org/ebooks/2600). Se você já instalou o [wget](/en/lessons/automated-downloading-with-wget) (em inglês), pode simplesmente digitar:
+Agora você precisa encontrar um ficheiro de texto simples para nos ajudar com o exemplo. Porque não usar um livro que sabemos ser longo, tal como o épico *Guerra e Paz* (em inglês), de Leon Tolstoy? O ficheiro de texto está disponível no [Projeto Gutenberg](https://www.gutenberg.org/ebooks/2600). Se você já instalou o [wget](/en/lessons/automated-downloading-with-wget) (em inglês), pode simplesmente digitar:

`wget http://www.gutenberg.org/files/2600/2600-0.txt`

diff --git a/pt/licoes/introducao-mysql-r.md b/pt/licoes/introducao-mysql-r.md
index 1696e597b7..1d5f1706ed 100644
--- a/pt/licoes/introducao-mysql-r.md
+++ b/pt/licoes/introducao-mysql-r.md
@@ -1,944 +1,944 @@
----
-title: Introdução ao MySQL com R
-layout: lesson
-slug: introducao-mysql-r
-authors:
-- Jeff Blackadar
-date: 2018-05-03
-translation_date: 2021-12-18
-editors:
-- Amanda Visconti
-reviewers:
-- Jesse Sadler
-- Simon Appleford
-translator:
-- Jéssica Evelyn Santos
-translation-editor:
-- Daniel Alves
-translation-reviewer:
-- Dália Guerreiro
-- Leonardo F. Nascimento
-difficulty: 2
-review-ticket: https://github.com/programminghistorian/ph-submissions/issues/439
-collection: lessons
-activity: transforming
-topics: [data-manipulation, distant-reading, r, data-visualization]
-abstract: "Esta lição ajudará a armazenar grandes quantidades de dados históricos de maneira estruturada, pesquisar e filtrar esses dados e visualizar alguns dos dados como um gráfico."
-original: getting-started-with-mysql-using-r
-avatar_alt: Uma mão a segurar um jornal
-doi: 10.46430/phpt0025
----
-
-Esta lição é direcionada aos que desejam armazenar grandes quantidades de dados de projetos de história digital de uma forma estruturada. Usaremos um sistema de gerenciamento de dados chamado MySQL para armazenar os dados.
-
-A linguagem R permite realizar análises e armazenar dados sem que um banco de dados relacional seja utilizado. No entanto, há situações nas quais a inclusão de bancos de dados é muito útil, dentre elas:
-
-- Publicar os resultados de um script em R num *web site* com dados interativos
-- Manipular mais dados do que o R pode armazenar em sua própria memória
-- Quando os dados já estão armazenados num banco de dados relacional
-- Trabalhar com dados de entidades diferentes que são relacionados uns com os outros. Um exemplo seria um banco de dados de soldados de dois exércitos distintos que lutaram numa batalha, onde gostaríamos de saber de qual esquadrão, pelotão, companhia e brigada cada soldado fazia parte.
-
-Uma breve discussão do tema pode ser encontrada no [*blog* de Jason A. French](https://perma.cc/5VYV-L5PG)[^1].
-
-Ao final desta lição, será possível instalar um sistema de gerenciamento de banco de dados em seu computador, criar uma tabela de banco de dados, armazenar informações na tabela e realizar consultas dos dados. Na conclusão da lição, utilizaremos uma consulta do banco de dados para construir um gráfico.
-
-Usaremos a linguagem de programação R para os exemplos, mas as técnicas podem ser utilizadas com outras linguagens, como Python.
-
-Para fazer essa lição será necessário um computador com permissão para instalar os programas R e RStudio, entre outros, se já não estiverem instalados. Além da programação em R, também instalaremos alguns componentes de um sistema de gerenciamento de banco de dados chamado MySQL, que funciona nos sistemas operacionais Windows, Mac e Linux.
-
-Possuir algum conhecimento de instalação de programas e organização de dados em campos é útil para essa lição, cujo nível de dificuldade é mediano.
-
-{% include toc.html %}
-
-# Introdução
-
-O MySQL é um banco de dados relacional usado para armazenar e consultar informações. Esta lição utilizará a linguagem R para fornecer um tutorial e exemplos para:
-
-- Configurar e realizar uma conexão a uma tabela no MySQL
-- Armazenar registros em tabelas
-- Consultar informações de tabelas
-
-Neste tutorial, construiremos um banco de dados de artigos de periódicos que contém palavras de uma busca numa hemeroteca digital. O script armazenará o título, a data de publicação e a URL de cada artigo num banco de dados. Utilizaremos outro script para realizar consultas no banco de dados e procurar por padrões historicamente relevantes. Os dados de amostra serão fornecidos pelo arquivo de periódicos [Welsh Newspapers Online](https://perma.cc/9EHD-EVEX). Estamos trabalhando com o objetivo de produzir uma lista de artigos na qual possamos consultar informações. Ao final da lição, vamos executar uma consulta para gerar um gráfico do número de artigos de periódicos no banco de dados, para verificar se há um padrão relevante.
-
-# Programas necessários
-
-R, RStudio, MySQL Server e MySQL Workbench são os programas necessários para esta lição. Algumas notas sobre a instalação desses pacotes de programas podem ser encontradas abaixo.
-
-## R
-
-Na lição [Processamento Básico de Texto em R](/pt/licoes/processamento-basico-texto-r)[^2], Taylor Arnold e Lauren Tilton fornecem um resumo excelente do conhecimento da linguagem R necessário para esta lição. Apenas um conhecimento básico de R é esperado. A lição [Noções básicas de R com dados tabulares](/pt/licoes/nocoes-basicas-R-dados-tabulares), de Taryn Dewar,[^3] aborda como instalar o R e se familiarizar com a linguagem.
-
-### Download do R
-
-Você pode realizar o download do R no [Comprehensive R Archive Network](https://cran.r-project.org/). Clique no link que corresponde ao sistema operacional do seu computador. Selecione *base* para instalar o R pela primeira vez. Uma vez que o ficheiro foi baixado, clique no ficheiro para executar o instalador.
-
-## RStudio
-
-Os exemplos desta lição utilizam o RStudio, que é uma interface de desenvolvimento para escrever e executar scripts em R. Esta lição usou a versão 1.4.1717 do RStudio.
-
-### Download do RStudio
-
-Faça o download do RStudio através do [rstudio.com](https://www.rstudio.com/products/rstudio/#Desktop) e instale-o. Já que o RStudio é de código aberto, você pode selecionar a versão gratuita do RStudio Desktop, rolar a página para baixo e clicar num dos instaladores que corresponda ao sistema operacional de seu computador. Uma vez que o download foi realizado, clique no ficheiro para executar o instalador.
-
-## MySQL
-
-SQL significa *Structured Query Language* (Linguagem estruturada de consulta), que é um conjunto de comandos para armazenar e recuperar informações a partir de um banco de dados relacional. MySQL é um tipo de sistema de gerenciamento de banco de dados relacionais. Há muitos outros, como Microsoft SQL Server, IBM DB2 e Microsoft Access. Esta lição utiliza o MySQL porque é um programa de código aberto, utilizado por uma grande comunidade, tem uma longa trajetória e possui uma versão gratuita que pode ser utilizada.
-
-### Realizando o download e instalando o MySQL
-
-Nesta seção, iremos instalar o MySQL, que é o sistema que mantém o banco de dados, assim como o MySQL Workbench, que é onde se trabalha para configurar a estrutura do banco de dados. Para usar o MySQL, o MySQL Workbench não é necessário, podem ser utilizados apenas comandos digitados. Esta lição utiliza o MySQL Workbench porque é uma *GUI* (Interface gráfica do usuário) que facilita o aprendizado de MySQL.
-
-Conclua essas instruções para instalar o MySQL Community Server e o MySQL Workbench em seu computador.
-
-### MySQL Community Server
-
-Este é o servidor onde o banco de dados é armazenado. Sua instalação é necessária para que seja possível conectar e armazenar os dados. Abaixo, faremos o download dos ficheiros, a instalação e iniciaremos o servidor. Esta lição utilizou a versão 8.0.21 do MySQL e 8.0.26 do MySQL Workbench.
-
-#### Fazendo o download do ficheiro de instalação do MySQL Community Server
-
-Clique neste link: [https://dev.mysql.com/downloads/mysql/](https://dev.mysql.com/downloads/mysql/). Role a página para baixo e selecione o sistema operacional que corresponde ao seu computador. Se necessário, clique em **Select Operating System** para selecionar o sistema operacional. Uma vez feita essa operação, clique no botão azul **Go to Download Page**. Depois clique no botão azul **Download**. Na página de download, role para baixo e terá a opção de começar o download clicando em **No thanks, just start my download** (Não, obrigado, apenas inicie o download).
-
-#### Instalação do MySQL Community Server
-
-Abaixo se encontram as dicas de instalação para PC e Mac:
-
-##### Dicas de instalação para PC
-
-A maneira recomendada de instalar os componentes do MySQL é através do instalador do MySQL para Windows. Com o ficheiro já baixado, clique duas vezes no ficheiro para instalá-lo. Siga as instruções para aceitar a licença (nota de tradução: com o instalador MySQL para Windows pode optar por fazer de uma vez só a instalação do MySQL Server e do MySQL Workbench; para isso, escolha os respectivos componentes e siga as instruções abaixo).
-Depois que os componentes forem instalados, serão solicitadas as seguintes opções:
-
-###### 1. Escolhendo um tipo de configuração
-
-Selecione: **Developer Default** (Padrão do desenvolvedor). Esta opção *instala o MySQL Server e as ferramentas necessárias para o desenvolvimento da aplicação. Isto é útil se pretendes desenvolver aplicações para um servidor existente.*
-(Ver abaixo)
-
-{% include figure.html filename="introducao-ao-mysql-e-r-1.PNG" caption="Configure o tipo de padrão do desenvolvedor" %}
-
-###### 2. Verificar Requisitos
-
-Clique no botão **Execute** caso haja requisitos pendentes (*failing requirements*) listados na checagem de requisitos. A lista de requisitos pode ser diferente da mostrada aqui. Uma vez concluído o processo de instalação dos requisitos pendentes, clique no botão *Next*.
-(Ver abaixo)
-
-{% include figure.html filename="introducao-ao-mysql-e-r-2.PNG" caption="Clique no botão *Execute* se necessário" %}
-
-###### 3. Tipo e Rede (1)
-
-Selecione: **Standalone MySQL Server**
-(Ver abaixo)
-
-{% include figure.html filename="getting-started-with-mysql-7.png" caption="Select Standalone MySQL Server" %}
-
-###### 4. Tipo e Rede (2)
-
-*Config type*: Selecione: **Development Computer**
-Checar: TCP/IP. Port number (Número da porta): 3306.
-(Ver abaixo)
-
-{% include figure.html filename="introducao-ao-mysql-e-r-4.png" caption="Development Computer TCPIP Port 3306" %}
-
-###### 5. Contas e Funções
-
-{% include figure.html filename="introducao-ao-mysql-e-r-5.png" caption="Digite a senha *root* e depois guarde-a em local seguro" %}
-
-###### 6. Serviço do Windows
-
-As configurações aqui são opcionais, mas achamos mais fácil configurar o MySQL como um serviço do Windows e incluí-lo na inicialização automática. Um serviço do Windows é um processo que é executado no computador enquanto se está trabalhando. É possível mudar as configurações do serviço do Windows posteriormente, para iniciar o MySQL manualmente e impedir que o programa inicialize quando não for necessário.
-
-{% include figure.html filename="introducao-ao-mysql-e-r-6.png" caption="MySQL como um serviço do Windows" %}
-
-Clique nos botões *Execute* e *Next* para finalizar a instalação e inicializar o servidor.
-
-###### 7. MySQL Workbench e Senha Root
-
-Procure por MySQL Workbench no menu de inicialização do Windows, sob o item MySQL. Se está lá, clique para iniciar. Caso não esteja, clique no instalador do MySQL - Community para executar novamente a instalação e adicionar o MySQL Workbench aos componentes instalados.
-Depois de aberto o MySQL Workbench, clique na instância local do seu MySQL Server.
-Quando a senha *root* for solicitada, digite a senha criada na etapa *5. Accounts and Roles*.
-(Ver abaixo)
-
-{% include figure.html filename="introducao-ao-mysql-e-r-7.png" caption="Senha Root" %}
-
-##### Dicas de instalação para um Mac
-
-###### 1. Instalação do MySQL Community Server
-
-Com o ficheiro de instalação do MySQL Community Server baixado, clique duas vezes no ficheiro para instalá-lo. (Ver abaixo)
-
-{% include figure.html filename="introducao-ao-mysql-e-r-8.png" caption="Ficheiro de instalação" %}
-
-###### 2. Guarde a senha temporária
-
-Siga as instruções para aceitar a licença e o local de instalação. **Importante: Uma senha temporária será solicitada. Guarde-a cuidadosamente.** (Veja o exemplo abaixo. Sua senha temporária será diferente da mostrada abaixo.) Se um erro for cometido, é possível remover o servidor instalado e reinstalá-lo, mas essa é uma pequena complicação. Um dos revisores dessa lição achou que [essa resposta do StackOverflow](https://perma.cc/J4Q5-SLK5) pode auxiliar nesta etapa.
-
-{% include figure.html filename="getting-started-with-mysql-18.png" caption="Senha temporária" %}
-
-Concluída a instalação, iremos alterar a senha *root* para o servidor do MySQL.
-
-###### 3. Modifique a senha do servidor do MySQL
-
-**Esta seção da lição causou dificuldade para algumas pessoas. Leve o tempo que for necessário e note, por favor, que os comandos do MySQL terminam com um ponto e vírgula. Observe-os em alguns dos comandos abaixo.**
-
-3.1. Abra uma janela do terminal
-
-3.2. Adicione /usr/local/mysql/bin ao PATH através do comando abaixo. O PATH é uma lista de diretórios que o computador considera quando um comando é digitado para executar um programa. No próximo passo abaixo, ao executar o *mysql*, o PATH busca pelos diretórios que contêm o programa *mysql*. O PATH procura pelo *mysql* no diretório */usr/local/mysql/bin* e o executa. O PATH apenas poupa a digitação do caminho completo de um programa, nesse caso */usr/local/mysql/bin/mysql*, quando se quer executá-lo.
-
-```
-export PATH=${PATH}:/usr/local/mysql/bin
-```
-
-3.3. Inicie o servidor do MySQL.
-
-Vá até System Preferences > imagem do MySQL > clique em "Start MySQL server".
-
-3.4. Inicie uma sessão no MySQL. No comando abaixo, depois de *--password*, digite a senha guardada no passo *2. Guarde a senha temporária*.
-
-```
-mysql --user=root --password=senha_root_guardada_acima
-```
-
-3.5. Configure a senha *root* para uma **nova** senha. Escolha e guarde a nova senha cuidadosamente. No *prompt* mysql>, digite o seguinte comando, substituindo 'password' (entre aspas simples) no comando SET PASSWORD=PASSWORD('password') pela nova senha criada.
-
-```
-SET PASSWORD=PASSWORD('nova_senha_criada_na_etapa_3.5');
-```
-
-3.6. Reinicie o computador. Depois de reiniciar, é possível que seja necessário repetir a etapa *3.3 Inicie o servidor do MySQL* acima.
-
-###### 4. Download do MySQL Workbench
-
-Clique nesse link: [http://dev.mysql.com/downloads/workbench/](http://dev.mysql.com/downloads/workbench/). Role a página para baixo e clique em **Select Operating System** para selecionar o sistema operacional que corresponde ao seu computador. Se necessário, clique em **Select OS Version** para selecionar a versão do sistema operacional. Feito isso, clique no botão azul de **Download**. Na página de download, role para baixo e terá a opção de iniciar o download ao clicar em **No thanks, just start my download.** (Não, obrigado, apenas inicie o download.)
-
-Com o ficheiro baixado, clique duas vezes para instalá-lo. Feita a instalação do MySQL Workbench de acordo com as instruções na tela, arraste o ícone para a pasta de aplicações da esquerda. (Ver abaixo)
-
-{% include figure.html filename="introducao-ao-mysql-e-r-10.png" caption="MySQL Workbench" %}
-
-# Crie um banco de dados
-
-Aqui iremos criar um banco de dados que serve como um contentor para as tabelas nas quais armazenaremos informações. Uma tabela é a estrutura que mantém os dados que queremos armazenar. Tabelas contêm muitas linhas de registros. Um exemplo de informações básicas de contato conteria campos para nome, número de telefone e endereço de e-mail. Numa tabela, os campos são organizados por *colunas*.
-
-Aqui está uma tabela de amostra com uma linha de dados que representa um registro:
-
-| nome | número de telefone | endereço de e-mail |
-| ----------- | ------------------ | ------------------ |
-| Pat Abraham | 613-555-1212 | pat@zmail.ca |
-
-## Abra o MySQL Workbench
-
-Abra o MySQL Workbench. Clique duas vezes em *Local Instance MySQL80* (num Mac isto pode aparecer como *Local Instance 3306*). É possível que a senha *root* criada nas etapas acima seja solicitada. Em alguns Macs, uma aba de *Query* será aberta; se não for, abra uma aba de *Query* utilizando: *File > New Query Tab*.
-
-## Crie um banco de dados
-
-Agora iremos criar um novo banco de dados. Utilizando o MySQL Workbench, realize os seguintes passos:
-
-1. Na janela de **Query**, digite:
-
-   ```
-   CREATE DATABASE periodicos_resultados_pesquisa;
-   ```
-
-2. Execute o comando CREATE DATABASE. Clique no **relâmpago/raio** ou, utilizando o menu, clique em *Query* e então em *Execute Current Statement*.
-
-3. O novo banco de dados **periodicos_resultados_pesquisa** deve estar visível na aba **SCHEMAS**, no canto superior esquerdo da tela. Se não conseguir visualizar um item chamado periodicos_resultados_pesquisa, clique no botão de atualizar.
-
-(Ver abaixo:)
-
-{% include figure.html filename="introducao-ao-mysql-e-r-11.png" caption="Crie um banco de dados no MySQL Workbench" %}
-
-## USE o banco de dados
-
-Em seguida, iremos inserir uma declaração USE para informar ao MySQL qual banco de dados será usado. Isto se torna mais importante quando se tem mais de um banco de dados no computador.
-
-Na janela de **Query**, apague todo o comando CREATE DATABASE e digite:
-
-```
-USE periodicos_resultados_pesquisa;
-```
-
-Novamente, clique no **relâmpago/raio** ou, usando o menu, clique em *Query* e então em *Execute Current Statement*. É possível usar um atalho de teclado para isso. Num Mac, use *Command+Return*. Num PC, use *Ctrl+Shift+Enter*. A partir desse ponto da lição, todas as vezes que um comando for digitado na janela de *Query*, será executado desta maneira.
-
-(Ver abaixo:)
-
-{% include figure.html filename="introducao-ao-mysql-e-r-12.png" caption="USE um banco de dados no MySQL Workbench" %}
-
-# Adicione uma tabela
-
-1. No MySQL Workbench, procure no lado esquerdo no painel **Navigator**, na aba **SCHEMAS**, por **periodicos_resultados_pesquisa**.
-2. Clique em **Tables** com o botão direito do mouse e depois clique em **Create Table**.
-3. Para **Table Name:** digite **tbl_periodicos_resultados_pesquisa**
-
-## Adicione colunas à tabela
-
-Adicione essas colunas:
-
-1. **id** Data type: **INT**. Clique PK (Primary Key), NN (Not Null) e AI (Auto Increment). Esta coluna de *id* será usada para relacionar registros nesta tabela com registros em outras tabelas.
-2. **titulo_artigo** Data type: **VARCHAR(99)**. Esta coluna armazenará o título de cada resultado de artigo que coletarmos da busca.
-3. **data_publicacao_artigo** Data type: **DATETIME**. Esta coluna armazenará a data em que o periódico foi publicado.
-4. **url_artigo** Data type: **VARCHAR(99)**. Esta coluna armazenará a URL de cada resultado que coletarmos da pesquisa.
-5. **termo_busca_usado** Data type: **VARCHAR(45)**. Esta coluna irá armazenar a palavra que usamos para buscar os periódicos.
- Clique no botão **Apply**.
-
-Se preferir, todas as etapas acima podem ser realizadas com um comando. Este comando pode ser executado na janela de *Query* para criar a tabela com as colunas indicadas acima.
-
-```
-CREATE TABLE periodicos_resultados_pesquisa.tbl_periodicos_resultados_pesquisa (
-id INT NOT NULL AUTO_INCREMENT,
-titulo_artigo VARCHAR(99) NULL,
-data_publicacao_artigo DATETIME NULL,
-url_artigo VARCHAR(99) NULL,
-termo_busca_usado VARCHAR(45) NULL,
-PRIMARY KEY (id));
-```
-
-*Dica: Leve o tempo que for necessário para pensar sobre a elaboração da tabela e sua nomeação, uma vez que um banco de dados bem elaborado será mais fácil de trabalhar e entender.*
-
-## Adicione um usuário para se conectar ao banco de dados
-
-Um usuário é uma conta que tem permissão para se conectar a um banco de dados. Abaixo, adicionaremos um novo usuário para que essa conta apenas se conecte a esse novo banco de dados. Usar essa conta de usuário para uma conexão com esse banco de dados limita a exposição a outros bancos de dados, caso a senha para este usuário seja comprometida. Dar ao usuário os privilégios mínimos requeridos para realizar o necessário reduz o risco caso outra pessoa tenha acesso à senha do usuário. Por exemplo, se um usuário pode apenas ler um banco de dados, o risco será menor se a senha for descoberta do que no caso de um usuário que também pode alterar ou apagar o banco de dados.
-
-No menu do MySQL Workbench, clique em **Server** e depois em **Users and Privileges**
-
-**Usuários de Mac** - Em alguns computadores Mac, como meu laptop de teste, o painel de **Schema Privileges** não é exibido corretamente. Veja a nota abaixo da captura de tela se isso ocorrer.
-
-Clique no botão **Add Account** e complete os detalhes para a nova conta de usuário na caixa de diálogo:
-
-1. Login Name: **periodicos_pesquisa_usuario**
-2. Authentication Type: selecione **Standard**
-3. Limit to Hosts Matching: **localhost**
-4. Digite uma senha *AlgoDificil* e confirme-a
-5. Clique na aba **Administrative Roles**. Certifique-se de que nada está marcado. Esta conta é apenas para acessar o banco de dados.
-6. Clique na aba **Schema Privileges** e clique **Add Entry**.
-7. Na caixa de diálogo **New Schema Privilege Definition**, clique na caixa de seleção **Selected schema:** e selecione **periodicos_resultados_pesquisa**. Clique OK.
-8. Clique em todas as opções de *Object Rights*: SELECT, INSERT, UPDATE, DELETE, EXECUTE, SHOW VIEW, como mostrado na imagem abaixo. (Este usuário precisará fazer muitas coisas posteriormente na lição, por isso, estamos lhe concedendo várias permissões.)
-9. Clique em **Apply**.
-
-{% include figure.html filename="introducao-ao-mysql-e-r-13.PNG" caption="Configurando permissões para a nova conta" %}
-
-### Schema Privileges não exibidos corretamente
-
-Alguns computadores Mac, como meu laptop de teste, não exibem corretamente o painel de **Schema Privileges**. Nesse caso, é possível realizar a tarefa acima através de um *script* usando uma janela de Query.
-
-Se o usuário já foi criado acima, execute o seguinte comando para lhe conceder privilégios de usuário:
-
-```
-GRANT SELECT, INSERT, UPDATE, DELETE, EXECUTE, SHOW VIEW ON periodicos_resultados_pesquisa.* TO 'periodicos_pesquisa_usuario'@'localhost';
-```
-
-Se o usuário não foi criado ainda, execute estes dois comandos para criar um usuário e depois lhe conceder privilégios de usuário:
-
-```
-CREATE USER 'periodicos_pesquisa_usuario'@'localhost' IDENTIFIED BY 'AlgoDificil';
-GRANT SELECT, INSERT, UPDATE, DELETE, EXECUTE, SHOW VIEW ON periodicos_resultados_pesquisa.* TO 'periodicos_pesquisa_usuario'@'localhost';
-```
-
-### MySQL versão 8 e tipo de autenticação de usuário
-
-Quando um usuário é criado no MySQL Workbench 8, o **Authentication Type** (tipo de autenticação) é configurado para o padrão **caching_sha2_password**. Esse tipo de autenticação causa um erro para o pacote R que usaremos para conectar o banco de dados mais tarde nesta lição. O erro é *Authentication plugin 'caching_sha2_password' cannot be loaded* e é descrito no [Stack Overflow](https://perma.cc/7NVR-TSYT).
-
-Para evitar esse erro, podemos modificar o tipo de autenticação do usuário para padrão (Standard). Para fazer isso, execute o seguinte comando na janela de *Query*:
-
-```
-ALTER USER 'periodicos_pesquisa_usuario'@'localhost' IDENTIFIED WITH mysql_native_password BY 'AlgoDificil';
-```
-
-# Crie um R Script que se conecte ao banco de dados
-
-Abra o RStudio, que foi instalado anteriormente na lição. Veja a seção [RStudio](#rstudio).
-
-Agora usaremos o RStudio para escrever um novo R Script e salvá-lo com o nome periodicos_resultados_pesquisa.R.
-
-Vá em File > New File > R Script e depois salve o novo ficheiro com o nome periodicos_resultados_pesquisa.R.
-
-Usaremos o pacote RMariaDB para realizar a conexão com o MySQL. (Se tiver curiosidade, a documentação para o pacote RMariaDB pode ser encontrada [aqui](https://perma.cc/FX5P-DAW7).)
-
-Se não possui o pacote RMariaDB instalado (o que é provável, caso seja a primeira vez que usa o RStudio), instale-o utilizando o _console_ do RStudio. Após abrir o RStudio, copie e cole o seguinte na janela da esquerda, no *prompt* >, e depois pressione Enter:
-
-```
-install.packages("RMariaDB")
-```
-
-Adicione o seguinte comando ao script periodicos_resultados_pesquisa.R (janela de cima, à esquerda)
-
-```
-library(RMariaDB)
-```
-
-## Conectando a um banco de dados com uma senha
-
-Primeiro, nos conectaremos ao banco de dados usando uma senha. (Depois utilizaremos um meio de conexão melhor). Por ora, usaremos uma variável para armazenar a senha. Cada vez que iniciar o R, será necessário definir novamente esta variável, mas isso é melhor do que publicar uma senha *hardcoded* caso compartilhe seus scripts, como pode fazer usando o GitHub.
-
-No console do RStudio, digite o comando abaixo, substituindo *AlgoDificil* com a senha criada para periodicos_pesquisa_usuario nos passos realizados acima para adicionar um usuário ao banco de dados.
-
-```
-senhadeusuariolocal <- "AlgoDificil"
-```
-
-Adicione as seguintes declarações em R ao ficheiro periodicos_resultados_pesquisa.R e salve-o.
-
-Para executar este script, selecione todo o texto e clique no botão *Run* (Executar). (Há outras maneiras de executar apenas uma parte do script ou o script inteiro. Se tiver curiosidade, procure no menu abaixo de Code > Run Region. O comando CTRL+ALT+R executa todo o código em R no script.)
-
-```
-library(RMariaDB)
-# O método de conexão abaixo utiliza uma senha armazenada numa variável.
-# Para utilizar isto, configure senhadeusuariolocal="A senha de periodicos_pesquisa_usuario"
-
-artigosDb <- dbConnect(RMariaDB::MariaDB(), user='periodicos_pesquisa_usuario', password=senhadeusuariolocal, dbname='periodicos_resultados_pesquisa', host='localhost')
-dbListTables(artigosDb)
-dbDisconnect(artigosDb)
-```
-
-No console, deverá visualizar:
-
-```
-> dbListTables(artigosDb)
-[1] "tbl_periodicos_resultados_pesquisa"
-> dbDisconnect(artigosDb)
-```
-
-Sucesso! O que conseguiu:
-
-1. Conectar ao banco de dados com dbConnect.
-2. Listar a tabela no banco de dados com dbListTables.
-3. Desconectar do banco de dados usando dbDisconnect.
-
-### Conectar-se ao banco de dados com uma senha armazenada num ficheiro de configuração
-
-O exemplo acima de conexão é uma das maneiras de conectar-se. O método de conexão descrito abaixo armazena a informação da conexão do banco de dados num ficheiro de configuração, para que não seja necessário digitar uma senha numa variável todas as vezes que uma sessão no R for iniciada. Acredito que esse é um processo minucioso, mas é uma maneira mais padronizada e segura de proteger as credenciais usadas para acessar seu banco de dados. Esse método de conexão será usado no código para o restante desse tutorial, mas pode ser substituído pelo método de conexão mais simples mostrado acima se preferir.
-
-#### Crie o ficheiro .cnf para armazenar a informação de conexão com o banco de dados MySQL
-
-1. Abra um editor de texto, como o notepad, nano ou TextEdit e cole os itens abaixo, modificando a senha para a que foi criada para periodicos_pesquisa_usuario nas etapas acima de adição de um usuário ao banco de dados.
-
-```
-[periodicos_resultados_pesquisa]
-user=periodicos_pesquisa_usuario
-password=AlgoDificil
-host=127.0.0.1
-port=3306
-database=periodicos_resultados_pesquisa
-```
-
-2. Salve este ficheiro em algum local fora do diretório de trabalho do R. Salvei o meu no mesmo diretório de outros ficheiros de configuração do MySQL. No PC, o caminho foi o seguinte: C:\Program Files\MySQL\MySQL Server 8.0. Dependendo de seu sistema operacional e da versão do MySQL, esse local pode estar em outro lugar. No Mac, usei /Users/blackadar/Documents/ como a pasta de destino. Testei colocar este ficheiro em lugares diferentes, apenas é necessário que o R possa localizá-lo quando o script for executado. Nomeie o ficheiro como **periodicos_resultados_pesquisa.cnf**.
-
-3. Atualize o script periodicos_resultados_pesquisa.R acima para conectar-se ao banco de dados usando o ficheiro de configuração.
-
-```
-library(RMariaDB)
-# O método de conexão abaixo utiliza uma senha armazenada num ficheiro de configuração.
-
-# O R precisa de um caminho completo para encontrar o ficheiro de configuração.
-rmariadb.settingsfile<-"C:/Program Files/MySQL/MySQL Server 8.0/periodicos_resultados_pesquisa.cnf"
-
-rmariadb.db<-"periodicos_resultados_pesquisa"
-artigosDb<-dbConnect(RMariaDB::MariaDB(),default.file=rmariadb.settingsfile,group=rmariadb.db)
-
-# Lista a tabela. Isso confirma que estamos conectados ao banco de dados.
-dbListTables(artigosDb)
-
-# Desconecta para limpar a conexão com o banco de dados.
-dbDisconnect(artigosDb)
-```
-
-4. Execute seu script.
-
-No console, entre outras linhas, deverá ver novamente:
-
-```
-> dbListTables(artigosDb)
-[1] "tbl_periodicos_resultados_pesquisa"
-```
-
-A conexão com o banco de dados utilizando um ficheiro de configuração foi realizada com sucesso.
-
-# Armazenando dados numa tabela com o SQL
-
-Nesta seção da lição, criaremos uma declaração no SQL para inserir uma linha de dados no banco de dados sobre esse [artigo de periódico](https://perma.cc/C8MR-WYV2). Iremos inserir primeiro o registro no MySQL Workbench e depois faremos isso no R.
-
-1. No MySQL Workbench, clique no ícone rotulado SQL+ para criar uma nova aba de SQL para executar consultas (ou vá ao menu "File" e escolha a opção "New Query Tab").
-
-2. Cole a declaração abaixo na janela de Query. Esta ação irá inserir um registro na tabela.
-
-   ```
-   INSERT INTO tbl_periodicos_resultados_pesquisa (
-   titulo_artigo,
-   data_publicacao_artigo,
-   url_artigo,
-   termo_busca_usado)
-   VALUES('THE LOST LUSITANIA.',
-   '1915-05-21',
-   LEFT(RTRIM('http://newspapers.library.wales/view/4121281/4121288/94/'),99),
-   'German+Submarine');
-   ```
-
-3. Clique no ícone de relâmpago/raio na aba do SQL para executar a declaração SQL.
-
-{% include figure.html filename="introducao-ao-mysql-e-r-14.png" caption="Inserindo um registro numa tabela usando MySQL Workbench" %}
-
-## Explicação da declaração INSERT
-
-| SQL | Significado |
-| --- | --- |
-| INSERT INTO tbl_periodicos_resultados_pesquisa ( | Insere um registro na tabela nomeada tbl_periodicos_resultados_pesquisa |
-| titulo_artigo, | nome do campo a ser preenchido por um valor |
-| data_publicacao_artigo, | " |
-| url_artigo, | " |
-| termo_busca_usado) | " |
-| VALUES('THE LOST LUSITANIA.', | O valor a ser inserido no campo titulo_artigo |
-| '1915-05-21', | campo data_publicacao_artigo |
-| LEFT(RTRIM('http://newspapers.library.wales/view/4121281/4121288/94/'),99), | campo url_artigo. Este campo é um VARCHAR(99), portanto tem um máximo de 99 caracteres. Inserir uma URL mais longa que 99 caracteres causaria um erro, portanto, duas funções são utilizadas para controlar isso. RTRIM() reduz espaços residuais à direita da URL. LEFT(value,99) retorna apenas os 99 caracteres mais à esquerda da URL reduzida. Esta URL é mais curta que isso, então essas funções estão aqui apenas como exemplo. |
-| 'German+Submarine'); | campo termo_busca_usado |
-
-Opcional: Modifique a declaração INSERT acima e execute-a algumas vezes. Por exemplo:
-
-```
-INSERT INTO tbl_periodicos_resultados_pesquisa (
-titulo_artigo,
-data_publicacao_artigo,
-url_artigo,
-termo_busca_usado)
-VALUES('test insert.',
-'1916-07-01',
-LEFT(RTRIM('http://newspapers.library.wales/view/4121281/4121288/94/'),99),
-'German+Submarine');
-```
-
-## Consultando dados numa tabela com o SQL
-
-Nesta seção da lição, criaremos uma declaração no SQL para selecionar uma linha de dados do banco de dados que inserimos. Selecionaremos o primeiro registro no MySQL Workbench e depois faremos isso no R.
-
-1. Cole a declaração abaixo numa janela de query no MySQL Workbench. Isto irá selecionar registros da tabela.
-
-   ```
-   SELECT titulo_artigo FROM tbl_periodicos_resultados_pesquisa;
-   ```
-
-2. Clique no ícone de relâmpago/raio na aba do SQL para executá-la. Deverá visualizar o título do artigo "THE LOST LUSITANIA." na grade de resultados. Ver abaixo.
-
-{% include figure.html filename="introducao-ao-mysql-e-r-15.png" caption="Selecionando registros de uma tabela usando MySQL Workbench" %}
-
-Opcional: Modifique a declaração SELECT acima alterando os campos selecionados e execute novamente. Adicione mais de um campo para a declaração SELECT e execute:
-
-```
-SELECT titulo_artigo, data_publicacao_artigo FROM tbl_periodicos_resultados_pesquisa;
-```
-
-## Armazenando dados numa tabela com SQL usando R
-
-Vamos fazer isso usando R! Abaixo se encontra uma versão expandida do R Script que usamos para nos conectar ao banco de dados. Para sermos concisos, os três primeiros comentários que tínhamos no R Script mostrado acima foram removidos. Não são mais necessários.
-
-Na linha 4 do script abaixo, lembre-se de modificar o caminho do rmariadb.settingsfile para que corresponda à localização desse ficheiro em seu computador.
-
-```
-library(RMariaDB)
-# O método de conexão abaixo utiliza uma senha armazenada num ficheiro de configuração.
-
-# O R precisa de um caminho completo para encontrar o ficheiro de configuração.
-rmariadb.settingsfile<-"C:/Program Files/MySQL/MySQL Server 8.0/periodicos_resultados_pesquisa.cnf"
-
-rmariadb.db<-"periodicos_resultados_pesquisa"
-artigosDb<-dbConnect(RMariaDB::MariaDB(),default.file=rmariadb.settingsfile,group=rmariadb.db)
-
-# Opcional. Liste a tabela. Isso confirma que nos conectamos ao banco de dados.
-dbListTables(artigosDb)
-
-# Cria a declaração de query.
-query<-"INSERT INTO tbl_periodicos_resultados_pesquisa (
-titulo_artigo,
-data_publicacao_artigo,
-url_artigo,
-termo_busca_usado)
-VALUES('THE LOST LUSITANIA.',
-'1915-05-21',
-LEFT(RTRIM('http://newspapers.library.wales/view/4121281/4121288/94/'),99),
-'German+Submarine');"
-
-# Opcional. Exibe o query para o caso de ser necessário solucionar problemas.
-print(query)
-
-# Executa o query no artigosDb que conectamos acima.
-rsInsert <- dbSendQuery(artigosDb, query)
-
-# Limpa o resultado.
-dbClearResult(rsInsert)
-
-# Desconecta para limpar a conexão com o banco de dados.
-dbDisconnect(artigosDb)
-```
-
-No script acima, realizamos duas etapas para inserir um registro:
-
-1. Defina a declaração INSERT na linha com: query <- "INSERT INTO tbl_periodicos_resultados_pesquisa (
-2. Execute a declaração INSERT armazenada na variável da consulta com: rsInsert <- dbSendQuery(artigosDb, query)
-
-Execute o script acima no RStudio e depois execute uma declaração SELECT no MySQL Workbench. Consegue visualizar o novo registro adicionado?
-
-### Realize uma limpeza nos dados de teste
-
-Neste ponto é provável que haja mais de um registro com o título de artigo "THE LOST LUSITANIA.", o que é razoável para a testagem, mas não queremos dados duplicados. Iremos remover os dados de teste e começar novamente. Usando a janela de query no MySQL Workbench, execute a declaração SQL:
-
-```
-TRUNCATE tbl_periodicos_resultados_pesquisa;
-```
-
-No painel Action Output do MySQL Workbench deverá visualizar:
-
-```
-TRUNCATE tbl_periodicos_resultados_pesquisa; 0 row(s) affected 0.093 sec
-```
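Caso queira remover apenas alguns registros de teste, em vez de esvaziar a tabela inteira com TRUNCATE, uma declaração DELETE com uma cláusula WHERE também funciona. O esboço abaixo não faz parte do script da lição; é apenas uma ilustração, que assume a conexão artigosDb aberta como nos scripts acima.

```
# Esboço ilustrativo: remove somente os registros de teste com um título específico,
# preservando as demais linhas da tabela. Assume a conexão artigosDb aberta como acima.
query <- "DELETE FROM tbl_periodicos_resultados_pesquisa
WHERE titulo_artigo = 'THE LOST LUSITANIA.';"
rsDelete <- dbSendQuery(artigosDb, query)
dbClearResult(rsDelete)
```

Diferentemente do TRUNCATE, o DELETE com WHERE não reinicia o contador AUTO_INCREMENT da coluna id.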
-
-Para praticar o que acabamos de fazer:
-
-1. Execute uma declaração SELECT novamente. Não deverá receber linhas de retorno.
-2. Execute novamente o script em R acima para inserir um registro.
-3. Realize uma declaração SELECT. Deverá visualizar uma linha de dados.
-
-### Modifique a declaração INSERT para usar variáveis
-
-Iremos inserir muitos dados na tabela usando o R, então mudaremos a declaração INSERT para usar variáveis. Veja no código abaixo o destaque *# Compila o query.*
-
-```
-library(RMariaDB)
-# O método de conexão abaixo utiliza uma senha armazenada num ficheiro de configuração.
-
-# O R precisa de um caminho completo para encontrar o ficheiro de configuração.
-rmariadb.settingsfile<-"C:/Program Files/MySQL/MySQL Server 8.0/periodicos_resultados_pesquisa.cnf"
-
-rmariadb.db<-"periodicos_resultados_pesquisa"
-artigosDb<-dbConnect(RMariaDB::MariaDB(),default.file=rmariadb.settingsfile,group=rmariadb.db)
-
-# Opcional. Lista a tabela. Isso confirma que nos conectamos ao banco de dados.
-dbListTables(artigosDb)
-
-# Compila o query.
-
-# Atribui variáveis.
-entradaTitulo <- "THE LOST LUSITANIA."
-entradaPublicacao <- "21 05 1916"
-# Converte o valor da string para uma data para armazená-la no banco de dados.
-entradaDataPublicacao <- as.Date(entradaPublicacao, "%d %m %Y")
-entradaUrl <- "http://newspapers.library.wales/view/4121281/4121288/94/"
-buscaSimplesTermos <- "German+Submarine"
-
-# Cria a declaração de query.
-query<-paste(
-  "INSERT INTO tbl_periodicos_resultados_pesquisa (
-  titulo_artigo,
-  data_publicacao_artigo,
-  url_artigo,
-  termo_busca_usado)
-  VALUES('",entradaTitulo,"',
-  '",entradaDataPublicacao,"',
-  LEFT(RTRIM('",entradaUrl,"'),99),
-  '",buscaSimplesTermos,"')",
-  sep = ''
-)
-
-# Opcional. Exibe o query para o caso de ser necessário solucionar problemas.
-print(query)
-
-# Executa o query no banco de dados artigosDb que conectamos acima.
-rsInsert <- dbSendQuery(artigosDb, query)
-
-# Limpa o resultado.
-dbClearResult(rsInsert)
-
-# Desconecta para limpar a conexão com o banco de dados.
-dbDisconnect(artigosDb)
-```
-
-Vamos testar esse script:
-
-1. Execute uma declaração SELECT e observe as linhas que possui.
-2. Execute o script em R acima para inserir outro registro.
-3. Realize a declaração SELECT. Deverá visualizar uma linha adicional de dados.
-
-### Erros do SQL
-
-Vamos criar um erro simples no SQL para visualizar o que acontece.
-
-No R, modifique:
-
-```
-entradaTitulo <- "THE LOST LUSITANIA."
-```
-
-para
-
-```
-entradaTitulo <- "THE LOST LUSITANIA'S RUDDER."
-```
-
-e execute novamente o script.
-
-No console R, há um erro:
-
-```
-> rsInsert <- dbSendQuery(artigosDb, query)
-Error in result_create(conn@ptr, statement, is_statement) :
-  You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'S RUDDER.',
-  '1916-05-21',
-  LEFT(RTRIM('http://newspapers.library.wales/view/4' at line 6 [1064]
-```
-
-É possível verificar, com uma declaração SELECT, que não há registro na tabela com um título de artigo denominado *THE LOST LUSITANIA'S RUDDER*.
-
-As aspas simples fazem parte da sintaxe do SQL e indicam uma entrada textual. Se estiverem no lugar errado, provocam um erro. Temos que lidar com casos nos quais há dados com aspas. O SQL aceita duas aspas simples seguidas numa declaração de inserção para representar uma aspa simples nos dados ('').
-
-Lidaremos com as aspas utilizando uma função `gsub` para substituir uma aspa simples por duas aspas simples, como mostrado abaixo.
-
-```
-entradaTitulo <- "THE LOST LUSITANIA'S RUDDER."
-# substitui uma aspa simples por duas aspas simples
-entradaTitulo <- gsub("'", "''", entradaTitulo)
-```
-
-Agora que a questão das aspas no título do artigo está resolvida, execute novamente o script e depois confira com uma declaração SELECT no MySQL Workbench.
-
-```
-SELECT * FROM periodicos_resultados_pesquisa.tbl_periodicos_resultados_pesquisa WHERE titulo_artigo = "THE LOST LUSITANIA'S RUDDER.";
-```
-
-Uma vez visualizado o registro de teste, digite TRUNCATE tbl_periodicos_resultados_pesquisa para remover esses dados de teste.
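O tratamento com `gsub` resolve o caso das aspas, mas, de modo mais geral, consultas parametrizadas evitam por completo o problema de escape manual (e reduzem o risco de injeção de SQL), pois os valores são enviados separadamente da declaração SQL. O esboço abaixo é apenas uma ilustração dessa abordagem, assumindo a conexão artigosDb aberta como nos scripts acima e os marcadores `?` suportados pelo pacote RMariaDB; ele não substitui o código da lição.

```
# Esboço ilustrativo: INSERT parametrizado com marcadores '?'.
# Os valores passados a dbBind() são associados aos marcadores na ordem indicada,
# de modo que aspas simples nos dados não quebram a sintaxe do SQL.
query <- "INSERT INTO tbl_periodicos_resultados_pesquisa
(titulo_artigo, data_publicacao_artigo, url_artigo, termo_busca_usado)
VALUES(?, ?, LEFT(RTRIM(?),99), ?);"
rsInsert <- dbSendQuery(artigosDb, query)
dbBind(rsInsert, list("THE LOST LUSITANIA'S RUDDER.",
                      "1916-05-21",
                      "http://newspapers.library.wales/view/4121281/4121288/94/",
                      "German+Submarine"))
dbClearResult(rsInsert)
```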
-
-# Armazenando um ficheiro de valores separados por vírgulas (.csv) no banco de dados MySQL
-
-Na próxima parte da lição, vamos realizar consultas na tabela do banco de dados. Nosso objetivo é obter dados suficientes na tabela para construir um gráfico. Para nos prepararmos para isso, carregaremos alguns dados de amostra de um ficheiro de valores separados por vírgulas (.csv).
-
-Faça o download dos ficheiros .csv para o seu diretório de trabalho do R. Esses ficheiros estão armazenados no GitHub, então faça o download da versão *Raw* dos ficheiros.
-
-1. [dados-amostra-jardim.csv](/assets/getting-started-with-mysql-using-r/dados-amostra-jardim.csv) Esta é uma lista de artigos de periódicos galeses publicados durante a Primeira Guerra Mundial que correspondem aos termos de busca "*allotment*" (loteamento) e "*garden*" (jardim).
-2. [dados-amostra-submarino.csv](/assets/getting-started-with-mysql-using-r/dados-amostra-submarino.csv) Esta é uma lista de artigos de periódicos galeses publicados durante a Primeira Guerra Mundial que correspondem aos termos de busca "*German*" (alemão) e "*submarine*" (submarino).
-
-No R, execute a função read.csv() e depois visualize o data frame com os dados amostrais.
-
-```
-dadosAmostraJardim <- read.csv(file="dados-amostra-jardim.csv", header=TRUE, sep=",")
-dadosAmostraJardim
-```
-
-Muitos dados serão visualizados, incluindo os que se encontram abaixo. Verifique a aba "Environment" (ambiente) na parte direita do RStudio. O Data Frame dadosAmostraJardim deve conter "1242 obs. of 4 variables".
-
-```
- titulo_artigo
-1 -."e;'N'III GARDEN REQUISITES.
-<...the result of the data frame results have been removed...>
- data_publicacao_artigo url_artigo termo_busca_usado
-1 1918-05-11 http://newspapers.library.wales/view/3581057/3581061/27/ AllotmentAndGarden
-<...the result of the data frame results have been removed...>
-```
-
-Observe que nesses dados de amostra, os nomes dos campos estão incluídos no cabeçalho por conveniência: titulo_artigo, data_publicacao_artigo, url_artigo e termo_busca_usado.
-
-Como observado acima, nosso objetivo aqui é inserir os dados de amostra que estão armazenados no data frame dadosAmostraJardim na tabela MySQL periodicos_resultados_pesquisa. Podemos fazer isso de diferentes maneiras. Uma delas é iterar por cada linha do data frame e executar um comando INSERT, como fizemos acima. Aqui, no entanto, usaremos um comando para inserir todas as linhas em dadosAmostraJardim de uma vez: *dbWriteTable*. Não execute essa declaração ainda, apenas a leia.
-
-```
-dbWriteTable(artigosDb, value = dadosAmostraJardim, row.names = FALSE, name = "tbl_periodicos_resultados_pesquisa", append = TRUE )
-```
-
-| Função | Significado |
-| --- | --- |
-| dbWriteTable(artigosDb, | Use a conexão do banco de dados MySQL artigosDb. |
-| value = dadosAmostraJardim, | Insere os valores do data frame dadosAmostraJardim na tabela. |
-| row.names = FALSE, | Nenhum nome de linha foi especificado. |
-| name = "tbl_periodicos_resultados_pesquisa", | Insere os valores de dadosAmostraJardim na tabela tbl_periodicos_resultados_pesquisa |
-| append = TRUE ) | Adiciona os valores ao que já existe na tabela. Se esse script rodar novamente, todas as linhas em dadosAmostraJardim serão adicionadas à mesma tabela novamente. |
-
-Ainda não estamos preparados para executar o comando dbWriteTable(). Primeiro precisamos nos conectar ao banco de dados. Aqui está o script para fazer isso, assim como para carregar o data frame dados-amostra-submarino.csv. Leia-o e execute-o.
-
-```
-library(RMariaDB)
-rmariadb.settingsfile<-"C:/Program Files/MySQL/MySQL Server 8.0/periodicos_resultados_pesquisa.cnf"
-
-rmariadb.db<-"periodicos_resultados_pesquisa"
-artigosDb<-dbConnect(RMariaDB::MariaDB(),default.file=rmariadb.settingsfile,group=rmariadb.db)
-
-# A função "setwd" define o diretório de trabalho. Deve mudar o caminho desse diretório para o diretório onde guardou os ficheiros .csv.
-setwd("C:/Users/User/Documents")
-
-# Realiza uma busca nos dados de amostra dos periódicos pelos termos "Allotment" e "Garden"
-dadosAmostraJardim <- read.csv(file="dados-amostra-jardim.csv", header=TRUE, sep=",")
-
-# A coluna titulo_artigo na tabela do banco de dados pode armazenar valores de até 99 caracteres.
-# Esta declaração trunca qualquer título de artigo maior que 99 caracteres.
-dadosAmostraJardim$titulo_artigo <- substr(dadosAmostraJardim$titulo_artigo,0,99)
-
-# Esta declaração formata data_publicacao_artigo para representar o tipo de dado DATETIME.
-dadosAmostraJardim$data_publicacao_artigo <- paste(dadosAmostraJardim$data_publicacao_artigo," 00:00:00",sep="")
-
-dbWriteTable(artigosDb, value = dadosAmostraJardim, row.names = FALSE, name = "tbl_periodicos_resultados_pesquisa", append = TRUE )
-
-# Realiza uma busca nos dados de amostra dos periódicos pelos termos German+Submarine.
-dadosAmostraSubmarino <- read.csv(file="dados-amostra-submarino.csv", header=TRUE, sep=",")
-
-dadosAmostraSubmarino$titulo_artigo <- substr(dadosAmostraSubmarino$titulo_artigo,0,99)
-dadosAmostraSubmarino$data_publicacao_artigo <- paste(dadosAmostraSubmarino$data_publicacao_artigo," 00:00:00",sep="")
-
-dbWriteTable(artigosDb, value = dadosAmostraSubmarino, row.names = FALSE, name = "tbl_periodicos_resultados_pesquisa", append = TRUE )
-
-# Desconecta para limpar a conexão com o banco de dados.
-dbDisconnect(artigosDb)
-```
-
-Se o script for executado mais de uma vez, serão gerados registros duplicados. Se isso acontecer, apenas execute o comando TRUNCATE na tabela e execute o script novamente, mas apenas uma vez. É possível verificar se o número de registros é o correto. No MySQL Workbench, execute o seguinte na janela de Query:
-
-```
-SELECT COUNT(*) FROM tbl_periodicos_resultados_pesquisa;
-```
-
-A contagem deve retornar 2880 registros. 1242 de dadosAmostraJardim e 1638 de dadosAmostraSubmarino.
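Também é possível conferir essa contagem a partir do próprio R, sem abrir o MySQL Workbench. O trecho abaixo é um esboço ilustrativo, assumindo a conexão artigosDb ainda aberta (ou reaberta) como no script acima; a função dbGetQuery envia a consulta e devolve o resultado como um data frame num único passo.

```
# Esboço ilustrativo: confere o número de registros por termo de busca diretamente no R.
# dbGetQuery combina dbSendQuery, dbFetch e dbClearResult num único passo.
contagem <- dbGetQuery(artigosDb,
  "SELECT termo_busca_usado, COUNT(*) AS numero_de_artigos
   FROM tbl_periodicos_resultados_pesquisa
   GROUP BY termo_busca_usado;")
print(contagem)
# Esperado: 1242 para AllotmentAndGarden e 1638 para German+Submarine.
```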
- -# Selecionado dados de uma tabela com SQL usando R - -Nosso objetivo aqui é usar a tabela de artigos que importamos e criar um gráfico do número de artigos publicados nos *Welsh Newspapers* (jornais galeses) ao longo de cada mês da Primeira Guerra Mundial que corresponda aos termos de busca *allotment*(loteamento) e *garden* (jardim), e *German* (alemão) e *submarine*(submarino). - -O script abaixo consulta o banco de dados e produz o gráfico de linha abaixo. Leia o script e observe o que está acontecendo. Segue uma explicação do script. - -``` -library(RMariaDB) -rmariadb.settingsfile<-"/Program Files/MySQL/MySQL Server 8.0/periodicos_resultados_pesquisa.cnf" - -rmariadb.db<-"periodicos_resultados_pesquisa" -artigosDb<-dbConnect(RMariaDB::MariaDB(),default.file=rmariadb.settingsfile,group=rmariadb.db) - -termoBuscaUsado = "German+Submarine" -# Solicita uma contagem do número de artigos que correspondem ao termoBuscaUsado que foram publicados a cada mês. -query<-paste("SELECT ( COUNT(CONCAT(MONTH(data_publicacao_artigo), ' ',YEAR(data_publicacao_artigo)))) as 'count' - FROM tbl_periodicos_resultados_pesquisa - WHERE termo_busca_usado ='", termoBuscaUsado,"' - GROUP BY YEAR(data_publicacao_artigo),MONTH(data_publicacao_artigo) - ORDER BY YEAR(data_publicacao_artigo),MONTH(data_publicacao_artigo);",sep="") - -print(query) -rs = dbSendQuery(artigosDb,query) -dbRows<-dbFetch(rs) - -contagemArtigos<-c(as.integer(dbRows$count)) - -# Coloca os resultados da consulta numa série temporal. -qts1 = ts(contagemArtigos, frequency = 12, start = c(1914, 8)) -print(qts1) - -# Plota a série temporal qts1 dos dados com uma linha de espessura 3 na cor vermelha. -plot(qts1, - lwd=3, - col = "red", - xlab="Mês da Guerra", - ylab="Números de artigos de periódicos", - xlim=c(1914,1919), - ylim=c(0,150), - main=paste("Número de artigos nos jornais galeses (Welsh newspapers) que correspondem aos termos de busca listados.",sep=""), - sub="Legenda do termo de busca: Vermelho = German+Submarine. Verde = Allotment And Garden.") - -termoBuscaUsado="AllotmentAndGarden" - -# Solicita uma contagem do número de artigos que correspondem ao termoBuscaUsado que foram publicados a cada mês. -query<-paste("SELECT ( COUNT(CONCAT(MONTH(data_publicacao_artigo),' ',YEAR(data_publicacao_artigo)))) as 'count' FROM tbl_periodicos_resultados_pesquisa WHERE termo_busca_usado='",termoBuscaUsado,"' GROUP BY YEAR(data_publicacao_artigo),MONTH(data_publicacao_artigo) ORDER BY YEAR(data_publicacao_artigo),MONTH(data_publicacao_artigo);",sep="") -print(query) -rs = dbSendQuery(artigosDb,query) -dbRows<-dbFetch(rs) - -contagemArtigos<-c(as.integer(dbRows$count)) - -# Coloca os resultados da consulta numa série temporal. -qts2 = ts(contagemArtigos, frequency = 12, start = c(1914, 8)) - -# Adiciona esta linha com a série temporal qts2 à plotagem existente. -lines(qts2, lwd=3,col="darkgreen") - -# Limpa o resultado. -dbClearResult(rs) - -# Desconecta para limpar a conexão com o banco de dados. -dbDisconnect(artigosDb) -``` - -## Explicação do script de seleção de dados e criação do gráfico. - -O método que conecta o banco de dados é explicado [acima](#Conectando-a-um-banco-de-dados-com-uma-senha). - -Este script seleciona dois resultados de um conjunto de dados e cria um gráfico com esses dados. Um dos resultados é a combinação dos artigos de periódicos com a busca pelos termos "German+Submarine". 
Eles são consultados através da declaração SELECT: - -``` -SELECT ( - COUNT(CONCAT(MONTH(data_publicacao_artigo),' ',YEAR(data_publicacao_artigo)))) as 'count' - FROM tbl_periodicos_resultados_pesquisa - WHERE termo_busca_usado='",termoBuscaUsado,"' - GROUP BY YEAR(data_publicacao_artigo),MONTH(data_publicacao_artigo) - ORDER BY YEAR(data_publicacao_artigo),MONTH(data_publicacao_artigo); -``` - -| SQL | Significado | -| ----------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| SELECT ( | SELECT - Seleciona os dados que correspondem à condição na cláusula WHERE na tabela do banco de dados nomeado . | -| COUNT(CONCAT(MONTH(data_publicacao_artigo),' ',YEAR(data_publicacao_artigo)))) as 'count' | Fornece uma contagem do número de artigos publicados que compartilham o mesmo mês e ano de publicação. CONCAT representa a ação concatenar, que cria um único valor textual de dois ou mais valores textuais, nesse caso, o mês e o ano. | -| FROM tbl_periodicos_resultados_pesquisa | Este é o banco de dados a partir do qual estamos selecionando os dados. | -| GROUP BY YEAR(data_publicacao_artigo),MONTH(data_publicacao_artigo) | Esta declaração GROUP BY é importante para a contagem (COUNT) acima. Aqui os dados estão agrupados por mês e ano, para que seja possível contar todos os registros no grupo. | -| ORDER BY YEAR(data_publicacao_artigo),MONTH(data_publicacao_artigo); | Coloca os resultados ordenados por data, o que é útil já que queremos construir um gráfico por data. | - -As declarações abaixo executam a consulta e colocam o resultado *rs* num data frame *dbRows*: - -``` -rs = dbSendQuery(artigosDb,query) -dbRows<-dbFetch(rs) -``` - -Abaixo, o data frame *dbRows* é colocado numa série temporal com a função *ts()*, para que seja possível plotar para cada mês, iniciando de agosto de 1914. - -``` -# Coloca os resultados da consulta numa série temporal. -qts1 = ts(contagemArtigos, frequency = 12, start = c(1914, 8)) -``` - -Abaixo, os dados na série temporal *qts1* são plotados num gráfico: - -``` -plot(qts1, - lwd=3, - col = "red", - xlab="Mês da Guerra", - ylab="Números de artigos de periódicos", - xlim=c(1914,1919), - ylim=c(0,150), - main=paste("Número de artigos nos jornais galeses (Welsh newspapers) que correspondem aos termos de busca listados.",sep=""), - sub="Legenda do termo de busca: Vermelho = German+Submarine. Verde = Allotment And Garden.") -``` - -O que isso difere da parte do script que gera o gráfico dos artigos correspondentes à busca dos termos "Allotment And Garden"? Não muito, definitivamente. Apenas usamos a função *lines()* para plotar os resultados no mesmo gráfico que construímos acima. - -``` -lines(qts2, lwd=3,col="darkgreen") -``` - -### Resultados da seleção de dados e da criação do gráfico - -Aqui abaixo está o gráfico que deveria aparecer: - -{% include figure.html filename="introducao-ao-mysql-e-r-16.png" caption="Plotagem do número de artigos de periódicos publicados cada mês que correspondem aos termos de busca" %} - -# Indo mais longe com o MySQL - -Se deseja colocar um banco de dados num website, uma maneira de fazê-lo é usando MySQL e a linguagem PHP para construir as páginas do site. 
Um exemplo deste tipo de website é o que construí para [buscar edições do "the Equity newspaper"](https://perma.cc/237N-DD9E). O livro de Larry Ullman's, *PHP and MySQL for Dynamic Web Sites*, aborda como configurar e conectar um banco de dados usando MySQL e PHP de uma maneira resistente à hackers. - -Para exemplos do uso de SQL para ordenar e agrupar dados, assim com também realizar cálculos, veja: [MySQL by Examples for Beginners](http://web.archive.org/web/20171228130133/https://www.ntu.edu.sg/home/ehchua/programming/sql/MySQL_Beginner.html) ou MySQL [Examples of Common Queries](https://perma.cc/84HN-9DBL). - -# Conclusão - -Espero que tenha obtido o conhecimento para configurar uma tabela de banco de dados, conectá-lo e armazenar registros. Embora tenhamos abordado apenas uma pequena parte das diferentes maneiras de realizar consultas nos dados, espero também que saiba a técnica de uso das declarações SELECT para que possa utilizá-las em seus futuros projetos de história digital. - -# Créditos - -Finalizei esta lição graças ao suporte do [George Garth Graham Undergraduate Digital History Research Fellowship](https://perma.cc/S7PP-FY5U). - -Agradeço à Drª. Amanda Visconti pelo suporte e orientação ao longo da preparação desta lição. - -# Referências - -Ullman, L. 2005. *PHP and MySQL for Dynamic Web Sites, 2nd ed.* Berkeley, Calif: Peachpit. - -# Notas - -[^1]: Jason A. French, "Using R With MySQL Databases," blog (3 July 2014), [http://www.jason-french.com/blog/2014/07/03/using-r-with-mysql-databases/](https://perma.cc/5VYV-L5PG). - -[^2]: Taylor Arnold and Lauren Tilton, "Basic Text Processing in R," Programming Historian (27 March 2017), [tradução para português](/pt/licoes/processamento-basico-texto-r). - -[^3]: Taryn Dewar, "R Basics with Tabular Data," Programming Historian (05 September 2016), [tradução para português](/pt/licoes/nocoes-basicas-R-dados-tabulares). - -O script em R usado para recolher dados de amostra se encontra [aqui](https://perma.cc/87AE-LJRG). +--- +title: Introdução ao MySQL com R +layout: lesson +slug: introducao-mysql-r +authors: +- Jeff Blackadar +date: 2018-05-03 +translation_date: 2021-12-18 +editors: +- Amanda Visconti +reviewers: +- Jesse Sadler +- Simon Appleford +translator: +- Jéssica Evelyn Santos +translation-editor: +- Daniel Alves +translation-reviewer: +- Dália Guerreiro +- Leonardo F. Nascimento +difficulty: 2 +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/439 +collection: lessons +activity: transforming +topics: [data-manipulation, distant-reading, r, data-visualization] +abstract: "Esta lição ajudará a armazenar grandes quantidades de dados históricos de maneira estruturada, pesquisar e filtrar esses dados e visualizar alguns dos dados como um gráfico." +original: getting-started-with-mysql-using-r +avatar_alt: Uma mão a segurar um jornal +doi: 10.46430/phpt0025 +--- + +Esta lição é direcionada aos que desejam armazenar grandes quantidades de dados de projetos de história digital de uma forma estruturada. Usaremos um sistema de gerenciamento de dados chamado MySQL para armazenar os dados. + +A linguagem R permite realizar análises e armazenar dados sem que um banco de dados relacional seja utilizado. 
No entanto, há situações nas quais a inclusão de bancos de dados é muito útil, dentre elas: + +- Publicar os resultados de um script em R num *web site* com dados interativos +- Manipular mais dados do que o R pode armazenar em sua própria memória +- Quando os dados já estão armazenados num banco de dados relacional +- Trabalhar com dados de entidades diferentes que são relacionados uns com os outros. Um exemplo seria um banco de dados de soldados de dois exércitos distintos que lutaram numa batalha, onde gostaríamos de saber qual esquadrão, pelotão, companhia e brigada cada soldado fazia parte. + +Uma breve discussão do tema pode ser encontrada no [*blog* de Jason A. French's](hhttps://perma.cc/5VYV-L5PG)[^1]. + +Ao final desta lição, será possível instalar um sistema de gerenciamento de banco de dados em seu computador, criar uma tabela de banco de dados, armazenar informações na tabela e realizar consultas dos dados. Na conclusão da lição, utilizaremos uma consulta do banco de dados para construir um gráfico. + +Usaremos a linguagem de programação R para os exemplos, mas as técnicas podem ser utilizadas com outras linguagens, como Python. + +Para fazer essa lição será necessário um computador com permissão para instalar os programas R e RStudio, entre outros, se já não estiverem instalados. Além da programação em R, também instalaremos alguns componentes de um sistema de gerenciamento de banco de dados chamado MySQL, que funciona nos sistemas operacionais Windows, Mac e Linux. + +Possuir algum conhecimento de instalação de programas e organização de dados em campos é útil para essa lição, cujo nível de dificuldade é mediano. + +{% include toc.html %} + +# Introdução + +O MySQL é um banco de dados relacional usado para armazenar e consultar informações. Esta lição utilizará a linguagem R para fornecer um tutorial e exemplos para: + +- Configurar e realizar uma conexão a uma tabela no MySQL +- Armazenar registros em tabelas +- Consultar informações de tabelas + +Neste tutorial, construiremos um banco de dados de artigos de periódicos que contém palavras de uma busca numa hemeroteca digital. O script armazenará o título, a data publicada e a URL de cada artigo num banco de dados. Utilizaremos outro script para realizar consultas no banco de dados e procurar por padrões historicamente relevantes. Os dados de amostra serão fornecidos pelo arquivo de periódicos [Welsh Newspapers Online](https://perma.cc/9EHD-EVEX). Estamos trabalhando com o objetivo de produzir uma lista de artigos à qual possamos consultar informações. Ao final da lição, vamos executar uma consulta para gerar um gráfico do número de artigos de periódicos no banco de dados, para verificar se há um padrão relevante. + +# Programas necessários + +R, R Studio, MySQL Server e MySQL Workbench são os programas necessários para esta lição. Algumas notas sobre a instalação desses pacotes de programas podem ser encontradas abaixo. + +## R + +Na lição [Processamento Básico de Texto em R](/pt/licoes/processamento-basico-texto-r)[^2], Taylor Arnold e Lauren Tilton fornecem um resumo excelente do conhecimento da linguagem R necessária para esta lição. Apenas um conhecimento básico de R é esperado. A lição [Noções básicas de R com dados tabulares](/pt/licoes/nocoes-basicas-R-dados-tabulares), de Taryn Dewar,[^3] aborda como instalar o R e se familiarizar com a linguagem. + +### Download do R + +Você pode realizar o download do R no [Comprehensive R Archive Network](https://cran.r-project.org/). 
Clique no link que corresponde ao sistema operacional do seu computador. Selecione *base* para instalar o R pela primeira vez. Uma vez que o ficheiro foi baixado, clique no ficheiro para executar o instalador. + +## RStudio + +Os exemplos desta lição utilizam o RStudio, que é uma interface de desenvolvimento para escrever e executar scripts em R. Esta lição usou a versão 1.4.1717 do RStudio. + +### Download do RStudio + +Faça o download do RStudio através do [rstudio.com](https://www.rstudio.com/products/rstudio/#Desktop) e instale-o. Já que o RStudio é de código aberto, você pode selecionar a versão gratuita do RStudio Desktop, rolar a página para baixo e clicar num dos instaladores que corresponda ao sistema operacional de seu computador. Uma vez que o download foi realizado, clique no ficheiro para executar o instalador. + +## MySQL + +SQL significa *Structured Query Language* (Linguagem estruturada de consulta), que é um conjunto de comandos para armazenar e recuperar informações a partir de um banco de dados relacional. MySQL é um tipo de sistema de gerenciamento de banco de dados relacionais. Há muitos outros, como Microsoft SQL Server, IBM DB2 e Microsoft Access. Esta lição utiliza o MySQL porque é um programa de código aberto, utilizado por uma grande comunidade, tem uma longa trajetória e possui uma versão gratuita que pode ser utilizada. + +### Realizando o download e instalando o MySQL + +Nesta seção, iremos instalar o MySQL, que é o sistema que mantém o banco de dados, assim como o MySQL Workbench, que é onde se trabalha para configurar a estrutura do banco de dados. Para usar o MySQL,o MySQL Workbench não é necessário, podem ser utilizados apenas comandos digitados. Esta lição utiliza o MySQL Workbench porque é uma *GUI* (Interface gráfica do usuário) que facilita o aprendizado de MySQL. + +Conclua essas instruções para instalar o MySQL Community Server e o MySQL Workbench em seu computador. + +### MySQL Community Server + +Este é o servidor onde o banco de dados é armazenado. Sua instalação é necessária para que seja possível conectar e armazenar os dados. Abaixo, faremos o download dos ficheiros, a instalação e iniciaremos o servidor. Esta lição utilizou a versão 8.0.21 do MySQL e 8.0.26 do MySQL Workbench. + +#### Fazendo o download do ficheiro de instalação do MySQL Community Server + +Clique neste link: [https://dev.mysql.com/downloads/mysql/](https://dev.mysql.com/downloads/mysql/). Role a página para baixo e selecione o sistema operacional que corresponde ao seu computador. Se necessário, clique em **Select Operating System** para selecionar o sistema operacional. Uma vez feita essa operação, clique no botão azul **Go to Download Page**. Depois clique no botão azul **Download**. Na página de download, role para baixo e terá a opção de começar o download clicando em **No thanks, just start my download** (Não, obrigado, apenas inicie o download). + +#### Instalação do MySQL Community Server + +Abaixo se encontram as dicas de instalação para PC e Mac: + +##### Dicas de instalação para PC + +A maneira recomendada de instalar os componentes do MySQL é através do instalador do MySQL para Windows. Com o ficheiro já baixado, clique duas vezes no ficheiro para instalá-lo. Siga as instruções para aceitar a licença (nota de tradução: com o instalador MySQL para Windows pode optar por fazer de uma vez só a instalação do MySQL Server e do MySQL Workbench; para isso, escolha os respectivos componentes e siga as instruções abaixo). 
+
+Depois que os componentes forem instalados, serão solicitadas as seguintes opções:
+
+###### 1. Escolhendo um tipo de configuração
+
+Selecione: **Developer Default** (Padrão do desenvolvedor). Esta opção *instala o MySQL Server e as ferramentas necessárias para o desenvolvimento de aplicações. Isto é útil se pretende desenvolver aplicações para um servidor existente.*
+(Ver abaixo)
+
+{% include figure.html filename="introducao-ao-mysql-e-r-1.PNG" caption="Configure o tipo de padrão do desenvolvedor" %}
+
+###### 2. Verificar Requisitos
+
+Clique no botão **Execute** caso haja requisitos pendentes (*failing requirements*) listados na checagem de requisitos. A lista de requisitos pode ser diferente da mostrada aqui. Uma vez concluída a instalação dos requisitos pendentes, clique no botão *Next*.
+(Ver abaixo)
+
+{% include figure.html filename="introducao-ao-mysql-e-r-2.PNG" caption="Clique no botão *Execute* se necessário" %}
+
+###### 3. Tipo e Rede (1)
+
+Selecione: **Standalone MySQL Server**
+(Ver abaixo)
+
+{% include figure.html filename="getting-started-with-mysql-7.png" caption="Selecione Standalone MySQL Server" %}
+
+###### 4. Tipo e Rede (2)
+
+*Config type*: Selecione: **Development Computer**
+Marque: TCP/IP. Port number (Número da porta): 3306.
+(Ver abaixo)
+
+{% include figure.html filename="introducao-ao-mysql-e-r-4.png" caption="Development Computer TCPIP Port 3306" %}
+
+###### 5. Contas e Funções
+
+Digite e confirme uma senha *root* e guarde-a em local seguro.
+
+{% include figure.html filename="introducao-ao-mysql-e-r-5.png" caption="Digite a senha *root* e depois guarde-a em local seguro" %}
+
+###### 6. Serviço do Windows
+
+As configurações aqui são opcionais, mas achamos mais fácil configurar o MySQL como um serviço do Windows e incluí-lo na inicialização automática. Um serviço do Windows é um processo que é executado no computador enquanto se está trabalhando. É possível mudar as configurações do serviço do Windows posteriormente, para iniciar o MySQL manualmente e impedir que o programa inicialize quando não for necessário.
+
+{% include figure.html filename="introducao-ao-mysql-e-r-6.png" caption="MySQL como um serviço do Windows" %}
+
+Clique nos botões *Execute* e *Next* para finalizar a instalação e inicializar o servidor.
+
+###### 7. MySQL Workbench e Senha Root
+
+Procure por MySQL Workbench no menu de inicialização do Windows, sob o item MySQL. Se estiver lá, clique para iniciá-lo. Caso não esteja, clique no instalador do MySQL - Community para executar novamente a instalação e adicionar o MySQL Workbench aos componentes instalados.
+Depois de aberto o MySQL Workbench, clique na instância local do seu MySQL Server.
+Quando a senha *root* for solicitada, digite a senha criada na etapa *5. Contas e Funções* (Accounts and Roles).
+(Ver abaixo)
+
+{% include figure.html filename="introducao-ao-mysql-e-r-7.png" caption="Senha Root" %}
+
+##### Dicas de instalação para um Mac
+
+###### 1. Instalação do MySQL Community Server
+
+Com o ficheiro de instalação do MySQL Community Server baixado, clique duas vezes no ficheiro para instalá-lo. (Ver abaixo)
+
+{% include figure.html filename="introducao-ao-mysql-e-r-8.png" caption="Ficheiro de instalação" %}
+
+###### 2. Guarde a senha temporária
+
+Siga as instruções para aceitar a licença e o local de instalação. **Importante: Uma senha temporária será fornecida. Guarde-a cuidadosamente.** (Veja o exemplo abaixo. Sua senha temporária será diferente da mostrada abaixo.) Se um erro for cometido, é possível remover o servidor instalado e reinstalá-lo, mas essa é uma pequena complicação.
Um dos revisores dessa lição achou que [essa resposta do StackOverflow](https://perma.cc/J4Q5-SLK5) pode auxiliar nesta etapa.
+
+{% include figure.html filename="getting-started-with-mysql-18.png" caption="Senha temporária" %}
+
+Concluída a instalação, iremos alterar a senha *root* para o servidor do MySQL.
+
+###### 3. Modifique a senha do servidor do MySQL
+
+**Esta seção da lição causou dificuldade para algumas pessoas. Leve o tempo que for necessário e note, por favor, que os comandos do MySQL terminam com um ponto e vírgula. Observe-os em alguns dos comandos abaixo.**
+
+3.1. Abra uma janela do terminal
+
+3.2. Adicione /usr/local/mysql/bin ao PATH através do comando abaixo. O PATH é uma lista de diretórios que o computador percorre quando um comando é digitado para executar um programa. No próximo passo, ao executarmos o *mysql*, o computador procura nos diretórios do PATH por um programa com esse nome; ele o encontra em */usr/local/mysql/bin* e o executa. O PATH apenas poupa a digitação do caminho completo de um programa, nesse caso */usr/local/mysql/bin/mysql*, quando se quer executá-lo.
+
+```
+export PATH=${PATH}:/usr/local/mysql/bin
+```
+
+3.3. Inicie o servidor do MySQL.
+
+Vá até System Preferences > ícone do MySQL > clique em "Start MySQL server".
+
+3.4. Inicie uma sessão no MySQL. No comando abaixo, depois de *--password*, digite a senha guardada no passo *2. Guarde a senha temporária*.
+
+```
+mysql --user=root --password=senha_root_guardada_acima
+```
+
+3.5. Configure a senha *root* para uma **nova** senha. Escolha e guarde a nova senha cuidadosamente. No *prompt* mysql>, digite o comando abaixo, substituindo o texto entre aspas simples pela nova senha criada.
+
+```
+SET PASSWORD=PASSWORD('nova_senha_criada_na_etapa_3.5');
+```
+
+(Nota: a função PASSWORD() foi removida no MySQL 8. Se o comando acima devolver um erro de sintaxe no seu servidor, o comando equivalente `ALTER USER 'root'@'localhost' IDENTIFIED BY 'nova_senha_criada_na_etapa_3.5';` deve funcionar.)
+
+3.6. Reinicie o computador. Depois de reiniciar, é possível que seja necessário repetir a etapa *3.3 Inicie o servidor do MySQL* acima.
+
+###### 4. Download do MySQL Workbench
+
+Clique nesse link: [https://dev.mysql.com/downloads/workbench/](https://dev.mysql.com/downloads/workbench/). Role a página para baixo e clique em **Select Operating System** para selecionar o sistema operacional que corresponde ao seu computador. Se necessário, clique em **Select OS Version** para selecionar a versão do sistema operacional. Feito isso, clique no botão azul de **Download**. Na página de download, role para baixo e terá a opção de iniciar o download ao clicar em **No thanks, just start my download.** (Não, obrigado, apenas inicie o download.)
+
+Com o ficheiro baixado, clique duas vezes para instalá-lo. Feita a instalação do MySQL Workbench de acordo com as instruções na tela, arraste o ícone para a pasta de aplicações, à esquerda. (Ver abaixo)
+
+{% include figure.html filename="introducao-ao-mysql-e-r-10.png" caption="MySQL Workbench" %}
+
+# Crie um banco de dados
+
+Aqui iremos criar um banco de dados que serve como um contentor para as tabelas nas quais armazenaremos informações. Uma tabela é a estrutura que mantém os dados que queremos armazenar. Tabelas contêm muitas linhas de registros. Um exemplo de informações básicas de contato conteria campos para nome, número de telefone e endereço de e-mail. Numa tabela, os campos são organizados por *colunas*.
+
+Aqui está uma tabela de amostra com uma linha de dados que representa um registro:
+
+| nome        | número de telefone | endereço de e-mail |
+| ----------- | ------------------ | ------------------ |
+| Pat Abraham | 613-555-1212       | pat@zmail.ca       |
+
+## Abra o MySQL Workbench
+
+Abra o MySQL Workbench. Clique duas vezes em *Local Instance MySQL80* (num Mac isto pode aparecer como *Local Instance 3306*). É possível que a senha *root* criada nas etapas acima seja solicitada. Em alguns Macs, uma aba de *Query* será aberta; se não for, abra uma aba de *Query* utilizando: *File > New Query Tab*.
+
+## Crie um banco de dados
+
+Agora iremos criar um novo banco de dados. Utilizando o MySQL Workbench, realize os seguintes passos:
+
+1. Na janela de **Query**, digite:
+
+    ```
+    CREATE DATABASE periodicos_resultados_pesquisa;
+    ```
+
+2. Execute o comando CREATE DATABASE. Clique no **relâmpago/raio** ou, utilizando o menu, clique em *Query* e então em *Execute Current Statement*.
+
+3. O novo banco de dados **periodicos_resultados_pesquisa** deve estar visível na aba **SCHEMAS**, no canto superior esquerdo da tela. Se não conseguir visualizar um item chamado periodicos_resultados_pesquisa, clique no botão de atualizar.
+
+(Ver abaixo:)
+
+{% include figure.html filename="introducao-ao-mysql-e-r-11.png" caption="Crie um banco de dados no MySQL Workbench" %}
+
+## USE o banco de dados
+
+Em seguida, iremos inserir uma declaração USE para informar ao MySQL qual banco de dados será usado. Isto se torna mais importante quando se tem mais de um banco de dados no computador.
+
+Na janela de **Query**, apague todo o comando CREATE DATABASE e digite:
+
+```
+USE periodicos_resultados_pesquisa;
+```
+
+Novamente, clique no **relâmpago/raio** ou, usando o menu, clique em *Query* e então em *Execute Current Statement*. É possível usar um atalho de teclado para isso. Num Mac, use *Command+Return*. Num PC, use *Ctrl+Shift+Enter*. A partir desse ponto da lição, todas as vezes que um comando for digitado na janela de *Query* será executado desta maneira.
+
+(Ver abaixo:)
+
+{% include figure.html filename="introducao-ao-mysql-e-r-12.png" caption="USE um banco de dados no MySQL Workbench" %}
+
+# Adicione uma tabela
+
+1. No MySQL Workbench, procure no lado esquerdo no painel **Navigator**, na aba **SCHEMAS**, por **periodicos_resultados_pesquisa**.
+2. Clique em **Tables** com o lado direito do mouse e depois clique em **Create Table**.
+3. Para **Table Name:** digite **tbl_periodicos_resultados_pesquisa**
+
+## Adicione colunas à tabela
+
+Adicione essas colunas:
+
+1. **id** Data type: **INT**. Marque PK (Primary Key), NN (Not Null) e AI (Auto Increment). Esta coluna de *id* será usada para relacionar registros nesta tabela com registros em outras tabelas.
+2. **titulo_artigo** Data type: **VARCHAR(99)**. Esta coluna armazenará o título de cada resultado de artigo que coletarmos da busca.
+3. **data_publicacao_artigo** Data type: **DATETIME**. Esta coluna armazenará a data em que o periódico foi publicado.
+4. **url_artigo** Data type: **VARCHAR(99)**. Esta coluna armazenará a URL de cada resultado que coletarmos da pesquisa.
+5. **termo_busca_usado** Data type: **VARCHAR(45)**. Esta coluna irá armazenar a palavra que usamos para buscar os periódicos.
+   Clique no botão **Apply**.
+
+Se preferir, todas as etapas acima podem ser realizadas com um comando. Este comando pode ser executado na janela de *Query* para criar a tabela com as colunas indicadas acima.
+
+```
+CREATE TABLE periodicos_resultados_pesquisa.tbl_periodicos_resultados_pesquisa (
+id INT NOT NULL AUTO_INCREMENT,
+titulo_artigo VARCHAR(99) NULL,
+data_publicacao_artigo DATETIME NULL,
+url_artigo VARCHAR(99) NULL,
+termo_busca_usado VARCHAR(45) NULL,
+PRIMARY KEY (id));
+```
+
+*Dica: Leve o tempo que for necessário para pensar sobre a elaboração da tabela e sua nomeação, uma vez que um banco de dados bem elaborado será mais fácil de trabalhar e entender.*
+
+## Adicione um usuário para se conectar ao banco de dados
+
+Um usuário é uma conta que tem permissão para se conectar a um banco de dados. Abaixo, adicionaremos um novo usuário para que essa conta apenas se conecte a esse novo banco de dados. Usar essa conta de usuário para a conexão com esse banco de dados limita a exposição a outros bancos de dados, caso a senha para este usuário seja comprometida. Dar ao usuário os privilégios mínimos requeridos para realizar o necessário reduz o risco caso outra pessoa tenha acesso à senha de usuário. Por exemplo, se um usuário pode apenas ler um banco de dados, uma senha descoberta representa um risco menor do que no caso de um usuário que também pode alterar ou apagar o banco de dados.
+
+No menu do MySQL Workbench, clique em **Server** e depois em **Users and Privileges**
+
+**Usuários de Mac** - Em alguns computadores Mac, como meu laptop de teste, o painel de **Schema Privileges** não é exibido corretamente. Veja a nota abaixo da captura de tela se isso ocorrer.
+
+Clique no botão **Add Account** e complete os detalhes para a nova conta de usuário na caixa de diálogo:
+
+1. Login Name: **periodicos_pesquisa_usuario**
+2. Authentication Type: selecione **Standard**
+3. Limit to Hosts Matching: **localhost**
+4. Digite e confirme uma senha *AlgoDificil*
+5. Clique na aba **Administrative Roles**. Certifique-se de que nada está marcado. Esta conta é apenas para acessar o banco de dados.
+6. Clique na aba **Schema Privileges** e clique **Add Entry**.
+7. Na caixa de diálogo **New Schema Privilege Definition**, clique na caixa de seleção **Selected schema:** e selecione **periodicos_resultados_pesquisa**. Clique OK.
+8. Clique em todas as opções de *Object Rights*: SELECT, INSERT, UPDATE, DELETE, EXECUTE, SHOW VIEW, como mostrado na imagem abaixo. (Este usuário precisará fazer muitas coisas posteriormente na lição, por isso estamos lhe concedendo várias permissões.)
+9. Clique em **Apply**.
+
+{% include figure.html filename="introducao-ao-mysql-e-r-13.PNG" caption="Configurando permissões para a nova conta" %}
+
+### Schema Privileges não exibidos corretamente
+
+Alguns computadores Mac, como meu laptop de teste, não exibem corretamente o painel de **Schema Privileges**. Nesse caso, é possível realizar a tarefa acima através de um *script* usando uma janela de Query.
+
+Se o usuário já foi criado acima, execute o seguinte comando para lhe conceder privilégios de usuário:
+
+```
+GRANT SELECT, INSERT, UPDATE, DELETE, EXECUTE, SHOW VIEW ON periodicos_resultados_pesquisa.* TO 'periodicos_pesquisa_usuario'@'localhost';
+```
+
+Se o usuário não foi criado ainda, execute estes dois comandos para criar um usuário e depois lhe conceder privilégios de usuário:
+
+```
+CREATE USER 'periodicos_pesquisa_usuario'@'localhost' IDENTIFIED BY 'AlgoDificil';
+GRANT SELECT, INSERT, UPDATE, DELETE, EXECUTE, SHOW VIEW ON periodicos_resultados_pesquisa.* TO 'periodicos_pesquisa_usuario'@'localhost';
+```
+
+### MySQL versão 8 e tipo de autenticação de usuário
+
+Quando um usuário é criado no Workbench do MySQL 8, o **Authentication Type** (tipo de autenticação) é configurado para o padrão **caching_sha2_password**. Esse tipo de autenticação causa um erro para o pacote R que usaremos para nos conectarmos ao banco de dados mais adiante nesta lição. O erro é *Authentication plugin 'caching_sha2_password' cannot be loaded* e é descrito no [Stack Overflow](https://perma.cc/7NVR-TSYT).
+
+Para evitar esse erro, podemos modificar o tipo de autenticação do usuário para padrão (Standard). Para fazer isso, execute o seguinte comando na janela de *Query*:
+
+```
+ALTER USER 'periodicos_pesquisa_usuario'@'localhost' IDENTIFIED WITH mysql_native_password BY 'AlgoDificil';
+```
+
+# Crie um R Script que se conecte ao banco de dados
+
+Abra o RStudio, que foi instalado anteriormente na lição. Veja a seção [RStudio](#rstudio).
+
+Agora usaremos o RStudio para escrever um novo R Script e salvá-lo com o nome periodicos_resultados_pesquisa.R.
+
+Vá em File > New File > R Script e depois salve o novo ficheiro com o nome periodicos_resultados_pesquisa.R.
+
+Usaremos o pacote RMariaDB para realizar a conexão com o MySQL. (Se tiver curiosidade, a documentação para o pacote RMariaDB pode ser encontrada [aqui](https://perma.cc/FX5P-DAW7).)
+
+Se não possui o pacote RMariaDB instalado (o que é provável, caso seja a primeira vez que usa o RStudio), instale-o utilizando o _console_ do RStudio. Após abrir o RStudio, copie e cole o comando abaixo no console (a janela da esquerda), junto ao símbolo >, e depois pressione Enter:
+
+```
+install.packages("RMariaDB")
+```
+
+Adicione o seguinte comando ao script periodicos_resultados_pesquisa.R (janela de cima, à esquerda):
+
+```
+library(RMariaDB)
+```
+
+## Conectando a um banco de dados com uma senha
+
+Primeiro, nos conectaremos ao banco de dados usando uma senha. (Depois utilizaremos um meio de conexão melhor). Por ora, usaremos uma variável para armazenar a senha. Cada vez que iniciar o R, será necessário definir esta variável novamente, mas isso é melhor do que publicar uma senha *hardcoded* caso compartilhe seus scripts, como pode fazer usando o GitHub.
+
+No console do RStudio, digite o comando abaixo, substituindo *AlgoDificil* pela senha criada para periodicos_pesquisa_usuario nos passos realizados acima para adicionar um usuário ao banco de dados.
+
+```
+senhadeusuariolocal <- "AlgoDificil"
+```
+
+Adicione as seguintes declarações em R ao ficheiro periodicos_resultados_pesquisa.R e salve-o.
+
+Para executar este script, selecione todo o texto e clique no botão *Run* (Executar). (Há outras maneiras de executar apenas uma parte do script ou o script inteiro. Se tiver curiosidade, procure no menu abaixo de Code > Run Region. O atalho CTRL+ALT+R executa todo o código em R no script.)
+
+```
+library(RMariaDB)
+# O método de conexão abaixo utiliza uma senha armazenada numa variável.
+# Para utilizar isto, configure senhadeusuariolocal="A senha de periodicos_pesquisa_usuario"
+
+artigosDb <- dbConnect(RMariaDB::MariaDB(), user='periodicos_pesquisa_usuario', password=senhadeusuariolocal, dbname='periodicos_resultados_pesquisa', host='localhost')
+dbListTables(artigosDb)
+dbDisconnect(artigosDb)
+```
+
+No console, deverá visualizar:
+
+```
+> dbListTables(artigosDb)
+[1] "tbl_periodicos_resultados_pesquisa"
+> dbDisconnect(artigosDb)
+```
+
+Sucesso! O que conseguiu:
+
+1. Conectar ao banco de dados com dbConnect.
+2. Listar a tabela no banco de dados com dbListTables.
+3. Desconectar do banco de dados usando dbDisconnect.
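+
+Uma variação possível, antes de passarmos ao método seguinte: no RStudio, a senha pode ser solicitada numa caixa de diálogo em vez de digitada no console, de modo que ela não fique registrada no histórico de comandos. Um esboço (supõe o pacote rstudioapi, que acompanha o RStudio; o restante é idêntico ao script acima):
+
+```
+library(RMariaDB)
+# Esboço: pede a senha numa caixa de diálogo do RStudio, em vez de
+# digitá-la no console e deixá-la registrada no histórico de comandos.
+senhadeusuariolocal <- rstudioapi::askForPassword("Senha de periodicos_pesquisa_usuario")
+artigosDb <- dbConnect(RMariaDB::MariaDB(), user='periodicos_pesquisa_usuario', password=senhadeusuariolocal, dbname='periodicos_resultados_pesquisa', host='localhost')
+dbListTables(artigosDb)
+dbDisconnect(artigosDb)
+```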
+
+### Conectar-se ao banco de dados com uma senha armazenada num ficheiro de configuração
+
+O exemplo de conexão acima é uma das maneiras de se conectar. O método de conexão descrito abaixo armazena a informação da conexão do banco de dados num ficheiro de configuração, para que não seja necessário digitar uma senha numa variável todas as vezes que uma sessão no R for iniciada. Acredito que esse é um processo minucioso, mas é uma maneira mais padronizada e segura de proteger as credenciais usadas para acessar seu banco de dados. Esse método de conexão será usado no código do restante deste tutorial, mas pode ser substituído pelo método de conexão mais simples mostrado acima, se preferir.
+
+#### Crie o ficheiro .cnf para armazenar a informação de conexão com o banco de dados MySQL
+
+1. Abra um editor de texto, como o notepad, nano ou TextEdit, e cole as linhas abaixo, substituindo a senha pela criada para periodicos_pesquisa_usuario na etapa acima de adição de um usuário ao banco de dados.
+
+```
+[periodicos_resultados_pesquisa]
+user=periodicos_pesquisa_usuario
+password=AlgoDificil
+host=127.0.0.1
+port=3306
+database=periodicos_resultados_pesquisa
+```
+
+2. Salve este ficheiro em algum local fora do diretório de trabalho do R. Salvei o meu no mesmo diretório de outros ficheiros de configuração do MySQL. No PC, o caminho foi o seguinte: C:\Program Files\MySQL\MySQL Server 8.0. Dependendo de seu sistema operacional e da versão do MySQL, esse local pode estar em outro lugar. No Mac, usei /Users/blackadar/Documents/ como a pasta de destino. Testei colocar este ficheiro em lugares diferentes; apenas é necessário que o R possa localizá-lo quando o script for executado. Nomeie o ficheiro como **periodicos_resultados_pesquisa.cnf**.
+
+3. Atualize o script periodicos_resultados_pesquisa.R acima para conectar-se ao banco de dados usando o ficheiro de configuração.
+
+```
+library(RMariaDB)
+# O método de conexão abaixo utiliza uma senha armazenada num ficheiro de configuração.
+
+# O R precisa de um caminho completo para encontrar o ficheiro de configuração.
+rmariadb.settingsfile<-"C:/Program Files/MySQL/MySQL Server 8.0/periodicos_resultados_pesquisa.cnf"
+
+rmariadb.db<-"periodicos_resultados_pesquisa"
+artigosDb<-dbConnect(RMariaDB::MariaDB(),default.file=rmariadb.settingsfile,group=rmariadb.db)
+
+# Lista a tabela. Isso confirma que estamos conectados ao banco de dados.
+dbListTables(artigosDb)
+
+# Desconecta para limpar a conexão com o banco de dados.
+dbDisconnect(artigosDb)
+```
+
+4. Execute seu script.
+
+No console, entre outras linhas, deverá ver novamente:
+
+```
+> dbListTables(artigosDb)
+[1] "tbl_periodicos_resultados_pesquisa"
+```
+
+A conexão com o banco de dados utilizando um ficheiro de configuração foi realizada com sucesso.
+
+# Armazenando dados numa tabela com o SQL
+
+Nesta seção da lição, criaremos uma declaração no SQL para inserir uma linha de dados no banco de dados sobre esse [artigo de periódico](https://perma.cc/C8MR-WYV2). Iremos inserir o registro primeiro no MySQL Workbench e depois faremos isso no R.
+
+1. No MySQL Workbench, clique no ícone marcado como SQL+ para criar uma nova aba para executar consultas SQL (ou vá ao menu "File" e escolha a opção "New Query Tab").
+
+2. Cole a declaração abaixo na janela de Query. Esta ação irá inserir um registro na tabela.
+
+    ```
+    INSERT INTO tbl_periodicos_resultados_pesquisa (
+    titulo_artigo,
+    data_publicacao_artigo,
+    url_artigo,
+    termo_busca_usado)
+    VALUES('THE LOST LUSITANIA.',
+    '1915-05-21',
+    LEFT(RTRIM('http://newspapers.library.wales/view/4121281/4121288/94/'),99),
+    'German+Submarine');
+    ```
+
+3. Clique na imagem de relâmpago/raio na aba do SQL para executar a declaração SQL.
+
+{% include figure.html filename="introducao-ao-mysql-e-r-14.png" caption="Inserindo um registro numa tabela usando MySQL Workbench" %}
+
+## Explicação da declaração INSERT
+
+| SQL | Significado |
+| --------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| INSERT INTO tbl_periodicos_resultados_pesquisa ( | Insere um registro na tabela nomeada tbl_periodicos_resultados_pesquisa |
+| titulo_artigo, | nome do campo a ser preenchido por um valor |
+| data_publicacao_artigo, | " |
+| url_artigo, | " |
+| termo_busca_usado) | " |
+| VALUES('THE LOST LUSITANIA.', | O valor a ser inserido no campo titulo_artigo |
+| '1915-05-21', | campo data_publicacao_artigo |
+| LEFT(RTRIM('http://newspapers.library.wales/view/4121281/4121288/94/'),99), | campo url_artigo. Este campo é um VARCHAR(99), portanto tem um máximo de 99 caracteres. Inserir uma URL mais longa que 99 caracteres causaria um erro, portanto, duas funções são utilizadas para controlar isso. RTRIM() remove espaços residuais à direita da URL. LEFT(value,99) retorna apenas os 99 caracteres mais à esquerda da URL já sem os espaços. Esta URL é mais curta que isso, então essas funções estão aqui apenas como exemplo. |
+| 'German+Submarine'); | campo termo_busca_usado |
+
+Opcional: Modifique a declaração INSERT acima e execute-a algumas vezes. Por exemplo:
+
+```
+INSERT INTO tbl_periodicos_resultados_pesquisa (
+titulo_artigo,
+data_publicacao_artigo,
+url_artigo,
+termo_busca_usado)
+VALUES('test insert.',
+'1916-07-01',
+LEFT(RTRIM('http://newspapers.library.wales/view/4121281/4121288/94/'),99),
+'German+Submarine');
+```
+
+## Consultando dados numa tabela com o SQL
+
+Nesta seção da lição, criaremos uma declaração no SQL para selecionar a linha de dados que inserimos no banco de dados. Faremos isso primeiro no MySQL Workbench e depois no R.
+
+1. Cole a declaração abaixo numa janela de query no MySQL Workbench. Isto irá selecionar registros da tabela.
+
+    ```
+    SELECT titulo_artigo FROM tbl_periodicos_resultados_pesquisa;
+    ```
+
+2. Clique na imagem de relâmpago/raio na aba do SQL para executá-la. Deverá visualizar o título do artigo "THE LOST LUSITANIA." na grade de resultados. Ver abaixo.
+
+{% include figure.html filename="introducao-ao-mysql-e-r-15.png" caption="Selecionando registros de uma tabela usando MySQL Workbench" %}
+
+Opcional: Modifique a declaração SELECT acima alterando os campos selecionados e execute novamente. Adicione mais de um campo à declaração SELECT e execute:
+
+```
+SELECT titulo_artigo, data_publicacao_artigo FROM tbl_periodicos_resultados_pesquisa;
+```
+
+## Armazenando dados numa tabela com SQL usando R
+
+Vamos fazer isso usando R!
Abaixo se encontra uma versão expandida do R Script que usamos para nos conectar ao banco de dados. Para sermos concisos, os três primeiros comentários que tínhamos no R Script mostrado acima foram removidos. Não são mais necessários.
+
+Na linha 4 do script abaixo, lembre-se de modificar o caminho do rmariadb.settingsfile para que corresponda à localização desse ficheiro em seu computador.
+
+```
+library(RMariaDB)
+# O método de conexão abaixo utiliza uma senha armazenada num ficheiro de configuração.
+
+# O R precisa de um caminho completo para encontrar o ficheiro de configuração.
+rmariadb.settingsfile<-"C:/Program Files/MySQL/MySQL Server 8.0/periodicos_resultados_pesquisa.cnf"
+
+rmariadb.db<-"periodicos_resultados_pesquisa"
+artigosDb<-dbConnect(RMariaDB::MariaDB(),default.file=rmariadb.settingsfile,group=rmariadb.db)
+
+# Opcional. Lista a tabela. Isso confirma que nos conectamos ao banco de dados.
+dbListTables(artigosDb)
+
+# Cria a declaração de query.
+query<-"INSERT INTO tbl_periodicos_resultados_pesquisa (
+titulo_artigo,
+data_publicacao_artigo,
+url_artigo,
+termo_busca_usado)
+VALUES('THE LOST LUSITANIA.',
+'1915-05-21',
+LEFT(RTRIM('http://newspapers.library.wales/view/4121281/4121288/94/'),99),
+'German+Submarine');"
+
+# Opcional. Exibe o query para o caso de ser necessário solucionar problemas.
+print(query)
+
+# Executa o query no banco de dados artigosDb que conectamos acima.
+rsInsert <- dbSendQuery(artigosDb, query)
+
+# Limpa o resultado.
+dbClearResult(rsInsert)
+
+# Desconecta para limpar a conexão com o banco de dados.
+dbDisconnect(artigosDb)
+```
+
+No script acima, realizamos duas etapas para inserir um registro:
+
+1. Defina a declaração INSERT na linha com: query <- "INSERT INTO tbl_periodicos_resultados_pesquisa (
+2. Execute a declaração INSERT armazenada na variável da consulta com: rsInsert <- dbSendQuery(artigosDb, query)
+
+Execute o script acima no RStudio e depois execute uma declaração SELECT no MySQL Workbench. Consegue visualizar o novo registro adicionado?
+
+### Realize uma limpeza nos dados de teste
+
+Neste ponto é provável que haja mais de um registro com o título de artigo "THE LOST LUSITANIA.", o que é razoável para a testagem, mas não queremos dados duplicados. Iremos remover os dados de teste e começar novamente. Usando a janela de query no MySQL Workbench, execute a declaração SQL:
+
+```
+TRUNCATE tbl_periodicos_resultados_pesquisa;
+```
+
+No painel Action Output do MySQL Workbench deverá visualizar:
+
+```
+TRUNCATE tbl_periodicos_resultados_pesquisa;	0 row(s) affected	0.093 sec
+```
+
+Para praticar o que acabamos de fazer:
+
+1. Execute uma declaração SELECT novamente. Não deverá receber linhas de retorno.
+2. Execute novamente o script em R acima para inserir um registro.
+3. Realize uma declaração SELECT. Deverá visualizar uma linha de dados.
+
+### Modifique a declaração INSERT para usar variáveis
+
+Iremos inserir muitos dados na tabela usando o R, então mudaremos a declaração INSERT para usar variáveis. Veja no código abaixo o destaque *# Compila o query.*
+
+```
+library(RMariaDB)
+# O método de conexão abaixo utiliza uma senha armazenada num ficheiro de configuração.
+
+# O R precisa de um caminho completo para encontrar o ficheiro de configuração.
+rmariadb.settingsfile<-"C:/Program Files/MySQL/MySQL Server 8.0/periodicos_resultados_pesquisa.cnf"
+
+rmariadb.db<-"periodicos_resultados_pesquisa"
+artigosDb<-dbConnect(RMariaDB::MariaDB(),default.file=rmariadb.settingsfile,group=rmariadb.db)
+
+# Opcional. Lista a tabela. Isso confirma que nos conectamos ao banco de dados.
+dbListTables(artigosDb)
+
+# Compila o query.
+
+# Atribui variáveis.
+entradaTitulo <- "THE LOST LUSITANIA."
+entradaPublicacao <- "21 05 1916"
+# Converte o valor da string para uma data para armazená-la no banco de dados.
+# (%d = dia, %m = mês em número, %Y = ano com quatro dígitos.)
+entradaDataPublicacao <- as.Date(entradaPublicacao, "%d %m %Y")
+entradaUrl <- "http://newspapers.library.wales/view/4121281/4121288/94/"
+buscaSimplesTermos <- "German+Submarine"
+
+# Cria a declaração de query.
+query<-paste(
+  "INSERT INTO tbl_periodicos_resultados_pesquisa (
+  titulo_artigo,
+  data_publicacao_artigo,
+  url_artigo,
+  termo_busca_usado)
+  VALUES('",entradaTitulo,"',
+  '",entradaDataPublicacao,"',
+  LEFT(RTRIM('",entradaUrl,"'),99),
+  '",buscaSimplesTermos,"')",
+  sep = ''
+)
+
+# Opcional. Exibe o query para o caso de ser necessário solucionar problemas.
+print(query)
+
+# Executa o query no banco de dados artigosDb que conectamos acima.
+rsInsert <- dbSendQuery(artigosDb, query)
+
+# Limpa o resultado.
+dbClearResult(rsInsert)
+
+# Desconecta para limpar a conexão com o banco de dados.
+dbDisconnect(artigosDb)
+```
+
+Vamos testar esse script:
+
+1. Execute uma declaração SELECT e observe as linhas que possui.
+2. Execute o script em R acima para inserir outro registro.
+3. Realize a declaração SELECT. Deverá visualizar uma linha adicional de dados.
+
+### Erros do SQL
+
+Vamos criar um erro simples no SQL para visualizar o que acontece.
+
+No R, modifique:
+
+```
+entradaTitulo <- "THE LOST LUSITANIA."
+```
+
+para
+
+```
+entradaTitulo <- "THE LOST LUSITANIA'S RUDDER."
+```
+
+e execute novamente o script.
+
+No console R, há um erro:
+
+```
+> rsInsert <- dbSendQuery(artigosDb, query)
+Error in result_create(conn@ptr, statement, is_statement) :
+  You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'S RUDDER.',
+  '1916-05-21',
+  LEFT(RTRIM('http://newspapers.library.wales/view/4' at line 6 [1064]
+```
+
+É possível verificar, com uma declaração SELECT, que não há registro na tabela com um título de artigo denominado *THE LOST LUSITANIA'S RUDDER*.
+
+As aspas simples fazem parte da sintaxe do SQL e indicam uma entrada textual. Se estiverem no lugar errado, provocam um erro. Temos que lidar com os casos nos quais os dados contêm aspas. O SQL aceita duas aspas simples seguidas numa declaração de inserção para representar uma aspa nos dados ('').
+
+Lidaremos com as aspas utilizando a função `gsub` para substituir cada aspa simples por duas aspas simples, como mostrado abaixo. (Mais adiante, logo antes de baixarmos os dados de amostra, mostramos também uma alternativa que dispensa esse escape manual.)
+
+```
+entradaTitulo <- "THE LOST LUSITANIA'S RUDDER."
+# substitui uma aspa simples por duas aspas simples
+entradaTitulo <- gsub("'", "''", entradaTitulo)
+```
+
+Agora que a questão das aspas no título do artigo está resolvida, execute novamente o script e depois confira com uma declaração SELECT no MySQL Workbench.
+
+```
+SELECT * FROM periodicos_resultados_pesquisa.tbl_periodicos_resultados_pesquisa WHERE titulo_artigo = "THE LOST LUSITANIA'S RUDDER.";
+```
+
+Uma vez que o registro teste foi visualizado, digite TRUNCATE tbl_periodicos_resultados_pesquisa para remover esses dados de teste.
+
+# Armazenando um ficheiro de valores separados por vírgulas (.csv) no banco de dados MySQL
+
+Na próxima parte da lição, vamos realizar consultas na tabela do banco de dados. Nosso objetivo é obter dados suficientes na tabela para construir um gráfico. Para nos prepararmos para isso, carregaremos alguns dados de amostra de um ficheiro de valores separados por vírgulas (.csv).
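+
+Antes de prosseguirmos, o aparte prometido na seção anterior: em vez de escapar as aspas manualmente com gsub, o pacote DBI (do qual o RMariaDB depende) também permite consultas parametrizadas, nas quais os valores são passados separadamente do texto do SQL e as aspas deixam de ser um problema. Um esboço dessa alternativa (supõe a conexão artigosDb aberta, como nos scripts acima):
+
+```
+# Esboço: INSERT parametrizado. Cada ? é preenchido, na ordem, por um
+# valor da lista params; não é preciso escapar aspas nos dados.
+query <- "INSERT INTO tbl_periodicos_resultados_pesquisa
+  (titulo_artigo, data_publicacao_artigo, url_artigo, termo_busca_usado)
+  VALUES (?, ?, ?, ?)"
+dbExecute(artigosDb, query, params = list(
+  "THE LOST LUSITANIA'S RUDDER.",
+  "1916-05-21",
+  "http://newspapers.library.wales/view/4121281/4121288/94/",
+  "German+Submarine"))
+```
+
+Seguiremos com o gsub no restante da lição, para manter os scripts iguais aos mostrados acima, mas a forma parametrizada é a mais segura quando os dados vêm de fontes externas.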
+
+Faça o download dos ficheiros .csv para o seu diretório de trabalho do R. Esses ficheiros estão armazenados no GitHub, então faça o download da versão *Raw* dos ficheiros.
+
+1. [dados-amostra-jardim.csv](/assets/getting-started-with-mysql-using-r/dados-amostra-jardim.csv) Esta é uma lista de artigos de periódicos galeses publicados durante a Primeira Guerra Mundial que correspondem aos termos de busca "*allotment*" (loteamento) e "*garden*" (jardim).
+2. [dados-amostra-submarino.csv](/assets/getting-started-with-mysql-using-r/dados-amostra-submarino.csv) Esta é uma lista de artigos de periódicos galeses publicados durante a Primeira Guerra Mundial que correspondem aos termos de busca "*German*" (alemão) e "*submarine*" (submarino).
+
+No R, execute a função read.csv() e depois visualize o data frame com os dados amostrais.
+
+```
+dadosAmostraJardim <- read.csv(file="dados-amostra-jardim.csv", header=TRUE, sep=",")
+dadosAmostraJardim
+```
+
+Muitos dados serão visualizados, incluindo os que se encontram abaixo. Verifique a aba "Environment" (ambiente) na parte direita do RStudio. O data frame dadosAmostraJardim deve conter "1242 obs. of 4 variables".
+
+```
+                                   titulo_artigo
+1                 -."e;'N'III GARDEN REQUISITES.
+<...o restante dos resultados do data frame foi removido...>
+  data_publicacao_artigo                                                url_artigo  termo_busca_usado
+1             1918-05-11 http://newspapers.library.wales/view/3581057/3581061/27/ AllotmentAndGarden
+<...o restante dos resultados do data frame foi removido...>
+```
+
+Observe que nesses dados de amostra, os nomes dos campos estão incluídos no cabeçalho por conveniência: titulo_artigo, data_publicacao_artigo, url_artigo e termo_busca_usado.
+
+Como observado acima, nosso objetivo aqui é inserir os dados de amostra que estão armazenados no data frame dadosAmostraJardim na tabela MySQL periodicos_resultados_pesquisa. Podemos fazer isso de diferentes maneiras. Uma delas é percorrer num laço (*loop*) cada linha do data frame e executar um comando INSERT, como fizemos acima. Aqui, no entanto, usaremos um comando para inserir todas as linhas em dadosAmostraJardim de uma vez: *dbWriteTable*. Não execute essa declaração ainda, apenas a leia.
+
+```
+dbWriteTable(artigosDb, value = dadosAmostraJardim, row.names = FALSE, name = "tbl_periodicos_resultados_pesquisa", append = TRUE )
+```
+
+| Função | Significado |
+| -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| dbWriteTable(artigosDb, | Usa a conexão do banco de dados MySQL artigosDb. |
+| value = dadosAmostraJardim, | Insere os valores do data frame dadosAmostraJardim na tabela. |
+| row.names = FALSE, | Não inclui os nomes das linhas. |
+| name = "tbl_periodicos_resultados_pesquisa", | Insere os valores de dadosAmostraJardim na tabela tbl_periodicos_resultados_pesquisa |
+| append = TRUE ) | Adiciona os valores ao que já existe na tabela. Se esse script rodar novamente, todas as linhas em dadosAmostraJardim serão adicionadas à mesma tabela novamente. |
+
+Ainda não estamos preparados para executar o comando dbWriteTable(). Primeiro precisamos nos conectar ao banco de dados. Aqui está o script para fazer isso, assim como para carregar o data frame dados-amostra-submarino.csv. Leia-o e execute-o.
+
+```
+library(RMariaDB)
+rmariadb.settingsfile<-"C:/Program Files/MySQL/MySQL Server 8.0/periodicos_resultados_pesquisa.cnf"
+
+rmariadb.db<-"periodicos_resultados_pesquisa"
+artigosDb<-dbConnect(RMariaDB::MariaDB(),default.file=rmariadb.settingsfile,group=rmariadb.db)
+
+# A função setwd define o diretório de trabalho. Mude o caminho abaixo para o diretório onde guardou os ficheiros .csv.
+setwd("C:/Users/User/Documents")
+
+# Realiza uma busca nos dados de amostra dos periódicos pelos termos "Allotment" e "Garden"
+dadosAmostraJardim <- read.csv(file="dados-amostra-jardim.csv", header=TRUE, sep=",")
+
+# A coluna titulo_artigo na tabela do banco de dados pode armazenar valores de até 99 caracteres.
+# Esta declaração reduz qualquer título de artigo maior que 99 caracteres.
+dadosAmostraJardim$titulo_artigo <- substr(dadosAmostraJardim$titulo_artigo,0,99)
+
+# Esta declaração formata data_publicacao_artigo para representar o tipo de dado DATETIME.
+dadosAmostraJardim$data_publicacao_artigo <- paste(dadosAmostraJardim$data_publicacao_artigo," 00:00:00",sep="")
+
+dbWriteTable(artigosDb, value = dadosAmostraJardim, row.names = FALSE, name = "tbl_periodicos_resultados_pesquisa", append = TRUE )
+
+# Realiza uma busca nos dados de amostra dos periódicos pelos termos German+Submarine.
+dadosAmostraSubmarino <- read.csv(file="dados-amostra-submarino.csv", header=TRUE, sep=",")
+
+dadosAmostraSubmarino$titulo_artigo <- substr(dadosAmostraSubmarino$titulo_artigo,0,99)
+dadosAmostraSubmarino$data_publicacao_artigo <- paste(dadosAmostraSubmarino$data_publicacao_artigo," 00:00:00",sep="")
+
+dbWriteTable(artigosDb, value = dadosAmostraSubmarino, row.names = FALSE, name = "tbl_periodicos_resultados_pesquisa", append = TRUE )
+
+# Desconecta para limpar a conexão com o banco de dados.
+dbDisconnect(artigosDb)
+```
+
+Se o script for executado mais de uma vez, serão gerados registros duplicados. Se isso acontecer, apenas execute o comando TRUNCATE na tabela e execute o script novamente, mas apenas uma vez. É possível verificar se o número de registros é o correto. No MySQL Workbench, execute o seguinte na janela de Query:
+
+```
+SELECT COUNT(*) FROM tbl_periodicos_resultados_pesquisa;
+```
+
+A contagem deve retornar 2880 registros: 1242 de dadosAmostraJardim e 1638 de dadosAmostraSubmarino.
+
+# Selecionando dados de uma tabela com SQL usando R
+
+Nosso objetivo aqui é usar a tabela de artigos que importamos e criar um gráfico do número de artigos publicados nos *Welsh Newspapers* (jornais galeses) ao longo de cada mês da Primeira Guerra Mundial que correspondem aos termos de busca *allotment* (loteamento) e *garden* (jardim), e *German* (alemão) e *submarine* (submarino).
+
+O script abaixo consulta o banco de dados e produz o gráfico de linhas mostrado mais adiante. Leia o script e observe o que está acontecendo. Segue uma explicação do script.
+
+```
+library(RMariaDB)
+rmariadb.settingsfile<-"C:/Program Files/MySQL/MySQL Server 8.0/periodicos_resultados_pesquisa.cnf"
+
+rmariadb.db<-"periodicos_resultados_pesquisa"
+artigosDb<-dbConnect(RMariaDB::MariaDB(),default.file=rmariadb.settingsfile,group=rmariadb.db)
+
+termoBuscaUsado = "German+Submarine"
+# Solicita uma contagem do número de artigos que correspondem ao termoBuscaUsado que foram publicados a cada mês.
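+# (Nota: a consulta devolve uma linha por mês/ano que tenha ao menos um artigo;
+# meses sem nenhuma correspondência não aparecem no resultado, o que pode
+# deslocar a série temporal construída adiante com ts().)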
+query<-paste("SELECT ( COUNT(CONCAT(MONTH(data_publicacao_artigo), ' ',YEAR(data_publicacao_artigo)))) as 'count'
+  FROM tbl_periodicos_resultados_pesquisa
+  WHERE termo_busca_usado ='", termoBuscaUsado,"'
+  GROUP BY YEAR(data_publicacao_artigo),MONTH(data_publicacao_artigo)
+  ORDER BY YEAR(data_publicacao_artigo),MONTH(data_publicacao_artigo);",sep="")
+
+print(query)
+rs = dbSendQuery(artigosDb,query)
+dbRows<-dbFetch(rs)
+
+contagemArtigos<-c(as.integer(dbRows$count))
+
+# Coloca os resultados da consulta numa série temporal.
+qts1 = ts(contagemArtigos, frequency = 12, start = c(1914, 8))
+print(qts1)
+
+# Plota a série temporal qts1 dos dados com uma linha de espessura 3 na cor vermelha.
+plot(qts1,
+  lwd=3,
+  col = "red",
+  xlab="Mês da Guerra",
+  ylab="Número de artigos de periódicos",
+  xlim=c(1914,1919),
+  ylim=c(0,150),
+  main=paste("Número de artigos nos jornais galeses (Welsh newspapers) que correspondem aos termos de busca listados.",sep=""),
+  sub="Legenda do termo de busca: Vermelho = German+Submarine. Verde = Allotment And Garden.")
+
+termoBuscaUsado="AllotmentAndGarden"
+
+# Solicita uma contagem do número de artigos que correspondem ao termoBuscaUsado que foram publicados a cada mês.
+query<-paste("SELECT ( COUNT(CONCAT(MONTH(data_publicacao_artigo),' ',YEAR(data_publicacao_artigo)))) as 'count' FROM tbl_periodicos_resultados_pesquisa WHERE termo_busca_usado='",termoBuscaUsado,"' GROUP BY YEAR(data_publicacao_artigo),MONTH(data_publicacao_artigo) ORDER BY YEAR(data_publicacao_artigo),MONTH(data_publicacao_artigo);",sep="")
+print(query)
+rs = dbSendQuery(artigosDb,query)
+dbRows<-dbFetch(rs)
+
+contagemArtigos<-c(as.integer(dbRows$count))
+
+# Coloca os resultados da consulta numa série temporal.
+qts2 = ts(contagemArtigos, frequency = 12, start = c(1914, 8))
+
+# Adiciona esta linha com a série temporal qts2 à plotagem existente.
+lines(qts2, lwd=3,col="darkgreen")
+
+# Limpa o resultado.
+dbClearResult(rs)
+
+# Desconecta para limpar a conexão com o banco de dados.
+dbDisconnect(artigosDb)
+```
+
+## Explicação do script de seleção de dados e criação do gráfico
+
+O método de conexão ao banco de dados é explicado [acima](#Conectando-a-um-banco-de-dados-com-uma-senha).
+
+Este script seleciona dois conjuntos de resultados e os plota num gráfico. Um dos conjuntos são os artigos de periódicos que correspondem à busca pelos termos "German+Submarine". Eles são consultados através da declaração SELECT:
+
+```
+SELECT (
+  COUNT(CONCAT(MONTH(data_publicacao_artigo),' ',YEAR(data_publicacao_artigo)))) as 'count'
+  FROM tbl_periodicos_resultados_pesquisa
+  WHERE termo_busca_usado='",termoBuscaUsado,"'
+  GROUP BY YEAR(data_publicacao_artigo),MONTH(data_publicacao_artigo)
+  ORDER BY YEAR(data_publicacao_artigo),MONTH(data_publicacao_artigo);
+```
+
+| SQL | Significado |
+| ----------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| SELECT ( | SELECT - Seleciona os dados que correspondem à condição da cláusula WHERE na tabela do banco de dados indicada na cláusula FROM. |
+| COUNT(CONCAT(MONTH(data_publicacao_artigo),' ',YEAR(data_publicacao_artigo)))) as 'count' | Fornece uma contagem do número de artigos publicados que compartilham o mesmo mês e ano de publicação. CONCAT representa a ação concatenar, que cria um único valor textual de dois ou mais valores textuais, nesse caso, o mês e o ano. |
+| FROM tbl_periodicos_resultados_pesquisa | Esta é a tabela do banco de dados a partir da qual estamos selecionando os dados. |
+| GROUP BY YEAR(data_publicacao_artigo),MONTH(data_publicacao_artigo) | Esta declaração GROUP BY é importante para a contagem (COUNT) acima. Aqui os dados estão agrupados por mês e ano, para que seja possível contar todos os registros no grupo. |
+| ORDER BY YEAR(data_publicacao_artigo),MONTH(data_publicacao_artigo); | Coloca os resultados ordenados por data, o que é útil já que queremos construir um gráfico por data. |
+
+As declarações abaixo executam a consulta e colocam o resultado *rs* num data frame *dbRows*:
+
+```
+rs = dbSendQuery(artigosDb,query)
+dbRows<-dbFetch(rs)
+```
+
+Abaixo, a contagem extraída do data frame *dbRows* é colocada numa série temporal com a função *ts()*, para que seja possível plotá-la mês a mês, iniciando em agosto de 1914.
+
+```
+# Coloca os resultados da consulta numa série temporal.
+qts1 = ts(contagemArtigos, frequency = 12, start = c(1914, 8))
+```
+
+Abaixo, os dados na série temporal *qts1* são plotados num gráfico:
+
+```
+plot(qts1,
+  lwd=3,
+  col = "red",
+  xlab="Mês da Guerra",
+  ylab="Número de artigos de periódicos",
+  xlim=c(1914,1919),
+  ylim=c(0,150),
+  main=paste("Número de artigos nos jornais galeses (Welsh newspapers) que correspondem aos termos de busca listados.",sep=""),
+  sub="Legenda do termo de busca: Vermelho = German+Submarine. Verde = Allotment And Garden.")
+```
+
+Em que isso difere da parte do script que gera o gráfico dos artigos correspondentes à busca dos termos "Allotment And Garden"? Em pouca coisa, na verdade. Apenas usamos a função *lines()* para plotar os resultados no mesmo gráfico que construímos acima.
+
+```
+lines(qts2, lwd=3,col="darkgreen")
+```
+
+### Resultados da seleção de dados e da criação do gráfico
+
+Abaixo está o gráfico que deveria aparecer:
+
+{% include figure.html filename="introducao-ao-mysql-e-r-16.png" caption="Plotagem do número de artigos de periódicos publicados a cada mês que correspondem aos termos de busca" %}
+
+# Indo mais longe com o MySQL
+
+Se deseja colocar um banco de dados num website, uma maneira de fazê-lo é usando MySQL e a linguagem PHP para construir as páginas do site. Um exemplo deste tipo de website é o que construí para [buscar edições do jornal "The Equity"](https://perma.cc/237N-DD9E). O livro de Larry Ullman, *PHP and MySQL for Dynamic Web Sites*, aborda como configurar e conectar um banco de dados usando MySQL e PHP de uma maneira resistente a hackers.
+
+Para exemplos do uso de SQL para ordenar e agrupar dados, assim como também realizar cálculos, veja: [MySQL by Examples for Beginners](https://web.archive.org/web/20171228130133/https://www.ntu.edu.sg/home/ehchua/programming/sql/MySQL_Beginner.html) ou MySQL [Examples of Common Queries](https://perma.cc/84HN-9DBL).
+
+# Conclusão
+
+Espero que tenha obtido o conhecimento para configurar uma tabela de banco de dados, conectar-se a ela e armazenar registros. Embora tenhamos abordado apenas uma pequena parte das diferentes maneiras de realizar consultas nos dados, espero também que tenha aprendido a técnica de uso das declarações SELECT, de modo a poder utilizá-las em seus futuros projetos de história digital.
+
+# Créditos
+
+Finalizei esta lição graças ao suporte do [George Garth Graham Undergraduate Digital History Research Fellowship](https://perma.cc/S7PP-FY5U).
+
+Agradeço à Drª.
Amanda Visconti pelo suporte e orientação ao longo da preparação desta lição. + +# Referências + +Ullman, L. 2005. *PHP and MySQL for Dynamic Web Sites, 2nd ed.* Berkeley, Calif: Peachpit. + +# Notas + +[^1]: Jason A. French, "Using R With MySQL Databases," blog (3 July 2014), [https://www.jason-french.com/blog/2014/07/03/using-r-with-mysql-databases/](https://perma.cc/5VYV-L5PG). + +[^2]: Taylor Arnold and Lauren Tilton, "Basic Text Processing in R," Programming Historian (27 March 2017), [tradução para português](/pt/licoes/processamento-basico-texto-r). + +[^3]: Taryn Dewar, "R Basics with Tabular Data," Programming Historian (05 September 2016), [tradução para português](/pt/licoes/nocoes-basicas-R-dados-tabulares). + +O script em R usado para recolher dados de amostra se encontra [aqui](https://perma.cc/87AE-LJRG). diff --git a/pt/licoes/introducao-omeka-net.md b/pt/licoes/introducao-omeka-net.md index e4680bbeba..14c15ff2b7 100644 --- a/pt/licoes/introducao-omeka-net.md +++ b/pt/licoes/introducao-omeka-net.md @@ -1,188 +1,188 @@ ---- -title: Introdução ao Omeka.net -slug: introducao-omeka-net -layout: lesson -date: 2016-02-17 -translation_date: 2021-06-07 -authors: -- Miriam Posner -editors: -- Adam Crymble -translator: -- Gabriela Kucuruza -translation-editor: -- Daniel Alves -translation-reviewer: -- Ângela Pité -- Rômulo Predes -difficulty: 1 -exclude_from_check: - - reviewers -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/379 -activity: presenting -topics: [website] -abstract: "Com o Omeka.net é fácil criar sites na web para mostrar coleções de itens." -original: up-and-running-with-omeka -avatar_alt: Esqueleto de dinossauro num museu -doi: 10.46430/phpt0011 ---- - -{% include toc.html %} - - - - - -O [Omeka.net](http://www.omeka.net) facilita a criação de websites para mostrar coleções de itens. - -## Cadastre-se numa conta do Omeka - -{% include figure.html filename="intro-omeka-net-1.png" caption="Cadastre-se na conta de teste" %} - -Entre em www.omeka.net e clique em **Sign Up** (Cadastre-se). Escolha o Plano de Teste. Preencha o formulário de cadastro. Verifique o seu e-mail pelo link de ativação da conta. - -## Crie um novo site do Omeka - -{% include figure.html filename="intro-omeka-net-2.png" caption="Página da conta do Omeka.net" %} - -Depois de clicar no link no seu e-mail, clique em **Add a Site** (Adicionar um site). - -Preencha a informação sobre o URL do seu site, o título que quer usar e a descrição que preferir. Clique em **Add Your New Site** (Adicione o seu novo site). - -## Você tem um novo site do Omeka! - - -{% include figure.html filename="intro-omeka-net-3.png" caption="Veja o seu site" %} - -Para ver o seu site, clique em **View Site** (Ver Site). - -## Um site vazio no Omeka - -{% include figure.html filename="intro-omeka-net-4.png" caption="Vista pública do site" %} - -Esse é o seu site vazio do Omeka esperando para ser preenchido. Para retornar ao painel de controle (*dashboard*) clique no botão **Back** (Retornar) ou escreva `http://www.omeka.net/dashboard`. Agora, clique em **Manage Site** (Administre o site). - -## Instale alguns plugins - -{% include figure.html filename="intro-omeka-net-5.png" caption="Página dos Plugins" %} - -O seu site do Omeka vem com plugins que oferecem funções adicionais. Precisamos ativá-los. Para fazer isso, clique no item **Plugins** no menu, no canto superior direito. 
Na página seguinte, clique no botão **Install** (Instalar) em **Exhibit Builder** (construtor de exposições) (deixe as opções como estão na página seguinte) e em **Simple Pages** (Páginas simples). - -## Configurar o seu site para português (nota da tradução) - -A configuração padrão do Omeka é em inglês. Porém, podemos mudar a língua do seu site para português (pt-BR e pt-PT) através de um Plugin. Para realizar essa configuração, siga os passos a seguir: - -1. Clique em **Manage Site** (Administrar Site) no Menu Principal. -2. Clique em Plugins no menu superior ou acesse os Plugins através do link `https://nome_do_seu_site.omeka.net/admin/plugins`, sendo `nome_do_seu_site` o nome escolhido para o seu site. - -3. Encontre o Plugin **Locale** e clique no botão **Install** (Instalar). Ao clicar, a sua tela ficará parecida com a imagem abaixo. - -4. Ao clicar em instalar, aparecerá uma página com as opções de tradução. Escolha **Português - Brasil (pt_BR)** ou **Português - Portugal (pt_PT)**. - -5. Clique em **Save Changes** (Salvar Mudanças) - -{% include figure.html filename="intro-omeka-net-6.png" caption="A sua tela ficará parecida com a imagem acima. Nela, o Plugin Locale está indicado." %} - -Agora, o seu site e o painel de controle estarão em português. - -## Trocar temas - -{% include figure.html filename="intro-omeka-net-7.png" caption="Página de Configuração dos Temas" %} - -O Omeka permite que a aparência do site público seja alterada por meio dos temas. Para fazer isso, clique em **Aparência** (Appearence, à direita do canto superior do seu painel de controle). Mude os temas selecionando uma das opções disponíveis na página. Clique o botão verde **Utilizar este tema** (Use this theme) para atualizar o seu novo tema. Visite, então, o seu site público clicando no nome do seu site, no canto superior esquerdo da página. - -## Temos um novo tema! - -{% include figure.html filename="intro-omeka-net-8.png" caption="Vista pública com o novo tema" %} - -Confira o seu novo tema e volte para o seu painel de controle. É possível retornar para o seu antigo tema, continuar com esse ou selecionar uma das outras opções. - - -## Adicione um item - -{% include figure.html filename="intro-omeka-net-9.png" caption="Adicione um item" %} - -Clique em **Itens** no lado esquerdo do menu e depois (naturalmente!) **Adicione um item** (Add an item). - -## Descreva o seu novo item - -{% include figure.html filename="intro-omeka-net-10.png" caption="Torne o seu item público usando a caixa de seleção assinalada" %} - -Lembre, **Dublin Core** refere-se às informações descritivas (metadados) que você insere sobre um item. Todas essas informações são opcionais e não há como inseri-las incorretamente. Tente, porém, ser consistente. - -Não se esqueça de clicar na caixa de seleção **Público** (Public) para que o seu item fique visível para o público em geral. Se você não clicar nessa caixa, apenas pessoas cadastradas no seu site poderão ver o item. - -Para adicionar múltiplos campos - por exemplo, se você quiser adicionar vários assuntos ao seu item - use o botão verde **Adicionar informação** (Add input) à esquerda das caixas de texto. - -## Uma questão complexa - -{% include figure.html filename="intro-omeka-net-11.png" caption="O que é isto?" %} - -Eu estou a criar um registo de item para o meu cachorro, Boris. Mas eu estou a descrever o Boris _ele mesmo_ ou uma _fotografia_ do Boris? No caso da primeira opção, o **Criador** seria... bem, suponho que isso dependa das suas crenças religiosas. 
Se é o segundo caso, o criador seria Brad Wallace, quem tirou a foto. - -A decisão sobre descrever um objeto ou a representação de um objeto é sua. Uma vez que tenha decidido, seja consistente. - -## Anexe um ficheiro ao registo do seu item - -{% include figure.html filename="intro-omeka-net-12.png" caption="Adicionando ficheiros a um item" %} - -Uma vez que terminamos de adicionar os metadados do Dublin Core, podemos anexar um ficheiro ao registo do seu item clicando em **Arquivos** (Ficheiros em PT_PT / Files), no topo do formulário de Dublin Core. (Não é necessário clicar em **Adicionar Item** antes de fazer isso; o Omeka irá salvar automaticamente essa informação). Podemos adicionar múltiplos ficheiros, mas saiba que o plano Básico apenas vem com 500 MB de espaço de armazenamento. - -Tendo adicionado o ficheiro ou os ficheiros, podemos adicionar **Tags** (Etiquetas em PT_PT) clicando no botão. Também podemos clicar em **Metadados** (Meta-dados do Tipo de Item em PT_PT / Item Type Metadata) para escolher a tipologia - pessoa, lugar, animal, vegetal, mineral - do seu item. Se não encontrar um tipo de item apropriado para o seu item, não se preocupe. Nós podemos adicionar um novo tipo de item depois. - -Quando tudo estiver pronto, clique no botão verde **Adicionar item**. - -## Você tem um item! - -{% include figure.html filename="intro-omeka-net-13.png" caption="Explorar itens, vista de administrador" %} - -Esta lista contém todos os itens que foram adicionados. Se o item não fosse público, estaria escrito _Privado_ depois do título. Para ver como a página do seu novo item se parece, clique no nome do item. - -## Esta não é a página pública para o seu item - -{% include figure.html filename="intro-omeka-net-14.png" caption="Página de Item, vista de administrador" %} - -Pode parecer, mas essa página não é o que um usuário não-cadastrado irá ver quando navegar para a página do seu item. Para ver o que um usuário veria, clique no botão azul **Ver a Página Pública**, à direita. (Ou você pode editar o item clicando em **Editar** na direita). - -## Esta é a página pública para o seu item - -{% include figure.html filename="intro-omeka-net-15.png" caption="Página do item, vista pública" %} - -Isso é o que o usuário geral verá se ele navegar pela sua página. - -## Crie uma coleção - -{% include figure.html filename="intro-omeka-net-16.png" caption="Criar uma coleção" %} - -É possível começar a ordenar a sua lista de itens agrupando-os em coleções. Para fazer isso, retorne para o painel de controle (Dashboard), clique na aba de **Coleções** (Collections) e clique em **Adicionar uma coleção**. - -## Insira informações sobre a sua coleção - -{% include figure.html filename="intro-omeka-net-17.png" caption="Adicionar metadados da coleção" %} - -No Omeka, os metadados são fundamentais! Insira alguma informação sobre a sua nova coleção e lembre-se de clicar no botão **Público** perto do fim da página. Então salve a coleção. - -## Adicione itens à sua coleção - -{% include figure.html filename="intro-omeka-net-18.png" caption="Clique na caixa seleção de cada item para editar" %} - -Para preencher a coleção que acabou de criar, clique na aba de *Itens*. Da sua lista **Ver Itens** (Explorar Itens em PT_PT), clique nas caixas de verificação dos itens que pertencem à sua nova coleção. Então clique no botão **Editar**. 
- -## Escolha a coleção - -{% include figure.html filename="intro-omeka-net-19.png" caption="Escolha uma coleção do menu suspenso" %} - -Na página Editar Itens (Editar Itens em Lote em PT_PT), selecione a Coleção à qual gostaria de adicionar os seus itens. (Além disso, tenha atenção a todas as outras coisas que podem ser feitas nessa página). - -## Veja a sua nova coleção - -{% include figure.html filename="intro-omeka-net-20.png" caption="Ver coleção, vista pública" %} - -Retorne para o seu site público. Se clicarmos na aba de **Ver Coleções** (Explorar as Colecções em PT_PT) na face pública do seu site, deverá haver agora uma nova coleção contendo os itens que foram identificados. - -Agora que alguns itens foram adicionados e agrupados numa coleção, dedique algum tempo para editar ainda mais o seu site. Ele está a começar a tomar forma agora que há tanto itens individuais como unidades temáticas, mas o Omeka pode fazer ainda mais. Iremos falar sobre isso numa próxima lição. - -## Recursos Adicionais - -A equipe do Omeka compilou ótimos recursos nas [páginas de ajuda](http://info.omeka.net/)(em inglês) do software. - +--- +title: Introdução ao Omeka.net +slug: introducao-omeka-net +layout: lesson +date: 2016-02-17 +translation_date: 2021-06-07 +authors: +- Miriam Posner +editors: +- Adam Crymble +translator: +- Gabriela Kucuruza +translation-editor: +- Daniel Alves +translation-reviewer: +- Ângela Pité +- Rômulo Predes +difficulty: 1 +exclude_from_check: + - reviewers +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/379 +activity: presenting +topics: [website] +abstract: "Com o Omeka.net é fácil criar sites na web para mostrar coleções de itens." +original: up-and-running-with-omeka +avatar_alt: Esqueleto de dinossauro num museu +doi: 10.46430/phpt0011 +--- + +{% include toc.html %} + + + + + +O [Omeka.net](https://www.omeka.net) facilita a criação de websites para mostrar coleções de itens. + +## Cadastre-se numa conta do Omeka + +{% include figure.html filename="intro-omeka-net-1.png" caption="Cadastre-se na conta de teste" %} + +Entre em www.omeka.net e clique em **Sign Up** (Cadastre-se). Escolha o Plano de Teste. Preencha o formulário de cadastro. Verifique o seu e-mail pelo link de ativação da conta. + +## Crie um novo site do Omeka + +{% include figure.html filename="intro-omeka-net-2.png" caption="Página da conta do Omeka.net" %} + +Depois de clicar no link no seu e-mail, clique em **Add a Site** (Adicionar um site). + +Preencha a informação sobre o URL do seu site, o título que quer usar e a descrição que preferir. Clique em **Add Your New Site** (Adicione o seu novo site). + +## Você tem um novo site do Omeka! + + +{% include figure.html filename="intro-omeka-net-3.png" caption="Veja o seu site" %} + +Para ver o seu site, clique em **View Site** (Ver Site). + +## Um site vazio no Omeka + +{% include figure.html filename="intro-omeka-net-4.png" caption="Vista pública do site" %} + +Esse é o seu site vazio do Omeka esperando para ser preenchido. Para retornar ao painel de controle (*dashboard*) clique no botão **Back** (Retornar) ou escreva `http://www.omeka.net/dashboard`. Agora, clique em **Manage Site** (Administre o site). + +## Instale alguns plugins + +{% include figure.html filename="intro-omeka-net-5.png" caption="Página dos Plugins" %} + +O seu site do Omeka vem com plugins que oferecem funções adicionais. Precisamos ativá-los. Para fazer isso, clique no item **Plugins** no menu, no canto superior direito. 
Na página seguinte, clique no botão **Install** (Instalar) em **Exhibit Builder** (construtor de exposições) (deixe as opções como estão na página seguinte) e em **Simple Pages** (Páginas simples). + +## Configurar o seu site para português (nota da tradução) + +A configuração padrão do Omeka é em inglês. Porém, podemos mudar a língua do seu site para português (pt-BR e pt-PT) através de um Plugin. Para realizar essa configuração, siga os passos a seguir: + +1. Clique em **Manage Site** (Administrar Site) no Menu Principal. +2. Clique em Plugins no menu superior ou acesse os Plugins através do link `https://nome_do_seu_site.omeka.net/admin/plugins`, sendo `nome_do_seu_site` o nome escolhido para o seu site. +3. Encontre o Plugin **Locale** e clique no botão **Install** (Instalar). Ao clicar, a sua tela ficará parecida com a imagem abaixo. +4. Ao clicar em instalar, aparecerá uma página com as opções de tradução. Escolha **Português - Brasil (pt_BR)** ou **Português - Portugal (pt_PT)**. +5. Clique em **Save Changes** (Salvar Mudanças). + +{% include figure.html filename="intro-omeka-net-6.png" caption="A sua tela ficará parecida com a imagem acima. Nela, o Plugin Locale está indicado." %} + +Agora, o seu site e o painel de controle estarão em português. + +## Trocar temas + +{% include figure.html filename="intro-omeka-net-7.png" caption="Página de Configuração dos Temas" %} + +O Omeka permite que a aparência do site público seja alterada por meio dos temas. Para fazer isso, clique em **Aparência** (Appearance, no canto superior direito do seu painel de controle). Mude os temas selecionando uma das opções disponíveis na página. Clique no botão verde **Utilizar este tema** (Use this theme) para ativar o seu novo tema. Visite, então, o seu site público clicando no nome do seu site, no canto superior esquerdo da página. + +## Temos um novo tema! + +{% include figure.html filename="intro-omeka-net-8.png" caption="Vista pública com o novo tema" %} + +Confira o seu novo tema e volte para o seu painel de controle. É possível retornar para o seu antigo tema, continuar com esse ou selecionar uma das outras opções. + +## Adicione um item + +{% include figure.html filename="intro-omeka-net-9.png" caption="Adicione um item" %} + +Clique em **Itens** no lado esquerdo do menu e depois (naturalmente!) em **Adicione um item** (Add an item). + +## Descreva o seu novo item + +{% include figure.html filename="intro-omeka-net-10.png" caption="Torne o seu item público usando a caixa de seleção assinalada" %} + +Lembre-se: **Dublin Core** refere-se às informações descritivas (metadados) que você insere sobre um item. Todas essas informações são opcionais e não há como inseri-las incorretamente. Tente, porém, ser consistente. + +Não se esqueça de clicar na caixa de seleção **Público** (Public) para que o seu item fique visível para o público em geral. Se você não clicar nessa caixa, apenas pessoas cadastradas no seu site poderão ver o item. + +Para adicionar múltiplos campos - por exemplo, se você quiser adicionar vários assuntos ao seu item - use o botão verde **Adicionar informação** (Add input) à esquerda das caixas de texto. + +## Uma questão complexa + +{% include figure.html filename="intro-omeka-net-11.png" caption="O que é isto?" %} + +Eu estou a criar um registo de item para o meu cachorro, Boris. Mas eu estou a descrever o Boris _ele mesmo_ ou uma _fotografia_ do Boris? No caso da primeira opção, o **Criador** seria... bem, suponho que isso dependa das suas crenças religiosas.
Se for o segundo caso, o criador seria Brad Wallace, que tirou a foto. + +A decisão sobre descrever um objeto ou a representação de um objeto é sua. Uma vez que tenha decidido, seja consistente. + +## Anexe um ficheiro ao registo do seu item + +{% include figure.html filename="intro-omeka-net-12.png" caption="Adicionando ficheiros a um item" %} + +Uma vez que terminamos de adicionar os metadados do Dublin Core, podemos anexar um ficheiro ao registo do seu item clicando em **Arquivos** (Ficheiros em PT_PT / Files), no topo do formulário de Dublin Core. (Não é necessário clicar em **Adicionar Item** antes de fazer isso; o Omeka irá salvar automaticamente essa informação). Podemos adicionar múltiplos ficheiros, mas saiba que o plano Básico apenas vem com 500 MB de espaço de armazenamento. + +Tendo adicionado o ficheiro ou os ficheiros, podemos adicionar **Tags** (Etiquetas em PT_PT) clicando no botão. Também podemos clicar em **Metadados** (Meta-dados do Tipo de Item em PT_PT / Item Type Metadata) para escolher a tipologia - pessoa, lugar, animal, vegetal, mineral - do seu item. Se não encontrar um tipo de item apropriado para o seu item, não se preocupe. Nós podemos adicionar um novo tipo de item depois. + +Quando tudo estiver pronto, clique no botão verde **Adicionar item**. + +## Você tem um item! + +{% include figure.html filename="intro-omeka-net-13.png" caption="Explorar itens, vista de administrador" %} + +Esta lista contém todos os itens que foram adicionados. Se um item não for público, estará escrito _Privado_ depois do título. Para ver como a página do seu novo item se parece, clique no nome do item. + +## Esta não é a página pública para o seu item + +{% include figure.html filename="intro-omeka-net-14.png" caption="Página de Item, vista de administrador" %} + +Pode parecer, mas essa página não é o que um usuário não-cadastrado irá ver quando navegar para a página do seu item. Para ver o que um usuário veria, clique no botão azul **Ver a Página Pública**, à direita. (Ou você pode editar o item clicando em **Editar**, à direita). + +## Esta é a página pública para o seu item + +{% include figure.html filename="intro-omeka-net-15.png" caption="Página do item, vista pública" %} + +Isso é o que o usuário geral verá se navegar pela sua página. + +## Crie uma coleção + +{% include figure.html filename="intro-omeka-net-16.png" caption="Criar uma coleção" %} + +É possível começar a ordenar a sua lista de itens agrupando-os em coleções. Para fazer isso, retorne para o painel de controle (Dashboard), clique na aba de **Coleções** (Collections) e clique em **Adicionar uma coleção**. + +## Insira informações sobre a sua coleção + +{% include figure.html filename="intro-omeka-net-17.png" caption="Adicionar metadados da coleção" %} + +No Omeka, os metadados são fundamentais! Insira alguma informação sobre a sua nova coleção e lembre-se de clicar no botão **Público** perto do fim da página. Então salve a coleção. + +## Adicione itens à sua coleção + +{% include figure.html filename="intro-omeka-net-18.png" caption="Clique na caixa de seleção de cada item para editar" %} + +Para preencher a coleção que acabou de criar, clique na aba de *Itens*. Da sua lista **Ver Itens** (Explorar Itens em PT_PT), clique nas caixas de verificação dos itens que pertencem à sua nova coleção. Então clique no botão **Editar**.
+ +## Escolha a coleção + +{% include figure.html filename="intro-omeka-net-19.png" caption="Escolha uma coleção do menu suspenso" %} + +Na página Editar Itens (Editar Itens em Lote em PT_PT), selecione a Coleção à qual gostaria de adicionar os seus itens. (Além disso, tenha atenção a todas as outras coisas que podem ser feitas nessa página). + +## Veja a sua nova coleção + +{% include figure.html filename="intro-omeka-net-20.png" caption="Ver coleção, vista pública" %} + +Retorne para o seu site público. Se clicarmos na aba de **Ver Coleções** (Explorar as Colecções em PT_PT) na face pública do seu site, deverá haver agora uma nova coleção contendo os itens que foram identificados. + +Agora que alguns itens foram adicionados e agrupados numa coleção, dedique algum tempo para editar ainda mais o seu site. Ele está a começar a tomar forma agora que há tanto itens individuais como unidades temáticas, mas o Omeka pode fazer ainda mais. Iremos falar sobre isso numa próxima lição. + +## Recursos Adicionais + +A equipe do Omeka compilou ótimos recursos nas [páginas de ajuda](https://info.omeka.net/)(em inglês) do software. + diff --git a/pt/licoes/limpar-dados-openrefine.md b/pt/licoes/limpar-dados-openrefine.md index 57290f8184..d4cb1b409a 100644 --- a/pt/licoes/limpar-dados-openrefine.md +++ b/pt/licoes/limpar-dados-openrefine.md @@ -1,155 +1,155 @@ ---- -title: "Limpar dados com o OpenRefine" -slug: limpar-dados-openrefine -original: cleaning-data-with-openrefine -layout: lesson -collection: lessons -date: 2013-08-05 -translation_date: 2023-04-29 -tested-date: 2024-03-14 -lesson-testers: Antonin Delpeuch -authors: -- Seth van Hooland -- Ruben Verborgh -- Max De Wilde -reviewers: -- Patrick Burns -- Nora McGregor -editors: -- Adam Crymble -translator: -- Francisco Nabais -translation-editor: -- Aracele Torres -translation-reviewer: -- Eric Brasil -- Joana Vieira Paulino -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/427 -difficulty: 2 -activity: transforming -topics: [data-manipulation] -abstract: Este tutorial foca-se na forma como o usuário pode diagnosticar e agir perante a precisão dos dados -avatar_alt: Dois homens a lavar a roupa ao ar livre -doi: 10.46430/phpt0038 ---- - -{% include toc.html %} - -
    -Nota de Tradução: Alguns termos, por aparecerem constantemente e facilitarem a interpretação das imagens, apenas foram propositadamente traduzidos uma vez e serão colocados entre parênteses em português na primeira vez que surgem. -
    - -## Objetivos da lição - -Não aceite os dados tal como são apresentados. Esta é a principal mensagem deste tutorial que se foca na forma como os usuários podem diagnosticar e agir perante a precisão dos dados. Nesta lição, o usuário vai aprender os princípios e a prática da limpeza de dados, ao mesmo tempo que aprende como é que o [*OpenRefine*](http://openrefine.org) (em inglês) pode ser utilizado para realizar quatro tarefas essenciais que vão ajudar na limpeza de dados: - -1. Remover registos duplicados -2. Separar múltiplos valores contidos no mesmo campo -3. Analisar a distribuição de valores ao longo do Dataset -4. Agrupar diferentes representações da mesma realidade - -Estes passos são explicitados com a ajuda de uma série de exercicios baseados na coleção de metadados do *[Powerhouse](https://powerhouse.com.au/)* (em inglês), demonstrando, assim, como métodos (semi)automáticos podem ajudar na correção de erros dos dados. - -## Porque é que os historiadores devem se preocupar com a qualidade dos dados? - -Registros duplicados, valores vazios e formatos incossistentes são fenómenos com os quais devemos estar preparados para lidar quando utilizamos data sets históricos. Esta lição vai ensinar o usuário a descobrir inconsistências nos dados contidos em tabelas ou bases de dados. À medida que, cada vez mais, partilhamos, agregamos e reutilizamos dados na web, os historiadores terão uma maior necessidade de responder a problemas inevitáveis associados à qualidade dos dados. Utilizando um programa chamado *OpenRefine*, o usuário será capaz de identificar facilmente erros sistemáticos, tais como células em branco, duplicações, inconsistências ortográficas, etc. O *OpenRefine* não só permite um diagnóstico rápido da precisão dos dados, mas também age perante certos erros de forma automática. - -## Descrição da ferramenta: *OpenRefine* - -No passado, os historiadores dependiam de especialistas em tecnologias da informação para diagnosticar a qualidade dos dados e para executar tarefas de limpeza dos mesmos. Isto exigia programas computacionais personalizados quando se trabalhava com data sets consideráveis. Felizmente, o surgimento de Ferramentas Interativas de Transformação de Dados (Interactive Data Transformation tools, ou IDTs em inglês), permite que até profissionais sem habilidades técnicas aprofundadas possam realizar operações rápidas e baratas em grandes data sets. - -As Ferramentas Interativas de Transformação de Dados assemelham-se às tabelas de dados do desktop com as quais estamos familiarizados, chegando a partilhar funcionalidades com as mesmas. O usuário pode, por exemplo, usar aplicações como o Microsoft Excel para organizar os seus dados com base em vários filtros, sejam eles numéricos, alfabéticos ou até personalizados, o que permite detetar erros mais facilmente. Configurar estes filtros em tabelas de dados pode ser complicado, já que estes são uma função secundária do software. Geralmente, podemos dizer que as tabelas de dados são projetadas para funcionar em linhas ou células individuais, enquanto as Ferramentas Interativas de Transformação de Dados operam em grandes intervalos de dados ao mesmo tempo. Estas "Tabelas de dados em esteroides" fornecem uma interface integrada e amigável através da qual os usuários finais podem detetar e corrigir erros. 
- -Nos últimos anos, têm sido desenvolvidas várias ferramentas para a transformação de dados interativos, tais como [*Potter’s Wheel ABC*](https://perma.cc/Q6QD-E64N) (em inglês) e [*Wrangler*](https://perma.cc/Y45B-6ZLU) (em inglês). Aqui queremos concentrar-nos, sobretudo, no *OpenRefine* (anteriormente *Freebase Gridworks* e *Google Refine*), já que, na opinião dos autores, esta é a ferramenta mais amigável para processar e limpar eficazmente grandes quantidades de dados numa interface baseada no navegador de internet. - -Além do *[data profiling](https://perma.cc/32Z8-8EMT)* (perfil de dados) (em inglês) e das operações de limpeza, as extensões do *OpenRefine* permitem aos usuários identificar conceitos num texto desestruturado através de um processo denominado *[Named-Entity Recognition](https://perma.cc/FCB6-9DU2)* (Reconhecimento de Entidade Nomeada) (em inglês) (NER) e reconciliar os seus próprios dados com bases de conhecimento existentes. Ao fazê-lo, o *OpenRefine* pode ser uma ferramenta prática de ligação dos dados com conceitos e autoridades que já foram declarados na web por entidades como a *[Library of Congress](https://perma.cc/24QD-NP6Y)* (Biblioteca do Congresso dos Estados Unidos da América) (em inglês) ou o [OCLC](https://perma.cc/48KR-ZTAJ) (Centro de Bibliotecas de Computadores Online) (em inglês). A limpeza de dados é um pré-requisito para estas etapas; A taxa de sucesso do NER e o êxito do processo de correspondência entre os dados do usuário e as entidades externas, dependem da habilidade do mesmo de tornar estes dados o mais concretos possível. - -## Descrição do exercício: *Powerhouse* - -O *Powerhouse*, em Sydney, permite-lhe exportar gratuitamente os metadados da sua coleção no seu [sítio Web](https://powerhouse.com.au/). Este museu é um dos maiores do mundo na área da ciência e tecnologia, fornecendo acesso a quase 90,000 objetos, que vão desde motores a vapor até vidros finos e de peças de alta-costura a chips de computadores. - -O museu divulgou ativamente a sua coleção em linha e disponibilizou gratuitamente a maior parte dos seus dados. No seu sítio Web, era possível descarregar um ficheiro de texto separado por separadores denominado `phm-collection.tsv` e abri-lo como uma tabela de dados. O ficheiro descomprimido (58MB) contém metadados básicos (17 campos) para 75,823 objetos, sob a licença *[Creative Commons Attribution Share Alike (CCASA)](https://perma.cc/M3QW-RLW6)* (em inglês). Neste tutorial utilizaremos uma cópia dos dados que está arquivada para o usuário fazer o download (mais à frente). Isto garante que se o *Powerhouse* atualizar os seus dados, o usuário ainda vai conseguir acompanhar esta lição. - -Ao longo do processo de limpeza e de criação do perfil dos dados, a lição vai focar o campo das `'Categorias'`, que é preenchido com termos do [*Powerhouse Object Names Thesaurus* (BARTOC)](https://perma.cc/PEP6-X2LD) (em inglês). O BARTOC reconhece o uso e a ortografia australiana e reflete, de uma maneira muito direta, os pontos fortes da coleção. Nesta coleção, o usuário vai encontrar, por exemplo, mais e melhores representações da história social e das artes decorativas e menos objetos com nomes associados às belas-artes e à história natural. - -Os termos no campo das Categorias compreendem o que chamamos de [Vocabulário Controlado](https://perma.cc/FEW7-CFDB). 
Um Vocabulário Controlado consiste em palavras-chave que, ao utilizarem um número limitado de termos, descrevem o conteúdo de uma coleção, sendo, normalmente, um ponto de entrada importante para historiadores em data sets de bibliotecas, arquivos e museus. É, por isso, que será dada uma importância especial ao campo das 'Categorias'. Depois de ser feita a limpeza dos dados, deverá ser possível reutilizar os termos do Vocabulário Controlado para encontrar informação adicional sobre esses termos num outro lugar online. Isto é conhecido como a criação de *[Linked Data](https://perma.cc/5SRF-V3UR)* (Dados Vinculados). - -### Primeiros passos: instalação do *OpenRefine* e importação de dados - -Deverá ser feito o [Download do *OpenRefine*](https://openrefine.org/download) (em inglês) e seguidas as instruções. O *OpenRefine* funciona em todas as plataformas: Windows, Mac, e Linux. Este será aberto no navegador de internet do usuário, mas é importante entender que a aplicação é executada localmente e que os dados não serão guardados online. Com o *OpenRefine* aberto no seu navegador de internet, clique em '**Language Settings**', presente no canto superior esquerdo, e altere a linguagem para '**Português**'. Os arquivos de dados estão disponíveis no *Programming Historian* como *[phm-collection](/assets/cleaning-data-with-openrefine/phm-collection.tsv)*. Por favor, faça o Download do ficheiro *phm-collection.tsv* que serão utilizados ao longo deste tutorial antes de continuar. - -Na página inicial do *OpenRefine* crie um novo projeto utilizando o ficheiro de dados que fez o download e clique '**Próximo**' . A primeira linha será processada como o nome da coluna por defeito, mas será preciso desmarcar a caixa de seleção 'Usar caracter " encerrar células contendo separadores de colunas', já que as aspas dentro do ficheiro não têm qualquer significado para o *OpenRefine*. Além disto, deverá selecionar a caixa de seleção 'Tentar analisar texto de células como números' para que o *OpenRefine* detete automaticamente números. Agora deverá clicar em '**Criar projeto**'. Se tudo correr como planejado, deverá ver no canto superior esquerdo 75,814 linhas. - -O data set do *Powerhouse* consiste em metadados detalhados sobre todos os objetos da coleção incluindo o título, a descrição, as várias categorias às quais o item pertence, informação sobre a proveniência do mesmo e um link persistente para a página que hospeda o objeto dentro do site do museu. Para ter uma ideia do objeto a que corresponde os metadados, clique no link persistente e o site será aberto. - -{% include figure.html filename="en-or-cleaning-data-with-openrefine-01.png" alt="Imagem de um objeto de amostra no site *Powerhouse* onde é possível observar um carro de brincar com desenhos de palhaços" caption="Figura 1: Captura de tela de um objeto de amostra no site *Powerhouse*" %} - -### Conheça os seus dados - -A primeira coisa a fazer é observar e conhecer os seus dados. Poderá inspecionar os diferentes valores de dados exibindo-os em `facetas e filtros`. Poderá considerar a [faceta](https://perma.cc/HKN9-NYXZ) (em inglês) uma lente através da qual é possível ver um subconjunto específico de dados baseados no critério da sua escolha. Clique no triângulo em frente ao nome da coluna, selecione Faceta e crie uma Faceta. Por exemplo, experimente o `Faceta de texto` ou o `Faceta numérica`, dependendo da natureza dos valores contidos nesses campos (os valores numéricos estão expostos a verde). 
No entanto, tenha em atenção que estas Facetas de texto têm uma maior eficácia em campos com valores redundantes (*Categories* (categorias), por exemplo); Se ocorrer o erro 'Muitas para mostrar' você pode escolher aumentar o limite da contagem de opções a cima do padrão dos 2,000. Todavia, um limite muito alto pode tornar o aplicativo mais lento (por norma, 5,000 é uma escolha segura). Facetas numéricas não têm esta restrição. Para mais opções, selecione Facetas personalizadas : Faceta por valores em branco, por exemplo, torna-se útil na procura de quantos valores foram preenchidos em cada campo. Vamos explorar mais detalhadamente estas funcionalidades nos exercícios a seguir. - -### Remoção de linhas em branco - -Uma coisa que irá reparar quando criar Facetas numéricas para a coluna do *Record ID* (Identificador do registo), é que existem três linhas sem dados. Poderá encontrá-las ao desmarcar a caixa de seleção numérica, deixando apenas valores não-numéricos. Na verdade, estes valores não estão realmente a branco, mas contêm apenas um caractere de espaço em branco, que pode ser visível se mover o seu cursor para onde deveria estar esse valor e clicar no botão '**edit**' (Editar) que aparece. Para remover estas linhas, clique no triângulo em frente à primeira coluna denominada por '**Todos**' , selecione '**Editar linhas**' e depois '**Remover as linhas que corresponderam**'. Feche a faceta numerica para verificar que permanecem agora 75,811 linhas. - -### Remoção de duplicações - -O segundo passo é detetar e remover duplicações. Estas podem ser identificadas ao classificar colunas, como o *Record ID*, por um valor único (neste caso vamos assumir que o *Record ID* é, de facto, único para cada entrada). Esta operação pode ser realizada ao clicar no triângulo à esquerda do *Record ID*, depois devemos selecionar a opção '**Ordenar**…' e escolher o marcador '**números**'. No *OpenRefine*, ordenar é apenas uma ajuda visual, a não ser que torne a reordenação permanente. Para o fazer, clique na opção Ordenar por cima do *Marks* (Marcas) e, em seguida, deverá escolher a opção '**Reordenar linhas permanentemente**'. Se se esquecer de fazer isto, posteriormente, irá ter resultados imprevisíveis neste tutorial. - -Linhas idênticas estão agora adjacentes umas às outras. Em seguida, deixe em branco as linhas do *Record ID* que têm o mesmo *Record ID* que as a cima delas, marcando-as como duplicações. Para o fazer, deve clicar no triângulo do *Record ID*, escolher **Editar células** \> **Transformar em vazias abaixo**. A mensagem de *status* dirá que 84 colunas foram afetadas (se se esqueceu de reordenar as linhas permanentemente, apenas vão ser afetadas 19 colunas; em caso afirmativo, desfaça a operação Transformar em vazias abaixo no separador 'Desfazer/Refazer' e volte ao parágrafo anterior refazendo-o de modo a ter a certeza que as linhas estão reordenadas e não apenas classificadas). Elimine essas linhas ao criar uma faceta em '**Transformar em vazias abaixo**' na coluna do *Record ID* ('**Faceta**' \> '**Facetas personalizadas**' \> '**Faceta por valores em branco**') em seguida deverá selecionar as 84 linhas a branco clicando em '**true**' (Verdade) e removê-las usando o triângulo da coluna '**Todos**' ('**Editar linhas**' \> **Remover as linhas que corresponderam**'). Quando fechar a faceta deverá observar que existem agora 75,727 linhas únicas. - -O usuário deverá ter uma atenção especial ao eliminar duplicações. 
Na etapa mencionada acima, assumimos que o data set possui um campo com valores únicos, indicando que uma linha inteira representa uma duplicação. Este não é necessariamente o caso e, por isso, devemos ter cuidado e verificar manualmente se a linha inteira representa uma duplicação ou não. - -### Atomização - -Depois de remover os registos duplicados, podemos focar-nos na coluna *Categories*. Em média, foram atribuídas 2.25 categorias a cada objeto. Estas categorias estão contidas no mesmo campo, separadas por uma barra vertical '\|'. O registo 9, por exemplo, contém três: 'Mineral samples\|Specimens\|Mineral Samples-Geological' (Amostras minerais\|Espécimes\|Amostras minerais-Geológicas). Para analisar em detalhe o uso destas palavras-chave, os valores do campo das categorias devem ser separados em células individuais com base na barra vertical, expandindo os 75,727 registos em 170,167 linhas. Escolha '**Editar células**', '**Dividir células com múltiplos valores**', digitando '**\|**' como separador de valores. O *OpenRefine* irá informá-lo que tem agora 170,167 linhas. - -É importante compreender totalmente o paradigma das linhas/entradas. Torne a coluna *Record ID* visível para ver o que se passa. Pode mudar entre a opção de visualização 'linhas' e 'entradas' ao clicar nos links que dão pelos mesmos nomes, logo em cima do cabeçalho das colunas. Na opção 'linhas', cada linha representa um par de *Record IDs* e uma única categoria, permitindo a manipulação de cada uma individualmente. A opção 'entradas' tem uma entrada para cada *Record ID*, que pode ter categorias diferentes em linhas diferentes (agrupadas a cinzento ou branco), mas cada registo é manipulado como um todo. Concretamente, existem agora 170,167 atribuições de categorias (Linhas), separadas em 75,736 itens de coleção (Entradas). Pode também ter reparado que estamos com mais 9 registos do que os originais 75,727, mas não se preocupe com isso agora, iremos voltar a esta pequena diferença mais tarde. - -### Facetting e agrupamento - -Um dos conteúdos do campo foi devidamente atomizado, filtros, facetas e agrupamentos podem ser aplicados para fornecer uma visão rápida e geral dos problemas clássicos dos metadados. Ao aplicar a faceta customizada '`Faceta por valores em branco`' à coluna *Categories*, é possível identificar imediatamente os 461 registos que não têm uma categoria, representando 0.6% da coleção. Ao aplicar uma faceta de texto ao campo das categorias podemos ter uma visão geral das 4,935 diferentes categorias utilizadas na coleção (o limite padrão é 2,000, mas poderá clicar na opção '**Definir o limite da contagem da escolha**' para aumentá-la para 5,000). Os títulos podem ser ordenados alfabeticamente (nome') ou por frequência ('quantidade'), fornecendo ao utilizador uma lista dos termos mais usados para indexar a coleção. Os três títulos principais são 'Numismática' (*Numismatics*) (8,041), 'Cerâmica' (*Ceramics*) (7,390) e 'Roupas e vestuário' (*Clothing and dress*) (7,279). - -Após aplicar a faceta, o *OpenRefine* propõe aglomerar as escolhas da faceta com base em vários métodos de similaridade. Tal como a Figura 2 demonstra, o agrupamento permite ao usuário resolver problemas relacionados com inconsistências, o uso incoerente tanto da forma singular como plural e erros de ortografia simples. O *OpenRefine* apresenta os valores relacionados e propõe uma fusão resultante no valor mais recorrente. 
Deverá selecionar a opção '**Agrupar**' para abrir o comando de uniformização dos termos, em seguida, escolha os valores que deseja agrupar ao selecionar as caixas individualmente ou ao clicar '**Marcar todos**' na parte inferior e, por fim, '**Unir selecionados e Re-agrupar**'. - -{% include figure.html filename="tr-pt-cleaning-data-with-openrefine-2.png" alt="Interface do *OpenRefine* referente ao agrupamento e edição da coluna 'Categories' em que é possível observar os métodos de agrupamento e ainda as diferenças detetadas nesta coluna" caption="Figura 2: Visão geral de alguns agrupamentos" %} - -O método padrão de aglomeramento não é muito complexo, portanto ainda não encontra todos os aglomerados. Experimente com diferentes métodos para ver quais são os resultados que estes produzem. Deverá ter cuidado: alguns métodos podem ser muito agressivos e alguns valores, que não deverão estar juntos, podem acabar agrupados. Agora que os valores foram agrupados individualmente, podemos colocá-los de volta numa única célula. Clique no triângulo das *categories* e escolha **Editar células**, **Unir células com múltiplos valores**, escolha a barra vertical ('\|') como separador, **OK**. As linhas têm agora a mesma aparência que tinham antes, com um campo de categorias com vários valores. - -### Aplicação de transformações *ad-hoc* através do uso de expressões GREL - -Relembre-se que existiu um aumento no número de registos depois do processo de divisão: nove registos apareceram do nada. Para encontrar a causa desta disparidade, precisamos de voltar atrás, antes da divisão das categorias em linhas separadas. Para fazer isso, altere o separador 'Desfazer / Refazer' à direita do separador 'Faceta / Filtro' e vai obter um histórico de todas as ações que executou desde que o projeto foi criado. Selecione o passo antes de '*Split multi-valued cells in column Categories*' (Dividir células com vários valores na coluna Categorias) (se seguiu o nosso exemplo deverá ser '*Remove 84 rows*' (Remover 84 linhas)). Depois volte para o separador 'Faceta / Filtro'. - -O problema surgiu durante a operação de divisão no caractere de barra vertical, portanto há uma grande probabilidade do que correu mal estar relacionado com esse caractere. Vamos aplicar um filtro na coluna Categorias ao selecionar '**Filtro de texto**' no menu. Primeiro, digite um único `|` no campo da esquerda: o *OpenRefine* deverá informá-lo que existem 71,064 registos correspondentes (i.e. registos que contenham uma barra vertical) num total de 75,727. Células que não contenham a barra vertical podem ser células em branco ou células apenas com uma categoria, não tendo assim um separador. Tal como o registo 29 que apenas tem '*Scientific instruments*' (Instrumentos científicos). - -Agora insira um segundo '\|' depois do primeiro para obter '\|\|' (dupla barra vertical): poderá observar que existem 9 registos que correspondem a este padrão. Estes são, provavelmente, os 9 registos culpados pela nossa discrepância: quando o *OpenRefine* divide os registos, a dupla barra vertical é interpretada como uma quebra entre dois registos em vez de um separador duplo sem sentido. Agora, como é que corrigimos estes valores? Vá ao menu do campo das categorias e escolha '**Editar células**' \> '**Transformar**…. Bem-vindo à interface de transformação de texto personalizada, uma funcionalidade poderosa do *OpenRefine* que usa a *Google Refine Expression Language* (GREL). 
- -A palavra '*value*' (valor) no campo de texto representa o valor atual de cada célula, valor esse visível em baixo. Podemos modificar este valor ao aplicar-lhe funções (ver a *[GREL documentation](https://perma.cc/A228-FFBE)* (documentação da GREL, em inglês) para uma lista completa). Neste caso, queremos substituir a dupla barra vertical por uma única barra. Isto pode ser realizado ao inserir a seguinte expressão GREL (certifique-se que não se esquece das aspas): - -``` -value.replace('||', '|') -``` - -Em baixo do campo de texto 'Expressão', terá uma pré-visualização dos valores modificados com as duplas barras verticais removidas. Clique em **OK** e tente dividir as categorias de novo com a opção '**Editar células**' \> '**Dividir células com múltiplos valores...**'. O número de registos ficará agora nos 75,727 (clique no link '**entradas**' para verificar). - -\* \* \*\ -Outro problema que pode ser resolvido com a ajuda da GREL é o dos registos para os quais a mesma categoria é listada duas vezes. Observe o registo 41 por exemplo, cujas categorias são '*Models|Botanical specimens|Botanical Specimens|Didactic Displays|Models*' (Modelos|Espécimes botânicos|Espécimes Botânicos|Expositores Didáticos|Modelos). A categoria '*Models*' aparece duas vezes sem nenhuma razão aparente, pelo que vamos querer remover esta duplicação. Clique no triângulo da coluna das '*Categories*' e escolha 'Editar células', 'Unir células com múltiplos valores', OK. Escolha a barra vertical como separador. Agora as categorias estão listadas como antes. Em seguida selecione '**Editar células**' \> '**Transformar**', também na coluna das categorias. Ao usar a GREL podemos dividir sucessivamente as categorias na barra vertical, procurar categorias únicas e juntá-las de novo. Para isso, basta digitar a seguinte expressão: - - -``` -value.split('|').uniques().join('|') -``` - -Ao fazê-lo irá reparar que 33.006 células foram afetadas, mais de metade da coleção. - -### Exportação dos seus dados limpos - -Desde que carregou os seus dados no *OpenRefine*, todas as operações de limpeza foram executadas na memória do software, deixando os dados originais intocados. Se desejar salvar os dados que limpou, terá de os exportar ao clicar no menu '**Exportar**' no canto superior direito do ecrã. O *OpenRefine* suporta uma larga variedade de formatos, tais como [CSV](https://perma.cc/SVC7-TH2C) (em inglês), HTML ou Excel: selecione o que melhor se adapta a si e acrescente o seu próprio modelo de exportação ao clicar 'Criando modelo'. Poderá também exportar o seu projeto num formato interno do *OpenRefine* de modo a partilhá-lo com os outros. - -### Construção sob os dados limpos - -Depois de limpar os seus dados, poderá dar o próximo passo e explorar outros recursos interessantes do *OpenRefine*. A comunidade de utilizadores do *OpenRefine* desenvolveu duas interessantes extensões que permitem ligar os seus dados a dados que já foram publicados na web. A *[RDF Transform extension](https://perma.cc/9RTF-S6LT)* (em inglês) transforma palavras-chave de texto simples em URLs. A [NER extension](https://perma.cc/SM98-U7GG) (em inglês) permite ao usuário aplicar a *named-entity recognition* (NER) que identifica palavras chave em texto corrido e atribui-lhes um URL. - -## Conclusões - -Se apenas se lembrar de uma coisa desta lição, deverá ser o seguinte: *Todos os dados são sujos, mas poderá fazer algo quanto a isso*. Tal como mostrámos aqui, já existe muito que pode ser feito para aumentar significativamente a qualidade dos dados. 
Em primeiro lugar, aprendemos como é que podemos ter uma visão geral e rápida de quantos valores vazios existem no nosso data set e com que frequência é que um valor particular (e.g. uma palavra-chave) é usada ao longo da coleção. Esta lição também demonstra como resolver problemas recorrentes, tais como duplicações e inconsistências ortográficas de maneira automática com a ajuda do *OpenRefine*. Não hesite em experimentar as ferramentas de limpeza enquanto executa estas etapas numa cópia dos seus data sets, já que o *OpenRefine* permite-lhe rastrear e refazer todos os passos caso tenha cometido um erro. +--- +title: "Limpar dados com o OpenRefine" +slug: limpar-dados-openrefine +original: cleaning-data-with-openrefine +layout: lesson +collection: lessons +date: 2013-08-05 +translation_date: 2023-04-29 +tested-date: 2024-03-14 +lesson-testers: Antonin Delpeuch +authors: +- Seth van Hooland +- Ruben Verborgh +- Max De Wilde +reviewers: +- Patrick Burns +- Nora McGregor +editors: +- Adam Crymble +translator: +- Francisco Nabais +translation-editor: +- Aracele Torres +translation-reviewer: +- Eric Brasil +- Joana Vieira Paulino +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/427 +difficulty: 2 +activity: transforming +topics: [data-manipulation] +abstract: Este tutorial foca-se na forma como o usuário pode diagnosticar e agir perante a precisão dos dados +avatar_alt: Dois homens a lavar a roupa ao ar livre +doi: 10.46430/phpt0038 +--- + +{% include toc.html %} + +
    +Nota de Tradução: Alguns termos, por aparecerem constantemente e facilitarem a interpretação das imagens, foram propositadamente traduzidos apenas uma vez e serão colocados entre parênteses em português na primeira vez que surgem. +
    + +## Objetivos da lição + +Não aceite os dados tal como são apresentados. Esta é a principal mensagem deste tutorial, que se foca na forma como os usuários podem diagnosticar e agir perante a precisão dos dados. Nesta lição, o usuário vai aprender os princípios e a prática da limpeza de dados, ao mesmo tempo que aprende como é que o [*OpenRefine*](https://openrefine.org) (em inglês) pode ser utilizado para realizar quatro tarefas essenciais que vão ajudar na limpeza de dados: + +1. Remover registos duplicados +2. Separar múltiplos valores contidos no mesmo campo +3. Analisar a distribuição de valores ao longo do data set +4. Agrupar diferentes representações da mesma realidade + +Estes passos são explicitados com a ajuda de uma série de exercícios baseados na coleção de metadados do *[Powerhouse](https://powerhouse.com.au/)* (em inglês), demonstrando, assim, como métodos (semi)automáticos podem ajudar na correção de erros dos dados. + +## Porque é que os historiadores devem se preocupar com a qualidade dos dados? + +Registros duplicados, valores vazios e formatos inconsistentes são fenómenos com os quais devemos estar preparados para lidar quando utilizamos data sets históricos. Esta lição vai ensinar o usuário a descobrir inconsistências nos dados contidos em tabelas ou bases de dados. À medida que, cada vez mais, partilhamos, agregamos e reutilizamos dados na web, os historiadores terão uma maior necessidade de responder a problemas inevitáveis associados à qualidade dos dados. Utilizando um programa chamado *OpenRefine*, o usuário será capaz de identificar facilmente erros sistemáticos, tais como células em branco, duplicações, inconsistências ortográficas, etc. O *OpenRefine* não só permite um diagnóstico rápido da precisão dos dados, mas também age perante certos erros de forma automática. + +## Descrição da ferramenta: *OpenRefine* + +No passado, os historiadores dependiam de especialistas em tecnologias da informação para diagnosticar a qualidade dos dados e para executar tarefas de limpeza dos mesmos. Isto exigia programas computacionais personalizados quando se trabalhava com data sets consideráveis. Felizmente, o surgimento de Ferramentas Interativas de Transformação de Dados (Interactive Data Transformation tools, ou IDTs, em inglês) permite que até profissionais sem habilidades técnicas aprofundadas possam realizar operações rápidas e baratas em grandes data sets. + +As Ferramentas Interativas de Transformação de Dados assemelham-se às tabelas de dados do desktop com as quais estamos familiarizados, chegando a partilhar funcionalidades com as mesmas. O usuário pode, por exemplo, usar aplicações como o Microsoft Excel para organizar os seus dados com base em vários filtros, sejam eles numéricos, alfabéticos ou até personalizados, o que permite detetar erros mais facilmente. Configurar estes filtros em tabelas de dados pode ser complicado, já que estes são uma função secundária do software. Geralmente, podemos dizer que as tabelas de dados são projetadas para funcionar em linhas ou células individuais, enquanto as Ferramentas Interativas de Transformação de Dados operam em grandes intervalos de dados ao mesmo tempo. Estas "Tabelas de dados em esteroides" fornecem uma interface integrada e amigável através da qual os usuários finais podem detetar e corrigir erros.
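+ +A título de ilustração, eis um exemplo mínimo (um esboço nosso, não um passo da lição) do tipo de operação em massa que uma ferramenta destas permite: no *OpenRefine*, a expressão GREL abaixo, aplicada através de '**Editar células**' \> '**Transformar...**', normalizaria de uma só vez os espaços e as maiúsculas de todas as células de uma coluna (a sintaxe, incluindo a palavra-chave `value`, será explicada mais adiante, na secção sobre transformações *ad-hoc*): + +``` +value.trim().toLowercase() +``` + +Uma única expressão atua, assim, sobre dezenas de milhares de células de uma vez, algo trabalhoso de replicar numa tabela de dados convencional.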
+ +Nos últimos anos, têm sido desenvolvidas várias ferramentas interativas de transformação de dados, tais como [*Potter’s Wheel ABC*](https://perma.cc/Q6QD-E64N) (em inglês) e [*Wrangler*](https://perma.cc/Y45B-6ZLU) (em inglês). Aqui queremos concentrar-nos, sobretudo, no *OpenRefine* (anteriormente *Freebase Gridworks* e *Google Refine*), já que, na opinião dos autores, esta é a ferramenta mais amigável para processar e limpar eficazmente grandes quantidades de dados numa interface baseada no navegador de internet. + +Além do *[data profiling](https://perma.cc/32Z8-8EMT)* (perfil de dados) (em inglês) e das operações de limpeza, as extensões do *OpenRefine* permitem aos usuários identificar conceitos num texto desestruturado através de um processo denominado *[Named-Entity Recognition](https://perma.cc/FCB6-9DU2)* (Reconhecimento de Entidade Nomeada) (em inglês) (NER) e reconciliar os seus próprios dados com bases de conhecimento existentes. Ao fazê-lo, o *OpenRefine* pode ser uma ferramenta prática de ligação dos dados com conceitos e autoridades que já foram declarados na web por entidades como a *[Library of Congress](https://perma.cc/24QD-NP6Y)* (Biblioteca do Congresso dos Estados Unidos da América) (em inglês) ou o [OCLC](https://perma.cc/48KR-ZTAJ) (Centro de Bibliotecas de Computadores Online) (em inglês). A limpeza de dados é um pré-requisito para estas etapas; a taxa de sucesso do NER e o êxito do processo de correspondência entre os dados do usuário e as entidades externas dependem da habilidade do mesmo de tornar estes dados o mais concretos possível. + +## Descrição do exercício: *Powerhouse* + +O *Powerhouse*, em Sydney, permite-lhe exportar gratuitamente os metadados da sua coleção no seu [sítio Web](https://powerhouse.com.au/). Este museu é um dos maiores do mundo na área da ciência e tecnologia, fornecendo acesso a quase 90,000 objetos, que vão desde motores a vapor até vidros finos e de peças de alta-costura a chips de computadores. + +O museu divulgou ativamente a sua coleção em linha e disponibilizou gratuitamente a maior parte dos seus dados. No seu sítio Web, era possível descarregar um ficheiro de texto separado por tabulações denominado `phm-collection.tsv` e abri-lo como uma tabela de dados. O ficheiro descomprimido (58MB) contém metadados básicos (17 campos) para 75,823 objetos, sob a licença *[Creative Commons Attribution Share Alike (CCASA)](https://perma.cc/M3QW-RLW6)* (em inglês). Neste tutorial utilizaremos uma cópia dos dados que está arquivada para o usuário fazer o download (mais à frente). Isto garante que, se o *Powerhouse* atualizar os seus dados, o usuário ainda vai conseguir acompanhar esta lição. + +Ao longo do processo de limpeza e de criação do perfil dos dados, a lição vai focar o campo das `'Categorias'`, que é preenchido com termos do [*Powerhouse Object Names Thesaurus* (BARTOC)](https://perma.cc/PEP6-X2LD) (em inglês). O BARTOC reconhece o uso e a ortografia australiana e reflete, de uma maneira muito direta, os pontos fortes da coleção. Nesta coleção, o usuário vai encontrar, por exemplo, mais e melhores representações da história social e das artes decorativas e menos objetos com nomes associados às belas-artes e à história natural. + +Os termos no campo das Categorias compreendem o que chamamos de [Vocabulário Controlado](https://perma.cc/FEW7-CFDB).
Um Vocabulário Controlado consiste em palavras-chave que, ao utilizarem um número limitado de termos, descrevem o conteúdo de uma coleção, sendo, normalmente, um ponto de entrada importante para historiadores em data sets de bibliotecas, arquivos e museus. É por isso que será dada uma importância especial ao campo das 'Categorias'. Depois de ser feita a limpeza dos dados, deverá ser possível reutilizar os termos do Vocabulário Controlado para encontrar informação adicional sobre esses termos num outro lugar online. Isto é conhecido como a criação de *[Linked Data](https://perma.cc/5SRF-V3UR)* (Dados Vinculados). + +### Primeiros passos: instalação do *OpenRefine* e importação de dados + +Deverá ser feito o [Download do *OpenRefine*](https://openrefine.org/download) (em inglês) e seguidas as instruções. O *OpenRefine* funciona em todas as plataformas: Windows, Mac e Linux. Este será aberto no navegador de internet do usuário, mas é importante entender que a aplicação é executada localmente e que os dados não serão guardados online. Com o *OpenRefine* aberto no seu navegador de internet, clique em '**Language Settings**', presente no canto superior esquerdo, e altere a linguagem para '**Português**'. Os arquivos de dados estão disponíveis no *Programming Historian* como *[phm-collection](/assets/cleaning-data-with-openrefine/phm-collection.tsv)*. Por favor, faça o Download do ficheiro *phm-collection.tsv*, que será utilizado ao longo deste tutorial, antes de continuar. + +Na página inicial do *OpenRefine*, crie um novo projeto utilizando o ficheiro de dados de que fez o download e clique em '**Próximo**'. A primeira linha será processada como os nomes das colunas por defeito, mas será preciso desmarcar a caixa de seleção 'Usar caracter " para encerrar células contendo separadores de colunas', já que as aspas dentro do ficheiro não têm qualquer significado para o *OpenRefine*. Além disto, deverá selecionar a caixa de seleção 'Tentar analisar texto de células como números' para que o *OpenRefine* detete automaticamente números. Agora deverá clicar em '**Criar projeto**'. Se tudo correr como planejado, deverá ver no canto superior esquerdo 75,814 linhas. + +O data set do *Powerhouse* consiste em metadados detalhados sobre todos os objetos da coleção, incluindo o título, a descrição, as várias categorias às quais o item pertence, informação sobre a proveniência do mesmo e um link persistente para a página que hospeda o objeto dentro do site do museu. Para ter uma ideia do objeto a que correspondem os metadados, clique no link persistente e o site será aberto. + +{% include figure.html filename="en-or-cleaning-data-with-openrefine-01.png" alt="Imagem de um objeto de amostra no site *Powerhouse* onde é possível observar um carro de brincar com desenhos de palhaços" caption="Figura 1: Captura de tela de um objeto de amostra no site *Powerhouse*" %} + +### Conheça os seus dados + +A primeira coisa a fazer é observar e conhecer os seus dados. Poderá inspecionar os diferentes valores de dados exibindo-os em `facetas e filtros`. Poderá considerar a [faceta](https://perma.cc/HKN9-NYXZ) (em inglês) uma lente através da qual é possível ver um subconjunto específico de dados baseado no critério da sua escolha. Clique no triângulo em frente ao nome da coluna, selecione Faceta e crie uma Faceta. Por exemplo, experimente a `Faceta de texto` ou a `Faceta numérica`, dependendo da natureza dos valores contidos nesses campos (os valores numéricos estão expostos a verde).
No entanto, tenha em atenção que estas Facetas de texto têm uma maior eficácia em campos com valores redundantes (*Categories* (categorias), por exemplo); se ocorrer o erro 'Muitas para mostrar', pode escolher aumentar o limite da contagem de opções acima do padrão de 2,000. Todavia, um limite muito alto pode tornar o aplicativo mais lento (por norma, 5,000 é uma escolha segura). Facetas numéricas não têm esta restrição. Para mais opções, selecione Facetas personalizadas: a Faceta por valores em branco, por exemplo, torna-se útil na procura de quantos valores foram preenchidos em cada campo. Vamos explorar mais detalhadamente estas funcionalidades nos exercícios a seguir. + +### Remoção de linhas em branco + +Uma coisa que irá reparar quando criar Facetas numéricas para a coluna do *Record ID* (Identificador do registo) é que existem três linhas sem dados. Poderá encontrá-las ao desmarcar a caixa de seleção numérica, deixando apenas valores não-numéricos. Na verdade, estes valores não estão realmente em branco, mas contêm apenas um caractere de espaço em branco, que pode ser visível se mover o seu cursor para onde deveria estar esse valor e clicar no botão '**edit**' (Editar) que aparece. Para remover estas linhas, clique no triângulo em frente à primeira coluna, denominada '**Todos**', selecione '**Editar linhas**' e depois '**Remover as linhas que corresponderam**'. Feche a faceta numérica para verificar que permanecem agora 75,811 linhas. + +### Remoção de duplicações + +O segundo passo é detetar e remover duplicações. Estas podem ser identificadas ao classificar colunas, como o *Record ID*, por um valor único (neste caso vamos assumir que o *Record ID* é, de facto, único para cada entrada). Esta operação pode ser realizada ao clicar no triângulo à esquerda do *Record ID*; depois, devemos selecionar a opção '**Ordenar**…' e escolher o marcador '**números**'. No *OpenRefine*, ordenar é apenas uma ajuda visual, a não ser que torne a reordenação permanente. Para o fazer, clique na opção Ordenar, por cima de *Marks* (Marcas), e, em seguida, escolha a opção '**Reordenar linhas permanentemente**'. Se se esquecer de fazer isto, irá ter, posteriormente, resultados imprevisíveis neste tutorial. + +Linhas idênticas estão agora adjacentes umas às outras. Em seguida, deixe em branco as células do *Record ID* que têm o mesmo *Record ID* que as acima delas, marcando-as como duplicações. Para o fazer, deve clicar no triângulo do *Record ID* e escolher **Editar células** \> **Transformar em vazias abaixo**. A mensagem de *status* dirá que 84 células foram afetadas (se se esqueceu de reordenar as linhas permanentemente, apenas vão ser afetadas 19 células; em caso afirmativo, desfaça a operação Transformar em vazias abaixo no separador 'Desfazer/Refazer' e volte ao parágrafo anterior, refazendo-o de modo a ter a certeza de que as linhas estão reordenadas e não apenas classificadas). Elimine essas linhas ao criar uma faceta por valores em branco na coluna do *Record ID* ('**Faceta**' \> '**Facetas personalizadas**' \> '**Faceta por valores em branco**'); em seguida, selecione as 84 linhas em branco clicando em '**true**' (Verdade) e remova-as usando o triângulo da coluna '**Todos**' ('**Editar linhas**' \> '**Remover as linhas que corresponderam**'). Quando fechar a faceta, deverá observar que existem agora 75,727 linhas únicas.
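+ +Como alternativa (um esboço que não faz parte do procedimento desta lição), as duplicações podem ser sinalizadas sem reordenar as linhas: numa faceta de texto personalizada sobre a coluna *Record ID* ('**Faceta**' \> '**Facetas personalizadas**'; o nome exato do item de menu pode variar consoante a versão), a expressão GREL abaixo devolve 'true' para cada valor que ocorre mais do que uma vez na coluna: + +``` +facetCount(value, 'value', 'Record ID') > 1 +``` + +Os registos marcados com 'true' podem então ser inspecionados antes de qualquer remoção.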
O usuário deverá ter uma atenção especial ao eliminar duplicações. Na etapa mencionada acima, assumimos que o data set possui um campo com valores únicos, indicando que uma linha inteira representa uma duplicação. Este não é necessariamente o caso e, por isso, devemos ter cuidado e verificar manualmente se a linha inteira representa uma duplicação ou não. + +### Atomização + +Depois de remover os registos duplicados, podemos focar-nos na coluna *Categories*. Em média, foram atribuídas 2.25 categorias a cada objeto. Estas categorias estão contidas no mesmo campo, separadas por uma barra vertical '\|'. O registo 9, por exemplo, contém três: 'Mineral samples\|Specimens\|Mineral Samples-Geological' (Amostras minerais\|Espécimes\|Amostras minerais-Geológicas). Para analisar em detalhe o uso destas palavras-chave, os valores do campo das categorias devem ser separados em células individuais com base na barra vertical, expandindo os 75,727 registos em 170,167 linhas. Escolha '**Editar células**', '**Dividir células com múltiplos valores**', digitando '**\|**' como separador de valores. O *OpenRefine* irá informá-lo de que tem agora 170,167 linhas. + +É importante compreender totalmente o paradigma das linhas/entradas. Torne a coluna *Record ID* visível para ver o que se passa. Pode mudar entre a opção de visualização 'linhas' e 'entradas' ao clicar nos links que dão pelos mesmos nomes, logo em cima do cabeçalho das colunas. Na opção 'linhas', cada linha representa um par de *Record ID* e uma única categoria, permitindo a manipulação de cada uma individualmente. A opção 'entradas' tem uma entrada para cada *Record ID*, que pode ter categorias diferentes em linhas diferentes (agrupadas a cinzento ou branco), mas cada registo é manipulado como um todo. Concretamente, existem agora 170,167 atribuições de categorias (linhas), separadas em 75,736 itens de coleção (entradas). Pode também ter reparado que estamos com mais 9 registos do que os originais 75,727, mas não se preocupe com isso agora; iremos voltar a esta pequena diferença mais tarde. + +### Faceting e agrupamento + +Uma vez que o conteúdo do campo foi devidamente atomizado, filtros, facetas e agrupamentos podem ser aplicados para fornecer uma visão rápida e geral dos problemas clássicos dos metadados. Ao aplicar a faceta customizada '`Faceta por valores em branco`' à coluna *Categories*, é possível identificar imediatamente os 461 registos que não têm uma categoria, representando 0.6% da coleção. Ao aplicar uma faceta de texto ao campo das categorias, podemos ter uma visão geral das 4,935 diferentes categorias utilizadas na coleção (o limite padrão é 2,000, mas poderá clicar na opção '**Definir o limite da contagem da escolha**' para aumentá-lo para 5,000). Os títulos podem ser ordenados alfabeticamente ('nome') ou por frequência ('quantidade'), fornecendo ao utilizador uma lista dos termos mais usados para indexar a coleção. Os três títulos principais são 'Numismática' (*Numismatics*) (8,041), 'Cerâmica' (*Ceramics*) (7,390) e 'Roupas e vestuário' (*Clothing and dress*) (7,279). + +Após aplicar a faceta, o *OpenRefine* propõe aglomerar as escolhas da faceta com base em vários métodos de similaridade. Tal como a Figura 2 demonstra, o agrupamento permite ao usuário resolver problemas relacionados com inconsistências, o uso incoerente tanto da forma singular como plural e erros de ortografia simples. O *OpenRefine* apresenta os valores relacionados e propõe uma fusão resultante no valor mais recorrente. Deverá selecionar a opção '**Agrupar**' para abrir o comando de uniformização dos termos; em seguida, escolha os valores que deseja agrupar ao selecionar as caixas individualmente ou ao clicar em '**Marcar todos**' na parte inferior e, por fim, '**Unir selecionados e Re-agrupar**'. + +{% include figure.html filename="tr-pt-cleaning-data-with-openrefine-2.png" alt="Interface do *OpenRefine* referente ao agrupamento e edição da coluna 'Categories' em que é possível observar os métodos de agrupamento e ainda as diferenças detetadas nesta coluna" caption="Figura 2: Visão geral de alguns agrupamentos" %}
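+ +A propósito, o método de agrupamento padrão ('*fingerprint*') tem um equivalente direto em GREL, o que ajuda a perceber o que o agrupamento faz nos bastidores. Num esboço ilustrativo (não é um passo da lição), a expressão abaixo devolve a 'chave' usada para juntar valores, convertendo-os para minúsculas, removendo a pontuação e ordenando as palavras, de modo a que grafias variantes produzam a mesma chave: + +``` +value.fingerprint() +``` + +Valores cuja chave coincide são propostos como um único agrupamento.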
O método padrão de agrupamento não é muito complexo, portanto ainda não encontra todos os agrupamentos. Experimente diferentes métodos para ver que resultados produzem, mas tenha cuidado: alguns métodos podem ser muito agressivos e alguns valores que não deveriam estar juntos podem acabar agrupados. Agora que os valores foram agrupados individualmente, podemos colocá-los de volta numa única célula. Clique no triângulo da coluna *Categories* e escolha **Editar células**, **Unir células com múltiplos valores**, escolha a barra vertical ('\|') como separador, **OK**. As linhas têm agora a mesma aparência que tinham antes, com um campo de categorias com vários valores. + +### Aplicação de transformações *ad-hoc* através do uso de expressões GREL + +Relembre-se de que existiu um aumento no número de registos depois do processo de divisão: nove registos apareceram do nada. Para encontrar a causa desta disparidade, precisamos de voltar atrás, antes da divisão das categorias em linhas separadas. Para fazer isso, mude para o separador 'Desfazer / Refazer', à direita do separador 'Faceta / Filtro', e vai obter um histórico de todas as ações que executou desde que o projeto foi criado. Selecione o passo antes de '*Split multi-valued cells in column Categories*' (Dividir células com vários valores na coluna Categorias) (se seguiu o nosso exemplo, deverá ser '*Remove 84 rows*' (Remover 84 linhas)). Depois volte para o separador 'Faceta / Filtro'. + +O problema surgiu durante a operação de divisão no caractere de barra vertical, portanto há uma grande probabilidade de o que correu mal estar relacionado com esse caractere. Vamos aplicar um filtro na coluna Categorias ao selecionar '**Filtro de texto**' no menu. Primeiro, digite um único `|` no campo da esquerda: o *OpenRefine* deverá informá-lo de que existem 71,064 registos correspondentes (i.e. registos que contenham uma barra vertical) num total de 75,727. Células que não contenham a barra vertical podem ser células em branco ou células apenas com uma categoria, não tendo assim um separador, tal como o registo 29, que apenas tem '*Scientific instruments*' (Instrumentos científicos). + +Agora insira um segundo '\|' depois do primeiro para obter '\|\|' (dupla barra vertical): poderá observar que existem 9 registos que correspondem a este padrão. Estes são, provavelmente, os 9 registos culpados pela nossa discrepância: quando o *OpenRefine* divide os registos, a dupla barra vertical é interpretada como uma quebra entre dois registos em vez de um separador duplo sem sentido. Agora, como é que corrigimos estes valores? Vá ao menu do campo das categorias e escolha '**Editar células**' \> '**Transformar...**'. Bem-vindo à interface de transformação de texto personalizada, uma funcionalidade poderosa do *OpenRefine* que usa a *Google Refine Expression Language* (GREL).
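+ +Antes de tratarmos das barras verticais, note que esta mesma interface poderia ter resolvido, por exemplo, as células que continham apenas um espaço em branco (ver 'Remoção de linhas em branco'). Num esboço possível, em que a palavra-chave `value` representa o conteúdo de cada célula (como explicado já a seguir), a expressão abaixo esvaziaria essas células, sem remover as linhas: + +``` +if(value.trim().length() == 0, null, value) +``` + +Na lição optámos pela remoção manual dessas linhas; fica aqui o registo da alternativa.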
+ +A palavra '*value*' (valor) no campo de texto representa o valor atual de cada célula, valor esse visível em baixo. Podemos modificar este valor ao aplicar-lhe funções (ver a *[GREL documentation](https://perma.cc/A228-FFBE)* (documentação da GREL, em inglês) para uma lista completa). Neste caso, queremos substituir a dupla barra vertical por uma única barra. Isto pode ser realizado ao inserir a seguinte expressão GREL (certifique-se de que não se esquece das aspas): + +``` +value.replace('||', '|') +``` + +Em baixo do campo de texto 'Expressão', terá uma pré-visualização dos valores modificados, com as duplas barras verticais removidas. Clique em **OK** e tente dividir as categorias de novo com a opção '**Editar células**' \> '**Dividir células com múltiplos valores...**'. O número de registos ficará agora nos 75,727 (clique no link '**entradas**' para verificar). + +\* \* \*\ +Outro problema que pode ser resolvido com a ajuda da GREL é o dos registos para os quais a mesma categoria é listada duas vezes. Observe o registo 41, por exemplo, cujas categorias são '*Models|Botanical specimens|Botanical Specimens|Didactic Displays|Models*' (Modelos|Espécimes botânicos|Espécimes Botânicos|Expositores Didáticos|Modelos). A categoria '*Models*' aparece duas vezes sem nenhuma razão aparente, pelo que vamos querer remover esta duplicação. Clique no triângulo da coluna das '*Categories*', escolha '**Editar células**' \> '**Unir células com múltiplos valores**', indique a barra vertical como separador e clique em OK. Agora as categorias estão listadas como antes. Em seguida, selecione '**Editar células**' \> '**Transformar**', também na coluna das categorias. Ao usar a GREL, podemos dividir sucessivamente as categorias na barra vertical, procurar categorias únicas e juntá-las de novo. Para isso, basta digitar a seguinte expressão: + +``` +value.split('|').uniques().join('|') +``` + +Ao fazê-lo, irá reparar que 33,006 células foram afetadas, mais de metade da coleção. + +### Exportação dos seus dados limpos + +Desde que carregou os seus dados no *OpenRefine*, todas as operações de limpeza foram executadas na memória do software, deixando os dados originais intocados. Se desejar salvar os dados que limpou, terá de os exportar ao clicar no menu '**Exportar**' no canto superior direito do ecrã. O *OpenRefine* suporta uma larga variedade de formatos, tais como [CSV](https://perma.cc/SVC7-TH2C) (em inglês), HTML ou Excel: selecione o que melhor se adapta a si e acrescente o seu próprio modelo de exportação ao clicar em 'Criando modelo'. Poderá também exportar o seu projeto num formato interno do *OpenRefine* de modo a partilhá-lo com os outros. + +### Construção sobre os dados limpos + +Depois de limpar os seus dados, poderá dar o próximo passo e explorar outros recursos interessantes do *OpenRefine*. A comunidade de utilizadores do *OpenRefine* desenvolveu duas interessantes extensões que permitem ligar os seus dados a dados que já foram publicados na web. A *[RDF Transform extension](https://perma.cc/9RTF-S6LT)* (em inglês) transforma palavras-chave de texto simples em URLs. A [NER extension](https://perma.cc/SM98-U7GG) (em inglês) permite ao usuário aplicar a *named-entity recognition* (NER), que identifica palavras-chave em texto corrido e atribui-lhes um URL. + +## Conclusões + +Se apenas se lembrar de uma coisa desta lição, deverá ser o seguinte: *Todos os dados são sujos, mas poderá fazer algo quanto a isso*. Tal como mostrámos aqui, já existe muito que pode ser feito para aumentar significativamente a qualidade dos dados.
Em primeiro lugar, aprendemos como é que podemos ter uma visão geral e rápida de quantos valores vazios existem no nosso data set e com que frequência é que um valor particular (e.g. uma palavra-chave) é usado ao longo da coleção. Esta lição também demonstra como resolver problemas recorrentes, tais como duplicações e inconsistências ortográficas, de maneira automática, com a ajuda do *OpenRefine*. Não hesite em experimentar as ferramentas de limpeza, desde que execute estas etapas numa cópia dos seus data sets, já que o *OpenRefine* permite-lhe rastrear e desfazer todos os passos caso tenha cometido um erro. diff --git a/pt/licoes/manipulacao-transformacao-dados-r.md b/pt/licoes/manipulacao-transformacao-dados-r.md index dbd149d8e4..bdb00c5f3b 100644 --- a/pt/licoes/manipulacao-transformacao-dados-r.md +++ b/pt/licoes/manipulacao-transformacao-dados-r.md @@ -1,422 +1,422 @@ ---- -title: Manipulação e transformação de dados com R -slug: manipulacao-transformacao-dados-r -layout: lesson -collection: lessons -date: 2017-08-01 -translation_date: 2022-11-26 -authors: -- Nabeel Siddiqui -editors: -- Ian Milligan -reviewers: -- Lauren Tilton -- Ryan Deschamps -translator: -- Ian Araujo -translation-editor: -- Jimmy Medeiros -translation-reviewer: -- Suemi Higuchi -- Joana Paulino -difficulty: 2 -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/397 -activity: transforming -topics: [data-manipulation, data-management, distant-reading, r, data-visualization] -abstract: "Esta lição explora como os investigadores podem tornar seus dados organizados, entender os pacotes do R para manipulação de dados e conduzir análises de dados básicas usando esta linguagem." -original: data-wrangling-and-management-in-r -avatar_alt: Barra de sabão -doi: 10.46430/phpt0035 ---- - -{% include toc.html %} - -## Requisitos -Nesta lição consideramos que já possui algum conhecimento da linguagem R. Se ainda não completou a lição [Noções básicas de R com dados tabulares](/pt/licoes/nocoes-basicas-R-dados-tabulares), recomendamos que o faça primeiro. Ter experiência com outras linguagens de programação também pode ser benéfico. Se está buscando por onde começar aprendendo outras linguagens, recomendamos os excelentes tutoriais de Python do *Programming Historian*. - -Nota da tradução: o conteúdo da programação utilizado na lição original foi alterado para esta versão em português para que o contexto e os exemplos sejam próximos da realidade da comunidade que fala o idioma. Por conta disso, parte do texto da lição traduzida, bem como os exemplos e as interpretações dos dados são diferente da lição original. No entanto, o conteúdo e a estrutura da lição são fidedignos à lição original, como os tipos de dados e as análises desenvolvidas. Mudamos, por exemplo, a comparação entre Mississipi e Virgínia por Brasil e Argentina, mantendo os recursos e procedimentos realizados por Nabeel Siddiqui. - -## Objetivos da lição -Ao fim desta lição, você: - -1. Saberá como tornar seus dados bem ordenados (*tidy*) e entenderá por que isso é importante. -2. Terá assimilado o uso do pacote [dplyr](https://cran.r-project.org/web/packages/dplyr/index.html) (em inglês) e sua aplicação na manipulação e controle de dados. -3. Estará familiarizado com o operador *pipe* `%>%` na linguagem R e verá como ele pode auxiliar na criação de códigos mais legíveis. -4. Terá ganho experiência com análise exploratória de dados através de exemplos básicos de manipulação de dados.
- -## Introdução -Os dados que encontra disponíveis nas diversas plataformas raramente estão no formato adequado para serem analisados, e precisará manipulá-los antes de explorar as perguntas de seu interesse. Isso pode tomar mais tempo que a própria análise dos dados! Neste tutorial, vamos aprender técnicas básicas para manipulação, gestão e controle de dados usando R. Especificamente, nos debruçaremos sobre a filosofia do ["*tidy data*"](https://www.jstatsoft.org/article/view/v059i10) (em inglês) conforme apresentada por Hadley Wickham. - -De acordo com [Wickham](http://hadley.nz/) (em inglês), os dados estão *tidy* ou bem-organizados quando satisfazem três critérios chave: - -1. Cada unidade de observação está em uma linha -2. Cada variável está em uma coluna -3. Cada valor possui a sua própria célula. - -Estar atento a estes critérios nos permite reconhecer quando os nossos dados estão adequados ou não. Também nos fornece um esquema padrão e um conjunto de soluções para lidar com alguns dos problemas mais comuns encontrados em *datasets* "mal-arranjados", como por exemplo: - -1. Nomes de colunas como valores ao invés de nomes de variáveis -2. Múltiplas variáveis contidas em uma única coluna -3. Variáveis armazenadas tanto em linhas quanto em colunas -4. Unidades de observação de diferentes categorias armazenadas na mesma tabela -5. Uma única unidade de observação armazenada em múltiplas tabelas. - -Talvez o mais importante seja que manter os dados nesse formato nos permite utilizar uma série de pacotes do ["tidyverse,"](http://tidyverse.org/) (em inglês), concebidos para trabalhar especificamente com dados neste formato *tidy*. Dessa forma, assegurando-nos de que os dados de entrada e de saída estão bem organizados, precisaremos apenas de um pequeno conjunto de ferramentas para resolver um grande número de questões. Podemos combinar, manipular e dividir os *datasets* que criamos, conforme considerarmos mais adequado. - -Neste tutorial focaremos no pacote [dplyr](https://cran.r-project.org/web/packages/dplyr/index.html) (em inglês) presente no tidyverse, mas também é importante mencionar alguns outros que serão vistos na lição: - -* [**magittr**](http://magrittr.tidyverse.org) (em inglês) -- Este pacote nos garante acesso ao operador *pipe* `%>%`, que torna o nosso código mais legível. -* [**ggplot2**](http://ggplot2.tidyverse.org/) (em inglês) -- Este pacote utiliza a ["Gramática de Gráficos"](http://www.springer.com/us/book/9780387245447) (em inglês) para fornecer uma forma fácil de visualizar nossos dados. -* [**tibble**](http://tibble.tidyverse.org/) (em inglês) -- Este pacote nos fornece uma releitura dos tradicionais *data frames*, mais fáceis de serem trabalhados e visualizados. - -Instale o "tidyverse", se ainda não o fez, e carregue-o antes de começarmos. Além disso, certifique-se de que possui instaladas a -[versão mais recente do R](https://cran.rstudio.com/) e a [versão mais recente do RStudio](https://www.rstudio.com/products/rstudio/download/) compatíveis com o seu sistema operacional. - -Copie o código a seguir para o seu RStudio. Para executá-lo, precisa selecionar as linhas e pressionar Ctrl+Enter (Command+Enter no Mac OS): - - # Instala e carrega a biblioteca tidyverse - # Não se preocupe caso demore um pouco - - install.packages("tidyverse") - library(tidyverse) - - -## Um exemplo do dplyr em ação -Vejamos um exemplo de como o dplyr pode auxiliar historiadores. 
Vamos utilizar o pacote "dados" [^1] e importar alguns indicadores socioeconômicos de países entre 1952 e 2007. - -O pacote "remotes" permite a instalação de pacotes R a partir de repositórios remotos, incluindo o GitHub, como é o caso de "dados". - - # Instala e carrega as bibliotecas "remotes" e "dados" - - install.packages("remotes") - library(remotes) - - remotes::install_github("cienciadedatos/dados") - library(dados) - -Em seguida, para termos acesso ao *dataset* "dados_gapminder", que se encontra no pacote "dados", basta executar o seguinte código: - - # Cria o objeto dados_socioeconomicos_paises e atribui a ele os elementos de dados_gapminder - - dados_socioeconomicos_paises <- dados_gapminder - -Os dados do [Gapminder](https://www.gapminder.org/) (em inglês) contêm o progresso de países ao longo do tempo, observando as estatísticas de alguns índices. Após importar o *dataset*, notará que ele possui seis variáveis: país, continente, ano, expectativa de vida, população e PIB *per capita*. Os dados já estão em formato *tidy*, possibilitando uma infinidade de opções para exploração futura. - -Neste exemplo, vamos visualizar o crescimento populacional de Brasil e Argentina ao longo dos anos. Para isso utilizaremos o pacote dplyr a fim de filtrar os dados que contenham apenas informações dos países de nosso interesse. Em seguida, utilizaremos o ggplot2 para visualizar tais dados. Este exercício é apenas uma breve demonstração do que é possível fazer com o dplyr, portanto, não se preocupe se não entender o código por enquanto. - - # Filtra os países desejados (Brasil e Argentina) - - dados_brasil_argentina <- dados_socioeconomicos_paises %>% - filter(pais %in% c("Brasil", "Argentina")) - - # Visualiza a população dos dois países - - ggplot(data = dados_brasil_argentina, aes(x = ano, y = populacao, color = pais)) + - geom_line() + - geom_point() - -{% include figure.html filename="pt-tr-manipulacao-transformacao-dados-r-01.png" alt="Imagem com a representação de um gráfico de linhas com dados da população por anos para o Brasil e a Argentina" caption="Gráfico da população de Brasil e Argentina, ao longo dos anos" %} - -Como podemos observar, a população absoluta do Brasil é consideravelmente maior em comparação com a população da Argentina. Embora isso pareça óbvio devido ao tamanho do território brasileiro, o código nos fornece uma base sobre a qual podemos formular uma infinidade de questões similares. Por exemplo, com uma pequena mudança no código podemos criar um gráfico similar com dois países diferentes, como Portugal e Bélgica. - - # Filtra os países desejados (Portugal e Bélgica) - - dados_portugal_belgica <- dados_socioeconomicos_paises %>% - filter(pais %in% c("Portugal", "Bélgica")) - - # Visualiza a população dos dois países - - ggplot(data = dados_portugal_belgica, aes(x = ano, y = populacao, color = pais)) + - geom_line() + - geom_point() - -{% include figure.html filename="pt-tr-manipulacao-transformacao-dados-r-02.png" alt="Imagem com a representação de um gráfico de linhas com dados da população por anos para a Bélgica e Portugal" caption="Gráfico da população de Portugal e Bégica, ao longo dos anos" %} - -Promover mudanças rápidas no código e revisar nossos dados é parte fundamental do processo de análise exploratória de dados (AED). Ao invés de tentar "provar" uma hipótese, a análise exploratória nos ajuda a entender melhor os dados e a levantar questões sobre eles. 
Para os historiadores, a AED fornece uma forma fácil de saber quando aprofundar mais em um tema e quando voltar atrás, e esta é uma área onde o R se sobressai. - -## Operador Pipe - -Antes de olharmos para o dplyr, precisamos entender o que é o operador *pipe* `%>%` no R, uma vez que iremos utilizá-lo em muitos exemplos adiante. Como mencionado anteriormente, este operador é parte do pacote [magrittr](https://cran.r-project.org/web/packages/magrittr/vignettes/magrittr.html) (em inglês), criado por [Stefan Milton Bache](http://stefanbache.dk/) e [Hadley Wickham](http://hadley.nz/), e está incluída no tidyverse. O seu nome é uma referência ao pintor surrealista Rene Magritte, criador da obra ["A Traição das Imagens"](https://www.renemagritte.org/the-treachery-of-images.jsp), que mostra um cachimbo com a frase "isto não é um cachimbo" (*ceci n'est pas une pipe*, em francês). - -O operador *pipe* `%>%` permite passar o que está à sua esquerda como a primeira variável em uma função especificada à sua direita. Embora possa parecer estranho no início, uma vez que aprende a usar o *pipe* descobrirá que ele torna seu código mais legível, evitando instruções aninhadas. Não se preocupe se estiver um pouco confuso por agora. Tudo ficará mais claro à medida que observarmos os exemplos. - -Vamos dizer que estamos interessados em obter a raiz quadrada de cada população e, então, somar todas as raízes antes de calcular a média. Obviamente, essa não é uma medição útil, mas demonstra a rapidez com que o código do R pode se tornar difícil de ler. Normalmente, usaríamos declarações aninhadas: - - mean(sum(sqrt(dados_socioeconomicos_paises$populacao))) - - ## [1] 6328339 - -Veja que com tantos comandos aninhados fica difícil lembrar quantos parênteses são necessários no final da linha, tornando o código complicado de ler. Para atenuar esse problema, algumas pessoas criam vetores temporários entre cada chamada de função. - - # Obtém a raiz quadrada da população de todos os países - - vetor_raiz_populacao <- sqrt(dados_socioeconomicos_paises$populacao) - - # Obtém a soma de todas as raízes da variável temporária - - soma_vetor_raizes_populacao <- sum(vetor_raiz_populacao) - - # Obtém a média da variável temporária - - media_soma_vetor_raizes_populacao <- mean(soma_vetor_raizes_populacao) - - # Exibe a média - - media_soma_vetor_raizes_populacao - - ## [1] 6328339 - -Embora obtenha o mesmo resultado, este código é muito mais legível. No entanto, se esquecer de excluir os vetores temporários, seu espaço de trabalho pode se tornar confuso. O operador *pipe* faz esse trabalho por você. Aqui está o mesmo código usando o operador *pipe*: - - dados_socioeconomicos_paises$populacao %>% sqrt %>% sum %>% mean - - ## [1] 6328339 - -Este código é mais fácil de ler que os anteriores e pode torná-lo ainda mais limpo escrevendo em linhas diferentes. - - # Certifique-se de colocar o operador no final da linha - - dados_socioeconomicos_paises$populacao %>% - sqrt %>% - sum %>% - mean - - ## [1] 6328339 - -Note que os vetores ou *data frames* criados pelo operador pipe são descartados quando se completa a operação. Se quiser salvar o resultado da operação, será preciso atribuí-lo a uma nova variável: - - vetor_permanente_media_soma_populacao <- dados_socioeconomicos_paises$populacao %>% - sqrt %>% - sum %>% - mean - - vetor_permanente_media_soma_populacao - - ## [1] 6328339 - -Agora que adquirimos uma compreensão do operador *pipe*, estamos prontos para começar a analisar e manipular alguns dados. 
Ao longo da lição vamos continuar trabalhando com o *dataset* dados_gapminder: - - # Certifique-se de que o pacote "dados" está instalado e carregado aantes de proceder conforme abaixo - - dados_gapminder - - ## # A tibble: 1,704 x 6 - ## pais continente ano expectativa_de_vida populacao pib_per_capita - ## - ## 1 Afeganistão Ásia 1952 28.8 8425333 779. - ## 2 Afeganistão Ásia 1957 30.3 9240934 821. - ## 3 Afeganistão Ásia 1962 32.0 10267083 853. - ## 4 Afeganistão Ásia 1967 34.0 11537966 836. - ## 5 Afeganistão Ásia 1972 36.1 13079460 740. - ## 6 Afeganistão Ásia 1977 38.4 14880372 786. - ## 7 Afeganistão Ásia 1982 39.9 12881816 978. - ## 8 Afeganistão Ásia 1987 40.8 13867957 852. - ## 9 Afeganistão Ásia 1992 41.7 16317921 649. - ## 10 Afeganistão Ásia 1997 41.8 22227415 635. - ## # … with 1,694 more rows - -Como pode observar, este *dataset* contém o nome do país, seu continente e o ano de registro, além dos indicadores de expectativa de vida, total da população e PIB *per capita*, em determinados anos. Conforme mencionamos acima, antes de analisar os dados é importante verificar se estes estão bem ordenados no formato *tidy*. Relembrando os três critérios discutidos, podemos dizer que sim, o *dataset* encontra-se organizado e pronto para ser trabalhado com o pacote dplyr. - -## O que é dplyr? -[Dplyr](https://cran.r-project.org/web/packages/dplyr/vignettes/dplyr.html) (em inglês) também é parte do tidyverse, fornecendo funções para manipulação e transformação dos dados. Porque estamos mantendo nossos dados bem organizados, precisaremos apenas de um pequeno conjunto de ferramentas para explorá-los. Em comparação com o pacote básico do R, usando o dplyr em nosso código, fica geralmente mais rápido e há a garantia de que os dados resultantes (*output*) estarão bem ordenados uma vez que os dados de entrada (*input*) também estarão. Talvez o mais importante seja que o dplyr torna o nosso código mais fácil de ser lido e utiliza "verbos" que são, na maioria das vezes, intuitivos. Cada função do dplyr corresponde a um desses verbos, sendo cinco principais: filtrar (`filter`), selecionar (`select`), ordenar (`arrange`), modificar (`mutate`) e sumarizar (`summarise`). Vamos observar individualmente como cada uma dessas funções funciona na prática. - -### Selecionar (select) - -Se olharmos para o *dataset* dados_gapminder, vamos observar a presença de seis colunas, cada uma contendo diferentes informações. Podemos escolher, para a nossa análise, visualizar apenas algumas dessas colunas. A função `select()` do dplyr nos permite fazer isso. 
O primeiro argumento da função é o *data frame* que desejamos manipular e os seguintes são os nomes das colunas que queremos manter: - - # Remove as colunas de dados_gapminder usando select() - # Note que não é necessário acrescentar o nome da coluna com o símbolo $ (dólar) ao final de dados_gapminder visto que o dplyr automaticamente assume que "," (vírgula) representa E (AND em inglês) - - select(dados_gapminder, pais, ano, expectativa_de_vida) - - ## # A tibble: 1,704 x 3 - ## pais ano expectativa_de_vida - ## - ## 1 Afeganistão 1952 28.8 - ## 2 Afeganistão 1957 30.3 - ## 3 Afeganistão 1962 32.0 - ## 4 Afeganistão 1967 34.0 - ## 5 Afeganistão 1972 36.1 - ## 6 Afeganistão 1977 38.4 - ## 7 Afeganistão 1982 39.9 - ## 8 Afeganistão 1987 40.8 - ## 9 Afeganistão 1992 41.7 - ## 10 Afeganistão 1997 41.8 - ## # … with 1,694 more rows - -Vejamos como escrever o mesmo código utilizando o operador *pipe* `%>%`: - - dados_gapminder %>% - select(pais, ano, expectativa_de_vida) - - ## # A tibble: 1,704 x 3 - ## pais ano expectativa_de_vida - ## - ## 1 Afeganistão 1952 28.8 - ## 2 Afeganistão 1957 30.3 - ## 3 Afeganistão 1962 32.0 - ## 4 Afeganistão 1967 34.0 - ## 5 Afeganistão 1972 36.1 - ## 6 Afeganistão 1977 38.4 - ## 7 Afeganistão 1982 39.9 - ## 8 Afeganistão 1987 40.8 - ## 9 Afeganistão 1992 41.7 - ## 10 Afeganistão 1997 41.8 - ## # … with 1,694 more rows - -Fazer referência a cada uma das colunas que desejamos manter apenas para nos livrar de uma é um tanto tedioso. Podemos usar o símbolo de menos (-) para demonstrar que queremos remover uma coluna. - - dados_gapminder %>% - select(-continente) - - ## # A tibble: 1,704 x 5 - ## pais ano expectativa_de_vida populacao pib_per_capita - ## - ## 1 Afeganistão 1952 28.8 8425333 779. - ## 2 Afeganistão 1957 30.3 9240934 821. - ## 3 Afeganistão 1962 32.0 10267083 853. - ## 4 Afeganistão 1967 34.0 11537966 836. - ## 5 Afeganistão 1972 36.1 13079460 740. - ## 6 Afeganistão 1977 38.4 14880372 786. - ## 7 Afeganistão 1982 39.9 12881816 978. - ## 8 Afeganistão 1987 40.8 13867957 852. - ## 9 Afeganistão 1992 41.7 16317921 649. - ## 10 Afeganistão 1997 41.8 22227415 635. - ## # … with 1,694 more rows - -### Filtrar (filter) - -A função `filter()` faz o mesmo que a função select, mas ao invés de escolher o nome da coluna, podemos usá-lo para filtrar linhas usando um teste de requisito. Por exemplo, se quisermos selecionar somente os registros dos países em 2007: - - dados_gapminder %>% - filter(ano == 2007) - - ## # A tibble: 142 x 6 - ## pais continente ano expectativa_de_vida populacao pib_per_capita - ## - ## 1 Afeganistão Ásia 2007 43.8 31889923 975. - ## 2 Albânia Europa 2007 76.4 3600523 5937. - ## 3 Argélia África 2007 72.3 33333216 6223. - ## 4 Angola África 2007 42.7 12420476 4797. - ## 5 Argentina Américas 2007 75.3 40301927 12779. - ## 6 Austrália Oceania 2007 81.2 20434176 34435. - ## 7 Áustria Europa 2007 79.8 8199783 36126. - ## 8 Bahrein Ásia 2007 75.6 708573 29796. - ## 9 Bangladesh Ásia 2007 64.1 150448339 1391. - ## 10 Bélgica Europa 2007 79.4 10392226 33693. - ## # … with 132 more rows - -### Modificar (mutate) - -A função `mutate()` permite adicionar uma coluna ao seu *dataset*. No momento, temos país e continente em duas colunas separadas. Podemos utilizar a função `paste()` para combinar as duas informações e especificar um separador. Vamos colocá-las em uma única coluna chamada "localizacao". 
- - dados_gapminder %>% - mutate(localizacao = paste(pais, continente, sep = ", ")) - - ## # A tibble: 1,704 x 7 - ## pais continente ano expectativa_de_vida populacao pib_per_capita localizacao - ## - ## 1 Afeganistão Ásia 1952 28.8 8425333 779. Afeganistão, Ásia - ## 2 Afeganistão Ásia 1957 30.3 9240934 821. Afeganistão, Ásia - ## 3 Afeganistão Ásia 1962 32.0 10267083 853. Afeganistão, Ásia - ## 4 Afeganistão Ásia 1967 34.0 11537966 836. Afeganistão, Ásia - ## 5 Afeganistão Ásia 1972 36.1 13079460 740. Afeganistão, Ásia - ## 6 Afeganistão Ásia 1977 38.4 14880372 786. Afeganistão, Ásia - ## 7 Afeganistão Ásia 1982 39.9 12881816 978. Afeganistão, Ásia - ## 8 Afeganistão Ásia 1987 40.8 13867957 852. Afeganistão, Ásia - ## 9 Afeganistão Ásia 1992 41.7 16317921 649. Afeganistão, Ásia - ## 10 Afeganistão Ásia 1997 41.8 22227415 635. Afeganistão, Ásia - ## # … with 1,694 more rows - -Novamente, é preciso lembrar que o dplyr não salva os dados, nem transforma o original. Em vez disso, ele cria um *data frame* temporário em cada etapa. Se deseja manter os dados, é necessário criar uma variável permanente. - - dados_gapminder_localizacao <- dados_gapminder %>% - mutate(localizacao = paste(pais, continente, sep = ", ")) - - # Visualiza a nova tabela criada com a localização adicionada - - dados_gapminder_localizacao - - ## # A tibble: 1,704 x 7 - ## pais continente ano expectativa_de_vida populacao pib_per_capita localizacao - ## - ## 1 Afeganistão Ásia 1952 28.8 8425333 779. Afeganistão, Ásia - ## 2 Afeganistão Ásia 1957 30.3 9240934 821. Afeganistão, Ásia - ## 3 Afeganistão Ásia 1962 32.0 10267083 853. Afeganistão, Ásia - ## 4 Afeganistão Ásia 1967 34.0 11537966 836. Afeganistão, Ásia - ## 5 Afeganistão Ásia 1972 36.1 13079460 740. Afeganistão, Ásia - ## 6 Afeganistão Ásia 1977 38.4 14880372 786. Afeganistão, Ásia - ## 7 Afeganistão Ásia 1982 39.9 12881816 978. Afeganistão, Ásia - ## 8 Afeganistão Ásia 1987 40.8 13867957 852. Afeganistão, Ásia - ## 9 Afeganistão Ásia 1992 41.7 16317921 649. Afeganistão, Ásia - ## 10 Afeganistão Ásia 1997 41.8 22227415 635. Afeganistão, Ásia - ## # … with 1,694 more rows - -### Ordenar (arrange) - -A função `arrange()` nos permite ordenar as colunas de novas formas. Atualmente, o nosso conjunto de dados está organizado em ordem alfabética pelo nome do país. Vamos ordená-lo em ordem decrescente de acordo com o total da população. - - dados_gapminder %>% - arrange(desc(populacao)) - - ## # A tibble: 1,704 x 6 - ## pais continente ano expectativa_de_vida populacao pib_per_capita - ## - ## 1 China Ásia 2007 73.0 1318683096 4959. - ## 2 China Ásia 2002 72.0 1280400000 3119. - ## 3 China Ásia 1997 70.4 1230075000 2289. - ## 4 China Ásia 1992 68.7 1164970000 1656. - ## 5 Índia Ásia 2007 64.7 1110396331 2452. - ## 6 China Ásia 1987 67.3 1084035000 1379. - ## 7 Índia Ásia 2002 62.9 1034172547 1747. - ## 8 China Ásia 1982 65.5 1000281000 962. - ## 9 Índia Ásia 1997 61.8 959000000 1459. - ## 10 China Ásia 1977 64.0 943455000 741. - ## # … with 1,694 more rows - -### Sumarizar (summarise) - -A última função do dplyr que veremos é a `summarise()`, usada geralmente para criar uma tabela contendo dados estatísticos resumidos que podemos plotar. Vamos utilizar a função `summarise()` para calcular a média da expectativa de vida nos países, considerando todo o conjunto dados_gapminder. 
- - dados_gapminder %>% - summarise(mean(expectativa_de_vida)) - - ## # A tibble: 1 x 1 - ## `mean(expectativa_de_vida)` - ## - ## 1 59.5 - -## Juntando tudo - -Agora, após termos visto os cinco principais verbos do dplyr, podemos criar rapidamente uma visualização dos nossos dados. Vamos criar um gráfico de barras mostrando o número de países com expectativa de vida maior que 50 anos, em 2007. - - expectativa_vida_2007 <- dados_gapminder %>% - filter(ano == 2007) %>% - mutate(expectativa_2007 = ifelse(expectativa_de_vida >= 50, "Maior ou igual a 50 anos", "Menor que 50 anos")) - - ggplot(expectativa_vida_2007) + - geom_bar(aes(x = expectativa_2007, fill = expectativa_2007)) + - labs(x = "A expectativa de vida é maior que 50 anos?") - -{% include figure.html filename="pt-tr-manipulacao-transformacao-dados-r-03.png" alt="Imagem com a representação de um gráfico de barras com dados sobre o número de países onde a expectativa de vida é maior ou menor que 50 anos, em 2007" caption="Expectativa de vida nos países em 2007" %} - -Novamente, fazendo uma pequena mudança no nosso código, podemos ver também o número de países com expectativa de vida maior que 50 anos, em 1952. - - expectativa_vida_1952 <- dados_gapminder %>% - filter(ano == 1952) %>% - mutate(expectativa_1952 = ifelse(expectativa_de_vida >= 50, "Maior ou igual a 50 anos", "Menor que 50 anos")) - - ggplot(expectativa_vida_1952) + - geom_bar(aes(x = expectativa_1952, fill = expectativa_1952)) + - labs(x = "A expectativa de vida é maior que 50 anos?") - -({% include figure.html filename="pt-tr-manipulacao-transformacao-dados-r-04.png" alt="Imagem com a representação de um gráfico de barras com dados sobre o número de países onde a expectativa de vida é maior ou menor que 50 anos, em 1952" caption="Expectativa de vida nos países em 1952" %} - -## Conclusão - -Este tutorial deve encaminhar seus conhecimentos para pensar sobre como organizar e manipular dados usando R. Posteriormente, provavelmente vai querer visualizar esses dados de alguma forma, usando gráficos, como fizemos em partes desta lição. Recomendamos que comece a estudar o [ggplot2](http://www.ggplot2.org) (em inglês), pacote com uma coleção de ferramentas que funcionam bem em conjunto com o dplyr. Além disso, você deve buscar conhecer as outras funções do pacote dplyr que não vimos aqui, para aprimorar suas habilidades de manipulação de dados. Por enquanto, esta lição deve proporcionar um bom ponto de partida, cobrindo muitos dos principais problemas que poderá encontrar. - - -### Notas - -[^1]: O pacote "dados" disponibiliza a tradução de conjuntos de dados originalmente em inglês encontrados em outros pacotes de R. Está disponível em https://github.com/cienciadedatos/dados +--- +title: Manipulação e transformação de dados com R +slug: manipulacao-transformacao-dados-r +layout: lesson +collection: lessons +date: 2017-08-01 +translation_date: 2022-11-26 +authors: +- Nabeel Siddiqui +editors: +- Ian Milligan +reviewers: +- Lauren Tilton +- Ryan Deschamps +translator: +- Ian Araujo +translation-editor: +- Jimmy Medeiros +translation-reviewer: +- Suemi Higuchi +- Joana Paulino +difficulty: 2 +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/397 +activity: transforming +topics: [data-manipulation, data-management, distant-reading, r, data-visualization] +abstract: "Esta lição explora como os investigadores podem tornar seus dados organizados, entender os pacotes do R para manipulação de dados e conduzir análises de dados básicas usando esta linguagem." 
+original: data-wrangling-and-management-in-r +avatar_alt: Barra de sabão +doi: 10.46430/phpt0035 +--- + +{% include toc.html %} + +## Requisitos +Nesta lição consideramos que já possui algum conhecimento da linguagem R. Se ainda não completou a lição [Noções básicas de R com dados tabulares](/pt/licoes/nocoes-basicas-R-dados-tabulares), recomendamos que o faça primeiro. Ter experiência com outras linguagens de programação também pode ser benéfico. Se está buscando por onde começar aprendendo outras linguagens, recomendamos os excelentes tutoriais de Python do *Programming Historian*. + +Nota da tradução: o conteúdo da programação utilizado na lição original foi alterado para esta versão em português para que o contexto e os exemplos sejam próximos da realidade da comunidade que fala o idioma. Por conta disso, parte do texto da lição traduzida, bem como os exemplos e as interpretações dos dados, são diferentes da lição original. No entanto, o conteúdo e a estrutura da lição são fidedignos à lição original, como os tipos de dados e as análises desenvolvidas. Mudamos, por exemplo, a comparação entre Mississipi e Virgínia por Brasil e Argentina, mantendo os recursos e procedimentos realizados por Nabeel Siddiqui. + +## Objetivos da lição +Ao fim desta lição, você: + +1. Saberá como tornar seus dados bem ordenados (*tidy*) e entenderá por que isso é importante. +2. Terá assimilado o uso do pacote [dplyr](https://cran.r-project.org/web/packages/dplyr/index.html) (em inglês) e sua aplicação na manipulação e controle de dados. +3. Estará familiarizado com o operador *pipe* `%>%` na linguagem R e verá como ele pode auxiliar na criação de códigos mais legíveis. +4. Terá ganho experiência com análise exploratória de dados através de exemplos básicos de manipulação de dados. + +## Introdução +Os dados que encontra disponíveis nas diversas plataformas raramente estão no formato adequado para serem analisados, e precisará manipulá-los antes de explorar as perguntas de seu interesse. Isso pode tomar mais tempo que a própria análise dos dados! Neste tutorial, vamos aprender técnicas básicas para manipulação, gestão e controle de dados usando R. Especificamente, nos debruçaremos sobre a filosofia do ["*tidy data*"](https://www.jstatsoft.org/article/view/v059i10) (em inglês) conforme apresentada por Hadley Wickham. + +De acordo com [Wickham](https://hadley.nz/) (em inglês), os dados estão *tidy* ou bem-organizados quando satisfazem três critérios-chave: + +1. Cada unidade de observação está em uma linha +2. Cada variável está em uma coluna +3. Cada valor possui a sua própria célula. + +Estar atento a estes critérios nos permite reconhecer quando os nossos dados estão adequados ou não. Também nos fornece um esquema padrão e um conjunto de soluções para lidar com alguns dos problemas mais comuns encontrados em *datasets* "mal-arranjados", como por exemplo: + +1. Nomes de colunas como valores ao invés de nomes de variáveis +2. Múltiplas variáveis contidas em uma única coluna +3. Variáveis armazenadas tanto em linhas quanto em colunas +4. Unidades de observação de diferentes categorias armazenadas na mesma tabela +5. Uma única unidade de observação armazenada em múltiplas tabelas. + +Talvez o mais importante seja que manter os dados nesse formato nos permite utilizar uma série de pacotes do ["tidyverse"](https://tidyverse.org/) (em inglês), concebidos para trabalhar especificamente com dados neste formato *tidy*.
Dessa forma, assegurando-nos de que os dados de entrada e de saída estão bem organizados, precisaremos apenas de um pequeno conjunto de ferramentas para resolver um grande número de questões. Podemos combinar, manipular e dividir os *datasets* que criamos, conforme considerarmos mais adequado. + +Neste tutorial focaremos no pacote [dplyr](https://cran.r-project.org/web/packages/dplyr/index.html) (em inglês) presente no tidyverse, mas também é importante mencionar alguns outros que serão vistos na lição: + +* [**magrittr**](https://magrittr.tidyverse.org) (em inglês) -- Este pacote nos garante acesso ao operador *pipe* `%>%`, que torna o nosso código mais legível. +* [**ggplot2**](https://ggplot2.tidyverse.org/) (em inglês) -- Este pacote utiliza a ["Gramática de Gráficos"](https://www.springer.com/us/book/9780387245447) (em inglês) para fornecer uma forma fácil de visualizar nossos dados. +* [**tibble**](https://tibble.tidyverse.org/) (em inglês) -- Este pacote nos fornece uma releitura dos tradicionais *data frames*, mais fáceis de serem trabalhados e visualizados. + +Instale o "tidyverse", se ainda não o fez, e carregue-o antes de começarmos. Além disso, certifique-se de que possui instaladas a +[versão mais recente do R](https://cran.rstudio.com/) e a [versão mais recente do RStudio](https://www.rstudio.com/products/rstudio/download/) compatíveis com o seu sistema operacional. + +Copie o código a seguir para o seu RStudio. Para executá-lo, precisa selecionar as linhas e pressionar Ctrl+Enter (Command+Enter no Mac OS): + + # Instala e carrega a biblioteca tidyverse + # Não se preocupe caso demore um pouco + + install.packages("tidyverse") + library(tidyverse) + + +## Um exemplo do dplyr em ação +Vejamos um exemplo de como o dplyr pode auxiliar historiadores. Vamos utilizar o pacote "dados" [^1] e importar alguns indicadores socioeconômicos de países entre 1952 e 2007. + +O pacote "remotes" permite a instalação de pacotes R a partir de repositórios remotos, incluindo o GitHub, como é o caso de "dados". + + # Instala e carrega as bibliotecas "remotes" e "dados" + + install.packages("remotes") + library(remotes) + + remotes::install_github("cienciadedatos/dados") + library(dados) + +Em seguida, para termos acesso ao *dataset* "dados_gapminder", que se encontra no pacote "dados", basta executar o seguinte código: + + # Cria o objeto dados_socioeconomicos_paises e atribui a ele os elementos de dados_gapminder + + dados_socioeconomicos_paises <- dados_gapminder + +Os dados do [Gapminder](https://www.gapminder.org/) (em inglês) contêm o progresso de países ao longo do tempo, observando as estatísticas de alguns índices. Após importar o *dataset*, notará que ele possui seis variáveis: país, continente, ano, expectativa de vida, população e PIB *per capita*. Os dados já estão em formato *tidy*, possibilitando uma infinidade de opções para exploração futura. + +Neste exemplo, vamos visualizar o crescimento populacional de Brasil e Argentina ao longo dos anos. Para isso utilizaremos o pacote dplyr a fim de filtrar os dados que contenham apenas informações dos países de nosso interesse. Em seguida, utilizaremos o ggplot2 para visualizar tais dados. Este exercício é apenas uma breve demonstração do que é possível fazer com o dplyr, portanto, não se preocupe se não entender o código por enquanto.
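+ + # Nota (comentário nosso, a título de explicação): nos filtros abaixo, o operador %in% do R + # testa, para cada linha, se o valor da coluna 'pais' pertence ao vetor de nomes fornecido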
+ + # Filtra os países desejados (Brasil e Argentina) + + dados_brasil_argentina <- dados_socioeconomicos_paises %>% + filter(pais %in% c("Brasil", "Argentina")) + + # Visualiza a população dos dois países + + ggplot(data = dados_brasil_argentina, aes(x = ano, y = populacao, color = pais)) + + geom_line() + + geom_point() + +{% include figure.html filename="pt-tr-manipulacao-transformacao-dados-r-01.png" alt="Imagem com a representação de um gráfico de linhas com dados da população por anos para o Brasil e a Argentina" caption="Gráfico da população de Brasil e Argentina, ao longo dos anos" %} + +Como podemos observar, a população absoluta do Brasil é consideravelmente maior em comparação com a população da Argentina. Embora isso pareça óbvio devido ao tamanho do território brasileiro, o código nos fornece uma base sobre a qual podemos formular uma infinidade de questões similares. Por exemplo, com uma pequena mudança no código podemos criar um gráfico similar com dois países diferentes, como Portugal e Bélgica. + + # Filtra os países desejados (Portugal e Bélgica) + + dados_portugal_belgica <- dados_socioeconomicos_paises %>% + filter(pais %in% c("Portugal", "Bélgica")) + + # Visualiza a população dos dois países + + ggplot(data = dados_portugal_belgica, aes(x = ano, y = populacao, color = pais)) + + geom_line() + + geom_point() + +{% include figure.html filename="pt-tr-manipulacao-transformacao-dados-r-02.png" alt="Imagem com a representação de um gráfico de linhas com dados da população por anos para a Bélgica e Portugal" caption="Gráfico da população de Portugal e Bélgica, ao longo dos anos" %} + +Promover mudanças rápidas no código e revisar nossos dados é parte fundamental do processo de análise exploratória de dados (AED). Ao invés de tentar "provar" uma hipótese, a análise exploratória nos ajuda a entender melhor os dados e a levantar questões sobre eles. Para os historiadores, a AED fornece uma forma fácil de saber quando aprofundar mais em um tema e quando voltar atrás, e esta é uma área onde o R se sobressai. + +## Operador Pipe + +Antes de olharmos para o dplyr, precisamos entender o que é o operador *pipe* `%>%` no R, uma vez que iremos utilizá-lo em muitos exemplos adiante. Como mencionado anteriormente, este operador é parte do pacote [magrittr](https://cran.r-project.org/web/packages/magrittr/vignettes/magrittr.html) (em inglês), criado por [Stefan Milton Bache](https://stefanbache.dk/) e [Hadley Wickham](https://hadley.nz/), e está incluído no tidyverse. O seu nome é uma referência ao pintor surrealista Rene Magritte, criador da obra ["A Traição das Imagens"](https://www.renemagritte.org/the-treachery-of-images.jsp), que mostra um cachimbo com a frase "isto não é um cachimbo" (*ceci n'est pas une pipe*, em francês). + +O operador *pipe* `%>%` permite passar o que está à sua esquerda como a primeira variável em uma função especificada à sua direita. Embora possa parecer estranho no início, uma vez que aprende a usar o *pipe* descobrirá que ele torna seu código mais legível, evitando instruções aninhadas. Não se preocupe se estiver um pouco confuso por agora. Tudo ficará mais claro à medida que observarmos os exemplos. + +Vamos dizer que estamos interessados em obter a raiz quadrada de cada população e, então, somar todas as raízes antes de calcular a média. Obviamente, essa não é uma medição útil, mas demonstra a rapidez com que o código do R pode se tornar difícil de ler.
Normalmente, usaríamos declarações aninhadas: + + mean(sum(sqrt(dados_socioeconomicos_paises$populacao))) + + ## [1] 6328339 + +Veja que com tantos comandos aninhados fica difícil lembrar quantos parênteses são necessários no final da linha, tornando o código complicado de ler. Para atenuar esse problema, algumas pessoas criam vetores temporários entre cada chamada de função. + + # Obtém a raiz quadrada da população de todos os países + + vetor_raiz_populacao <- sqrt(dados_socioeconomicos_paises$populacao) + + # Obtém a soma de todas as raízes da variável temporária + + soma_vetor_raizes_populacao <- sum(vetor_raiz_populacao) + + # Obtém a média da variável temporária + + media_soma_vetor_raizes_populacao <- mean(soma_vetor_raizes_populacao) + + # Exibe a média + + media_soma_vetor_raizes_populacao + + ## [1] 6328339 + +Embora obtenha o mesmo resultado, este código é muito mais legível. No entanto, se esquecer de excluir os vetores temporários, seu espaço de trabalho pode se tornar confuso. O operador *pipe* faz esse trabalho por você. Aqui está o mesmo código usando o operador *pipe*: + + dados_socioeconomicos_paises$populacao %>% sqrt %>% sum %>% mean + + ## [1] 6328339 + +Este código é mais fácil de ler que os anteriores e pode torná-lo ainda mais limpo escrevendo-o em linhas diferentes. + + # Certifique-se de colocar o operador no final da linha + + dados_socioeconomicos_paises$populacao %>% + sqrt %>% + sum %>% + mean + + ## [1] 6328339 + +Note que os vetores ou *data frames* criados pelo operador pipe são descartados quando se completa a operação. Se quiser salvar o resultado da operação, será preciso atribuí-lo a uma nova variável: + + vetor_permanente_media_soma_populacao <- dados_socioeconomicos_paises$populacao %>% + sqrt %>% + sum %>% + mean + + vetor_permanente_media_soma_populacao + + ## [1] 6328339 + +Agora que adquirimos uma compreensão do operador *pipe*, estamos prontos para começar a analisar e manipular alguns dados. Ao longo da lição vamos continuar trabalhando com o *dataset* dados_gapminder: + + # Certifique-se de que o pacote "dados" está instalado e carregado antes de proceder conforme abaixo + + dados_gapminder + + ## # A tibble: 1,704 x 6 + ## pais continente ano expectativa_de_vida populacao pib_per_capita + ## + ## 1 Afeganistão Ásia 1952 28.8 8425333 779. + ## 2 Afeganistão Ásia 1957 30.3 9240934 821. + ## 3 Afeganistão Ásia 1962 32.0 10267083 853. + ## 4 Afeganistão Ásia 1967 34.0 11537966 836. + ## 5 Afeganistão Ásia 1972 36.1 13079460 740. + ## 6 Afeganistão Ásia 1977 38.4 14880372 786. + ## 7 Afeganistão Ásia 1982 39.9 12881816 978. + ## 8 Afeganistão Ásia 1987 40.8 13867957 852. + ## 9 Afeganistão Ásia 1992 41.7 16317921 649. + ## 10 Afeganistão Ásia 1997 41.8 22227415 635. + ## # … with 1,694 more rows + +Como pode observar, este *dataset* contém o nome do país, seu continente e o ano de registro, além dos indicadores de expectativa de vida, total da população e PIB *per capita*, em determinados anos. Conforme mencionamos acima, antes de analisar os dados é importante verificar se estes estão bem ordenados no formato *tidy*. Relembrando os três critérios discutidos, podemos dizer que sim, o *dataset* encontra-se organizado e pronto para ser trabalhado com o pacote dplyr. + +## O que é dplyr? +[Dplyr](https://cran.r-project.org/web/packages/dplyr/vignettes/dplyr.html) (em inglês) também é parte do tidyverse, fornecendo funções para manipulação e transformação dos dados.
Como estamos mantendo nossos dados bem organizados, precisaremos apenas de um pequeno conjunto de ferramentas para explorá-los. Em comparação com o pacote básico do R, o código que usa o dplyr fica geralmente mais rápido, e há a garantia de que os dados resultantes (*output*) estarão bem ordenados uma vez que os dados de entrada (*input*) também estarão. Talvez o mais importante seja que o dplyr torna o nosso código mais fácil de ser lido e utiliza "verbos" que são, na maioria das vezes, intuitivos. Cada função do dplyr corresponde a um desses verbos, sendo cinco principais: filtrar (`filter`), selecionar (`select`), ordenar (`arrange`), modificar (`mutate`) e sumarizar (`summarise`). Vamos observar individualmente como cada uma dessas funções funciona na prática. + +### Selecionar (select) + +Se olharmos para o *dataset* dados_gapminder, vamos observar a presença de seis colunas, cada uma contendo diferentes informações. Podemos escolher, para a nossa análise, visualizar apenas algumas dessas colunas. A função `select()` do dplyr nos permite fazer isso. O primeiro argumento da função é o *data frame* que desejamos manipular e os seguintes são os nomes das colunas que queremos manter: + + # Seleciona apenas as colunas desejadas de dados_gapminder usando select() + # Note que não é necessário acrescentar o nome da coluna com o símbolo $ (dólar) ao final de dados_gapminder visto que o dplyr automaticamente assume que "," (vírgula) representa E (AND em inglês) + + select(dados_gapminder, pais, ano, expectativa_de_vida) + + ## # A tibble: 1,704 x 3 + ## pais ano expectativa_de_vida + ## + ## 1 Afeganistão 1952 28.8 + ## 2 Afeganistão 1957 30.3 + ## 3 Afeganistão 1962 32.0 + ## 4 Afeganistão 1967 34.0 + ## 5 Afeganistão 1972 36.1 + ## 6 Afeganistão 1977 38.4 + ## 7 Afeganistão 1982 39.9 + ## 8 Afeganistão 1987 40.8 + ## 9 Afeganistão 1992 41.7 + ## 10 Afeganistão 1997 41.8 + ## # … with 1,694 more rows + +Vejamos como escrever o mesmo código utilizando o operador *pipe* `%>%`: + + dados_gapminder %>% + select(pais, ano, expectativa_de_vida) + + ## # A tibble: 1,704 x 3 + ## pais ano expectativa_de_vida + ## + ## 1 Afeganistão 1952 28.8 + ## 2 Afeganistão 1957 30.3 + ## 3 Afeganistão 1962 32.0 + ## 4 Afeganistão 1967 34.0 + ## 5 Afeganistão 1972 36.1 + ## 6 Afeganistão 1977 38.4 + ## 7 Afeganistão 1982 39.9 + ## 8 Afeganistão 1987 40.8 + ## 9 Afeganistão 1992 41.7 + ## 10 Afeganistão 1997 41.8 + ## # … with 1,694 more rows + +Fazer referência a cada uma das colunas que desejamos manter apenas para nos livrar de uma é um tanto tedioso. Podemos usar o símbolo de menos (-) para demonstrar que queremos remover uma coluna. + + dados_gapminder %>% + select(-continente) + + ## # A tibble: 1,704 x 5 + ## pais ano expectativa_de_vida populacao pib_per_capita + ## + ## 1 Afeganistão 1952 28.8 8425333 779. + ## 2 Afeganistão 1957 30.3 9240934 821. + ## 3 Afeganistão 1962 32.0 10267083 853. + ## 4 Afeganistão 1967 34.0 11537966 836. + ## 5 Afeganistão 1972 36.1 13079460 740. + ## 6 Afeganistão 1977 38.4 14880372 786. + ## 7 Afeganistão 1982 39.9 12881816 978. + ## 8 Afeganistão 1987 40.8 13867957 852. + ## 9 Afeganistão 1992 41.7 16317921 649. + ## 10 Afeganistão 1997 41.8 22227415 635. + ## # … with 1,694 more rows + +### Filtrar (filter) + +A função `filter()` funciona de forma semelhante à função `select()`, mas, ao invés de escolher o nome da coluna, podemos usá-la para filtrar linhas usando um teste de requisito, isto é, um teste lógico construído com operadores de comparação do R, como `==` (igual a), `!=` (diferente de), `>` e `<`.
Por exemplo, se quisermos selecionar somente os registros dos países em 2007: + + dados_gapminder %>% + filter(ano == 2007) + + ## # A tibble: 142 x 6 + ## pais continente ano expectativa_de_vida populacao pib_per_capita + ## + ## 1 Afeganistão Ásia 2007 43.8 31889923 975. + ## 2 Albânia Europa 2007 76.4 3600523 5937. + ## 3 Argélia África 2007 72.3 33333216 6223. + ## 4 Angola África 2007 42.7 12420476 4797. + ## 5 Argentina Américas 2007 75.3 40301927 12779. + ## 6 Austrália Oceania 2007 81.2 20434176 34435. + ## 7 Áustria Europa 2007 79.8 8199783 36126. + ## 8 Bahrein Ásia 2007 75.6 708573 29796. + ## 9 Bangladesh Ásia 2007 64.1 150448339 1391. + ## 10 Bélgica Europa 2007 79.4 10392226 33693. + ## # … with 132 more rows + +### Modificar (mutate) + +A função `mutate()` permite adicionar uma coluna ao seu *dataset*. No momento, temos país e continente em duas colunas separadas. Podemos utilizar a função `paste()` para combinar as duas informações e especificar um separador. Vamos colocá-las em uma única coluna chamada "localizacao". + + dados_gapminder %>% + mutate(localizacao = paste(pais, continente, sep = ", ")) + + ## # A tibble: 1,704 x 7 + ## pais continente ano expectativa_de_vida populacao pib_per_capita localizacao + ## + ## 1 Afeganistão Ásia 1952 28.8 8425333 779. Afeganistão, Ásia + ## 2 Afeganistão Ásia 1957 30.3 9240934 821. Afeganistão, Ásia + ## 3 Afeganistão Ásia 1962 32.0 10267083 853. Afeganistão, Ásia + ## 4 Afeganistão Ásia 1967 34.0 11537966 836. Afeganistão, Ásia + ## 5 Afeganistão Ásia 1972 36.1 13079460 740. Afeganistão, Ásia + ## 6 Afeganistão Ásia 1977 38.4 14880372 786. Afeganistão, Ásia + ## 7 Afeganistão Ásia 1982 39.9 12881816 978. Afeganistão, Ásia + ## 8 Afeganistão Ásia 1987 40.8 13867957 852. Afeganistão, Ásia + ## 9 Afeganistão Ásia 1992 41.7 16317921 649. Afeganistão, Ásia + ## 10 Afeganistão Ásia 1997 41.8 22227415 635. Afeganistão, Ásia + ## # … with 1,694 more rows + +Novamente, é preciso lembrar que o dplyr não salva os dados, nem transforma o original. Em vez disso, ele cria um *data frame* temporário em cada etapa. Se deseja manter os dados, é necessário criar uma variável permanente. + + dados_gapminder_localizacao <- dados_gapminder %>% + mutate(localizacao = paste(pais, continente, sep = ", ")) + + # Visualiza a nova tabela criada com a localização adicionada + + dados_gapminder_localizacao + + ## # A tibble: 1,704 x 7 + ## pais continente ano expectativa_de_vida populacao pib_per_capita localizacao + ## + ## 1 Afeganistão Ásia 1952 28.8 8425333 779. Afeganistão, Ásia + ## 2 Afeganistão Ásia 1957 30.3 9240934 821. Afeganistão, Ásia + ## 3 Afeganistão Ásia 1962 32.0 10267083 853. Afeganistão, Ásia + ## 4 Afeganistão Ásia 1967 34.0 11537966 836. Afeganistão, Ásia + ## 5 Afeganistão Ásia 1972 36.1 13079460 740. Afeganistão, Ásia + ## 6 Afeganistão Ásia 1977 38.4 14880372 786. Afeganistão, Ásia + ## 7 Afeganistão Ásia 1982 39.9 12881816 978. Afeganistão, Ásia + ## 8 Afeganistão Ásia 1987 40.8 13867957 852. Afeganistão, Ásia + ## 9 Afeganistão Ásia 1992 41.7 16317921 649. Afeganistão, Ásia + ## 10 Afeganistão Ásia 1997 41.8 22227415 635. Afeganistão, Ásia + ## # … with 1,694 more rows + +### Ordenar (arrange) + +A função `arrange()` nos permite reordenar as linhas da tabela de acordo com os valores de uma ou mais colunas. Atualmente, o nosso conjunto de dados está organizado em ordem alfabética pelo nome do país. Vamos ordená-lo em ordem decrescente de acordo com o total da população.
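+ + # Nota (comentário nosso): sem desc(), arrange() ordena de forma crescente; + # desc() inverte a ordenação para decrescente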
+ + dados_gapminder %>% + arrange(desc(populacao)) + + ## # A tibble: 1,704 x 6 + ## pais continente ano expectativa_de_vida populacao pib_per_capita + ## + ## 1 China Ásia 2007 73.0 1318683096 4959. + ## 2 China Ásia 2002 72.0 1280400000 3119. + ## 3 China Ásia 1997 70.4 1230075000 2289. + ## 4 China Ásia 1992 68.7 1164970000 1656. + ## 5 Índia Ásia 2007 64.7 1110396331 2452. + ## 6 China Ásia 1987 67.3 1084035000 1379. + ## 7 Índia Ásia 2002 62.9 1034172547 1747. + ## 8 China Ásia 1982 65.5 1000281000 962. + ## 9 Índia Ásia 1997 61.8 959000000 1459. + ## 10 China Ásia 1977 64.0 943455000 741. + ## # … with 1,694 more rows + +### Sumarizar (summarise) + +A última função do dplyr que veremos é a `summarise()`, usada geralmente para criar uma tabela contendo dados estatísticos resumidos que podemos plotar. Vamos utilizar a função `summarise()` para calcular a média da expectativa de vida nos países, considerando todo o conjunto dados_gapminder. + + dados_gapminder %>% + summarise(mean(expectativa_de_vida)) + + ## # A tibble: 1 x 1 + ## `mean(expectativa_de_vida)` + ## + ## 1 59.5 + +## Juntando tudo + +Agora, após termos visto os cinco principais verbos do dplyr, podemos criar rapidamente uma visualização dos nossos dados. Vamos criar um gráfico de barras mostrando o número de países com expectativa de vida maior que 50 anos, em 2007. + + expectativa_vida_2007 <- dados_gapminder %>% + filter(ano == 2007) %>% + mutate(expectativa_2007 = ifelse(expectativa_de_vida >= 50, "Maior ou igual a 50 anos", "Menor que 50 anos")) + + ggplot(expectativa_vida_2007) + + geom_bar(aes(x = expectativa_2007, fill = expectativa_2007)) + + labs(x = "A expectativa de vida é maior que 50 anos?") + +{% include figure.html filename="pt-tr-manipulacao-transformacao-dados-r-03.png" alt="Imagem com a representação de um gráfico de barras com dados sobre o número de países onde a expectativa de vida é maior ou menor que 50 anos, em 2007" caption="Expectativa de vida nos países em 2007" %} + +Novamente, fazendo uma pequena mudança no nosso código, podemos ver também o número de países com expectativa de vida maior que 50 anos, em 1952. + + expectativa_vida_1952 <- dados_gapminder %>% + filter(ano == 1952) %>% + mutate(expectativa_1952 = ifelse(expectativa_de_vida >= 50, "Maior ou igual a 50 anos", "Menor que 50 anos")) + + ggplot(expectativa_vida_1952) + + geom_bar(aes(x = expectativa_1952, fill = expectativa_1952)) + + labs(x = "A expectativa de vida é maior que 50 anos?") + +{% include figure.html filename="pt-tr-manipulacao-transformacao-dados-r-04.png" alt="Imagem com a representação de um gráfico de barras com dados sobre o número de países onde a expectativa de vida é maior ou menor que 50 anos, em 1952" caption="Expectativa de vida nos países em 1952" %} + +## Conclusão + +Este tutorial deve ajudá-lo a pensar sobre como organizar e manipular dados usando R. Posteriormente, provavelmente vai querer visualizar esses dados de alguma forma, usando gráficos, como fizemos em partes desta lição. Recomendamos que comece a estudar o [ggplot2](https://www.ggplot2.org) (em inglês), pacote com uma coleção de ferramentas que funcionam bem em conjunto com o dplyr. Além disso, você deve buscar conhecer as outras funções do pacote dplyr que não vimos aqui, para aprimorar suas habilidades de manipulação de dados. Por enquanto, esta lição deve proporcionar um bom ponto de partida, cobrindo muitos dos principais problemas que poderá encontrar.
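+ +Como ponto de partida para explorar essas outras funções, eis um esboço da nossa autoria (não faz parte da lição original) que combina os verbos já vistos com a função `group_by()`, não coberta nesta lição, para calcular a expectativa de vida média por continente em 2007: + + # Agrupa os registros de 2007 por continente e calcula a média da expectativa de vida + # (group_by() é uma função do dplyr que não vimos nesta lição) + + dados_gapminder %>% + filter(ano == 2007) %>% + group_by(continente) %>% + summarise(media_expectativa = mean(expectativa_de_vida)) %>% + arrange(desc(media_expectativa))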
+ + +### Notas + +[^1]: O pacote "dados" disponibiliza a tradução de conjuntos de dados originalmente em inglês encontrados em outros pacotes de R. Está disponível em https://github.com/cienciadedatos/dados diff --git a/pt/licoes/nocoes-basicas-R-dados-tabulares.md b/pt/licoes/nocoes-basicas-R-dados-tabulares.md index 911a095f47..d0b81914fb 100644 --- a/pt/licoes/nocoes-basicas-R-dados-tabulares.md +++ b/pt/licoes/nocoes-basicas-R-dados-tabulares.md @@ -1,569 +1,569 @@ ---- -title: Noções básicas de R com dados tabulares -layout: lesson -slug: nocoes-basicas-R-dados-tabulares -date: 2016-09-05 -translation_date: 2021-08-28 -authors: -- Taryn Dewar -reviewers: -- James Baker -- John Russell -editors: -- Adam Crymble -translator: -- Diana Rebelo Rodriguez -translation-editor: -- Jimmy Medeiros -translation-reviewer: -- Ivo Veiga -- Romulo Predes -difficulty: 1 -activity: transforming -topics: [data-manipulation, r] -abstract: "Esta lição ensina uma maneira de analisar rapidamente grandes volumes de dados tabulares, tornando a pesquisa mais rápida e eficaz." -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/398 -original: r-basics-with-tabular-data -avatar_alt: Letra R ornamentada e ilustrada -doi: 10.46430/phpt0015 ---- - - - -{% include toc.html %} - -## Objetivos da lição - -À medida que mais e mais registros históricos são digitalizados, ter uma maneira de analisar rapidamente grandes volumes de dados tabulares torna a pesquisa mais rápida e eficaz. - -[R](https://pt.wikipedia.org/wiki/R_%28linguagem_de_programa%C3%A7%C3%A3o%29) é uma linguagem de programação com pontos fortes nas análises estatísticas. Como tal, ela pode ser usada para realizar análises quantitativas sobre fontes históricas, incluindo, mas não se limitando, a testes estatísticos. Como é possível executar repetidamente o mesmo código nas mesmas fontes, R permite analisar dados rapidamente e produz resultados que podem ser replicados. Além disso, como é possível salvar o código, R permite que se redirecionem ou revejam funções para projectos futuros, tornando-o uma parte flexível de sua caixa de ferramentas. - -Este tutorial não pressupõe nenhum conhecimento prévio do R. Ele percorrerá algumas das funções básicas do R e servirá como uma introdução à linguagem. Ele aborda o processo de instalação, explica algumas das ferramentas que se podem usar no R, bem como explica como trabalhar com conjuntos de dados enquanto se faz pesquisa. O tutorial fará isso através de uma série de mini-lições que mostrarão os tipos de fontes com as quais o R funciona bem e exemplos de como fazer cálculos para encontrar informações que possam ser relevantes à pesquisa histórica. A lição também abordará diferentes métodos de entrada de dados para R, tais como matrizes e o uso de ficheiros CSV. - -## Para quem isto é útil? - -R é ideal para analisar conjuntos de dados de grande dimensão que levariam muito tempo para serem processados manualmente. Depois de entendida a forma como se escrevem algumas funções básicas e como importar ficheiros de dados próprios, é possível analisar e visualizar os dados de forma rápida e eficiente. - -Embora R seja uma ótima ferramenta para dados tabulares, pode-se achar mais útil utilizar outras abordagens para analisar fontes não tabulares (tais como transcrições de jornais). Caso possua interesse em estudar estes tipos de fontes, dê uma olhada em algumas das outras grandes lições do [The Programming Historian](/pt/). 
- -## Instalar R - -R é uma linguagem de programação e um ambiente para trabalhar com dados. Ele pode ser executado utilizando o console de R, bem como no [command-line](/en/lessons/intro-to-bash) (linha de comandos) ou na interface [R Studio](https://www.rstudio.com/). Este tutorial irá focar no uso do console de R. Para começar com o R, baixe o programa do [The Comprehensive R Archive Network](https://cran.r-project.org/). R é compatível com Linux, Mac e Windows. - -Quando se abre o console de R pela primeira vez, a janela aberta se parece com essa: -![O console R no Mac.](/images/r-basics-with-tabular-data/Intro-to-R-1.png) - -## Usar o console de R - -O console R é um ótimo lugar para começar a trabalhar se quando se é inexperiente em R, porque ele foi projetado especificamente para esta linguagem e tem funções específicas para o R. - -O console é onde se digitam os comandos. Para limpar a tela inicial, vá para 'Edit' (editar) na barra de menu e selecione 'Clean Console’ (limpar console). Isto iniciará R com uma nova página. Também é possível mudar a aparência do console clicando na roda colorida no topo do console em um Mac, ou selecionando 'GUI Preferences' (preferências da Interface Gráfica do Usuário) no menu 'Edit' em um PC. Além disso, também é possível ajustar a cor da tela de fundo e as cores da fonte para as funções. - -## Usar conjuntos de dados - -Antes de trabalhar com dados próprios, usar os conjuntos de dados já incorporados ajuda a ter uma noção de como R funciona. É possível pesquisar nos conjuntos de dados inserindo data() no console. Isto mostrará a lista de todos os conjuntos de dados disponíveis em uma janela separada. Essa lista inclui os títulos de todos os diferentes conjuntos de dados, bem como uma breve descrição sobre as informações em cada um deles. - -No exemplo abaixo iremos primeiro carregar o conjunto de dados AirPassengers na sua sessão R digitando data(AirPassengers) na próxima linha do console^[1] e pressionando Enter. Para visualizar o conjunto de dados, digite apenas AirPassengers na próxima linha e pressione Enter novamente. Isso imprimirá uma tabela mostrando o número de passageiros que voaram em companhias aéreas internacionais entre janeiro de 1949 e dezembro de 1960, em milhares. Deverá aparecer o seguinte: - -``` -> data(AirPassengers) -> AirPassengers - Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec -1949 112 118 132 129 121 135 148 148 136 119 104 118 -1950 115 126 141 135 125 149 170 170 158 133 114 140 -1951 145 150 178 163 172 178 199 199 184 162 146 166 -1952 171 180 193 181 183 218 230 242 209 191 172 194 -1953 196 196 236 235 229 243 264 272 237 211 180 201 -1954 204 188 235 227 234 264 302 293 259 229 203 229 -1955 242 233 267 269 270 315 364 347 312 274 237 278 -1956 284 277 317 313 318 374 413 405 355 306 271 306 -1957 315 301 356 348 355 422 465 467 404 347 305 336 -1958 340 318 362 348 363 435 491 505 404 359 310 337 -1959 360 342 406 396 420 472 548 559 463 407 362 405 -1960 417 391 419 461 472 535 622 606 508 461 390 432 -``` - -Agora, é possível usar R para responder a uma série de perguntas com base nestes dados. Por exemplo, quais foram os meses mais populares para voar? Houve um aumento nas viagens internacionais ao longo do tempo? Provavelmente poderíamos encontrar as respostas a tais perguntas simplesmente escaneando esta tabela, mas não tão rapidamente quanto o computador. E se houvesse muito mais dados? 
- -## Funções básicas - -R pode ser usado para calcular uma série de valores que podem ser úteis enquanto se faz pesquisa em um conjunto de dados. Por exemplo, é possível encontrar a [média](https://pt.wikipedia.org/wiki/M%C3%A9dia), a [mediana](https://pt.wikipedia.org/wiki/Mediana_%28estat%C3%ADstica%29) e os valores mínimos e máximos. Para encontrar a média e a mediana no conjunto de dados, insere-se, respectivamente, mean(AirPassengers) e median(AirPassengers) no console. E se quisermos calcular mais de um valor de cada vez? Para produzir um resumo dos dados, digite summary(AirPassengers) (resumo) no console. Isto dará os valores mínimo e máximo dos dados, assim como a média, a mediana e os valores do primeiro e terceiro quartil. - -``` -> summary(AirPassengers) - Min. 1st Qu. Median Mean 3rd Qu. Max. - 104.0 180.0 265.5 280.3 360.5 622.0 -``` - -Um resumo nos mostra que o número mínimo de passageiros entre janeiro de 1949 e dezembro de 1960 foi de 104.000 e que o número máximo de passageiros foi de 622.000. O valor médio nos mostra que aproximadamente 280.300 pessoas viajavam por mês durante o período de coleta dos dados. Estes valores podem ser úteis para ver o grau de variação no número de passageiros ao longo do tempo. - -Usar a função summary() é uma boa maneira de se obter uma visão geral de todo o conjunto de dados. No entanto, e se quisermos analisar um subconjunto de dados, como um determinado ano ou alguns meses? É possível selecionar diferentes pontos de dados (como um determinado mês) e intervalos (como um determinado ano) em R para calcular muitos valores diferentes. Por exemplo, pode-se adicionar o número de passageiros durante dois meses para determinar o número total de passageiros durante esse período de tempo. - -Tente adicionar os dois primeiros valores dos dados AirPassengers no console e, em seguida, pressione 'Enter'. Devem aparecer duas linhas assim: - -``` -> 112+118 -[1] 230 -``` - -Isto lhe daria o número total de passageiros (em centenas de milhares) que voaram em janeiro e fevereiro de 1949. - -R pode fazer muito mais do que simples aritmética. É possível criar objetos, ou [variáveis](https://pt.wikipedia.org/wiki/Vari%C3%A1vel_%28programa%C3%A7%C3%A3o%29), para representar números e [expressões](https://pt.wikipedia.org/wiki/Express%C3%A3o_%28computa%C3%A7%C3%A3o%29). Por exemplo, pode-se nomear o valor de janeiro de 1949 como variável Jan1949. DigiteJan1949 <- 112 no console e, em seguida, Jan1949 na linha seguinte. A notação <- atribui o valor 112 à variável Jan1949. O que deve aparecer é: - -``` -> Jan1949 <- 112 -> Jan1949 -[1] 112 -``` - -R é sensível a maiúsculas e minúsculas, portanto tenha cuidado para usar a mesma notação quando usar as variáveis que foram atribuídas (ou nomeadas) em outras ações. Veja o artigo de Rasmus Bååth, [The State of Naming Conventions in R](https://journal.r-project.org/archive/2012-2/RJournal_2012-2_Baaaath.pdf) (em inglês), para mais informações sobre como nomear melhor as variáveis. - -Para remover uma variável do console, digite rm() (*remove* ou apagar) com a variável da qual se deseja apagar dos parênteses, e pressione Enter. Para ver todas as variáveis atribuídas, digite ls() (*list objects* ou lista de objetos) no console e pressione Enter. Isto pode ajudar a evitar o uso do mesmo nome para múltiplas variáveis. 
Isto também é importante porque R armazena todos os objetos que são criados em sua memória, portanto, mesmo que não se consiga ver uma variável nomeada x no console, ela pode ter sido criada antes e acidentalmente poderia sobrescrevê-la ao atribuir outra variável. - -Aqui está a lista de variáveis que criamos até agora: - -``` -> ls() -[1] "AirPassengers" "Jan1949" -``` - -Temos as variáveis AirPassengers e Jan1949. Se removermos a variável Jan1949 e digitarmos novamente ls(), veremos: - -``` -> rm(Jan1949) -> ls() -[1] "AirPassengers" -``` - -Se a qualquer momento não conseguir corrigir um erro ou ficar preso a uma função, digite help() no console para abrir a página de ajuda. Também é possível encontrar ajuda geral usando o menu ‘Help’ na parte superior do console. Se quiser mudar algo no código que já escreveu, pode-se digitar novamente o código em uma nova linha. Para economizar tempo, também é possível usar as setas do teclado para rolar para cima e para baixo no console para encontrar a linha de código que se deseja mudar. - -É possível usar letras como variáveis, mas quando começar a trabalhar com seus próprios dados, pode ser mais fácil atribuir nomes que sejam mais representativos desses dados. Mesmo com os dados AirPassengers, atribuir variáveis que se correlacionam com meses ou anos específicos tornaria mais fácil saber exatamente com quais valores se está trabalhando. - -### Prática - -A. Atribuir os valores de janeiro de 1950 e janeiro de 1960 dos dados de AirPassengers() em dois objetos novos. Em seguida, somar os valores dos dois objetos criados em uma nova linha de código. - -B. Usar os dois objetos criadas para encontrar a diferença entre os viajantes aéreos de janeiro de 1960 e de 1950. - -### Soluções - -A. Atribuir variáveis para os pontos de janeiro de 1950 e janeiro de 1960 dos dados de AirPassengers(). Adicionar as duas variáveis juntas na linha seguinte. - -``` -> Jan1950<- 115 -> Jan1960<- 417 -> Jan1950+Jan1960 -[1] 532 -``` - -Isto significa que 532.000 pessoas viajaram em voos internacionais em janeiro de 1950 e janeiro de 1960. - -B. Usar as variáveis que foram criadas para encontrar a diferença entre os viajantes aéreos de 1960 e 1950. - -``` -> Jan1960-Jan1950 -[1] 302 -``` - -Isto significa que, em janeiro de 1960, mais 302.000 pessoas viajaram em voos internacionais do que em janeiro de 1950. - -Definir variáveis para pontos de dados individuais pode ser entediante, especialmente se os nomes atribuídos são bastante longos. Entretanto, o processo é semelhante para atribuir um intervalo de valores a uma variável, como todos os pontos de dados durante um ano. Fazemos isso criando listas chamadas ‘vetores’ usando o comando c. c significa ‘combinar’ e nos permite vincular números em uma variável comum. Por exemplo, pode-se criar um vetor para os dados AirPassengers() de 1949 nomeado Air49: - -``` -> Air49<- c(112,118,132,129,121,135,148,148,136,119,104,118) -``` - -Cada item é acessível usando o nome da variável e sua posição no índice (a partir de 1). Neste caso, Air49[2] contém o valor que corresponde a fevereiro de 1949 - 118. - -``` -> Air49[2] -[1] 118 -``` - -É possível criar uma lista de valores consecutivos usando dois pontos. Por exemplo: - -``` -> y <- 1:10 -> y -[1] 1 2 3 4 5 6 7 8 9 10 -``` - -Usando este conhecimento, podemos usar a seguinte expressão para definir uma variável para os dados AirPassengers de 1949. 
- -``` -> Air49 <- AirPassengers[1:12] -> Air49 - [1] 112 118 132 129 121 135 148 148 136 119 104 118 -``` - -Air49[2] selecionou os primeiros doze termos no conjunto de dados AirPassengers. Isto dá o mesmo resultado que acima, mas leva menos tempo e também reduz a chance de que um valor seja transcrito incorretamente. - -Para obter o número total de passageiros para 1949, é possível somar todos os termos no vetor, usando a função sum() (somar). - -``` -> sum(Air49) -[1] 1520 -``` - -Portanto, o número total de passageiros em 1949 era de aproximadamente 1.520.000. - -Finalmente, a função length() (comprimento) torna possível saber o número de objetos em um vetor: - -``` -> length(Air49) -[1] 12 -``` - -### Prática - -1. Criar uma variável para os dados AirPassengers de 1950. -2. Imprimir ou apresentar o segundo objeto da série de 1950. -3. Qual é o tamanho (*length*) da sequência na pergunta 2? -4. Quantos passageiros voaram no total em 1950? - -### Soluções - -1. -``` -> Air50 <- AirPassengers[13:24] -Air50 -[1] 115 126 141 135 125 149 170 170 158 133 114 140 -``` - -2. -``` -> Air50[2] -[1] 126 -``` - -3. -``` -> length(Air50) -[1] 12 -``` - -4. -``` ->sum(Air50) -[1] 1676 -``` - -Caso se quisesse criar variáveis para todos os anos no conjunto de dados, seria possível então usar algumas das ferramentas que examinamos para determinar o número de pessoas que viajam de avião ao longo do tempo. Aqui está uma lista de variáveis para 1949 a 1960, seguida pelo número total de passageiros para cada ano: - -``` -> Air49 <- AirPassengers[1:12] -Air50 <- AirPassengers[13:24] -Air51 <- AirPassengers[25:36] -Air52 <- AirPassengers[37:48] -Air53 <- AirPassengers[49:60] -Air54 <- AirPassengers[61:72] -Air55 <- AirPassengers[73:84] -Air56 <- AirPassengers[85:96] -Air57 <- AirPassengers[97:108] -Air58 <- AirPassengers[109:120] -Air59 <- AirPassengers[121:132] -Air60 <- AirPassengers[133:144] -``` - -``` -> sum(Air49) -[1] 1520 -sum(Air50) -[1] 1676 -sum(Air51) -[1] 2042 -sum(Air52) -[1] 2364 -sum(Air53) -[1] 2700 -sum(Air54) -[1] 2867 -sum(Air55) -[1] 3408 -sum(Air56) -[1] 3939 -sum(Air57) -[1] 4421 -sum(Air58) -[1] 4572 -sum(Air59) -[1] 5140 -sum(Air60) -[1] 5714 -``` - -A partir destas informações, podemos ver que o número de passageiros aumenta a cada ano. É possível ir mais longe com estes dados para determinar se havia um interesse crescente em férias em certos períodos do ano, ou mesmo o aumento percentual de passageiros ao longo do tempo. - -## Trabalhar com bases de dados maiores - -Note que o exemplo acima não é bem adequado para conjuntos de dados de grande dimensão: contar pontos de dados para encontrar os corretos seria muito entediante. Pense no que aconteceria se procurássemos informações do ano 96 em um conjunto de dados com 150 anos de dados coletados. - -É possível selecionar linhas e colunas específicas de dados se o conjunto de dados estiver em um formato particular. 
Carregue os dados de mtcars em seu console: - -``` -> data(mtcars) -> mtcars - mpg cyl disp hp drat wt qsec vs am gear carb -Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 -Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 -Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 -Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 -Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 -Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 -Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 -Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 -Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 -Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 -Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 -Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 -Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 -Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 -Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 -Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 -Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 -Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 -Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 -Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 -Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 -Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 -AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 -Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 -Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 -Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 -Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 -Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 -Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 -Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 -Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 -Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 -``` - -Este [conjunto de dados](https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/mtcars.html) fornece uma visão geral dos *Motor Trend Car Road Tests* de automóveis da revista Motor Trend de 1974[^2]. Ele contém informações sobre quantas milhas por galão ou quilômetros por litro um carro poderia percorrer[^3], o número de cilindros do motor em cada carro, potência, relação do eixo traseiro, peso, e outras características de cada modelo. Os dados poderão ser usados para descobrir qual destas características tornou cada tipo de carro mais ou menos seguro para os passageiros ao longo do tempo. - -É possível selecionar colunas inserindo o nome do conjunto de dados seguido por colchetes e o número da linha ou coluna de dados que lhe interessa. Para ordenar as linhas e colunas, pense no dataset[x,y], sendo dataset o conjunto de dados com o qual se está trabalhando, x a linha e y a coluna. - -Se estivesse interessado na primeira linha de informações no conjunto mtcars, deveria executar o seguinte em seu console: - -``` -> mtcars[1,] - mpg cyl disp hp drat wt qsec vs am gear carb -Mazda RX4 21 6 160 110 3.9 2.62 16.46 0 1 4 4 -``` - -Para ver uma coluna dos dados, podemos digitar: - -``` -> mtcars[,2] - [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4 -``` - -Isto mostra todos os valores sob a categoria cyl (cilindrada). A maioria dos modelos de carros tem motores de 4, 6 ou 8 cilindros. Também é possível selecionar pontos de dados individuais inserindo valores tanto para x (linha) quanto para y (coluna): - -``` - > mtcars[1,2] -[1] 6 -``` - -Isto retorna o valor na primeira linha, segunda coluna. 
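+
+Também é possível selecionar intervalos de linhas e de colunas ao mesmo tempo, usando dois pontos dentro dos colchetes. Um esboço, cuja saída deve se parecer com isto:
+
+```
+> mtcars[1:3, 1:2]
+              mpg cyl
+Mazda RX4     21.0   6
+Mazda RX4 Wag 21.0   6
+Datsun 710    22.8   4
+```
+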
A partir daqui, seria possível executar um resumo em uma linha ou coluna de dados sem ter que contar o número de termos no conjunto de dados. Por exemplo, digitar summary(mtcars[,1]) no console e pressionar 'Enter' daria o resumo para as milhas por galão que os diferentes carros no conjunto de dados mtcars usam: - -``` -> summary(mtcars[,1]) - Min. 1st Qu. Median Mean 3rd Qu. Max. - 10.40 15.42 19.20 20.09 22.80 33.90 -``` - -O resumo indica que a eficiência máxima de combustível foi de 33,9 milhas por galão ou 54.5 quilômetros por 3.78 litros, do Toyota Corolla e o menos eficiente foi o Lincoln Continental, que só conseguiu 10,4 milhas por galão, ou seja, 16.7 quilômetros por 3.78 litros. Podemos encontrar os carros que correspondem aos pontos de valor olhando de volta para a tabela. É muito mais fácil encontrar um valor específico do que tentar fazer as contas em sua cabeça ou pesquisar através de uma planilha. - -## Matrizes - -Agora que temos uma melhor compreensão de como algumas das funções básicas em R funcionam, podemos analisar maneiras de usar essas funções em nossos próprios dados. Isto inclui a construção de [matrizes](https://pt.wikipedia.org/wiki/Matriz_%28matem%C3%A1tica%29) usando pequenos conjuntos de dados. O benefício de saber como construir matrizes em R é que se tivermos apenas alguns pontos de dados para trabalhar, poderíamos simplesmente criar uma matriz em vez de um CSV que precisaria ser depois importado. Uma das maneiras mais simples de construir uma matriz é criar pelo menos duas variáveis ou vetores e depois ligá-los entre si. Por exemplo, vejamos alguns dados do [Old Bailey](https://pt.wikipedia.org/wiki/Old_Bailey) (o Tribunal Penal Central da Inglaterra e do País de Gales): - - -![Conjunto de dados criminais do [The Old Bailey](https://www.oldbaileyonline.org/) nas décadas entre 1670 e 1800.](/images/r-basics-with-tabular-data/Intro-to-R-2.png) - - -O Old Bailey contém estatísticas e informações sobre casos criminais entre 1674 e 1913 que foram mantidos pelo Tribunal Penal Central de Londres. Se quiséssemos analisar o número total de crimes de roubo e furto violento entre 1670 e 1710, poderíamos colocar esses números em uma matriz. - -Para isso, vamos criar as variáveis Roubos e RoubosViolentos usando os totais de cada década como pontos de dados: - -``` -> Roubos <- c(2,30,38,13) -RoubosViolentos <- c(7,20,36,3) -``` - -Para criar uma matriz podemos usar a função cbind() (*column bind* ou união de colunas). Isto une Roubos e RoubosViolentos em colunas, representadas como Crime aqui: - -``` -> Roubos <- c(2,30,38,13) -RoubosViolentos <- c(7,20,36,3) -Crime <- cbind(Roubos,RoubosViolentos) -Crime - Roubos RoubosViolentos -[1,] 2 7 -[2,] 30 20 -[3,] 38 36 -[4,] 13 3 -``` - -Também é possível estabelecer uma matriz usando rbind(). rbind() une os dados em fileiras (*row bind* ou união de fileiras). Observe a diferença entrenCrime e Crime2: - -``` -> Crime2 <- rbind(Roubos,RoubosViolentos) -> Crime2 - [,1] [,2] [,3] [,4] -Roubos 2 30 38 13 -RoubosViolentos 7 20 36 3 -``` - -A segunda matriz também pode ser criada usando a expressão t(Crime) (matriz transposta), que gera o inverso de Crime. - -Também é possível construir uma matriz utilizando matrix(). 
Isto permite transformar uma sequência de números, como o número de roubos e roubos violentos cometidos, em uma matriz se não tiver criado variáveis separadas para estes valores: - -``` -> matrix(c(2,30,3,4,7,20,36,3),nrow=2) - [,1] [,2] [,3] [,4] -[1,] 2 3 7 36 -[2,] 30 4 20 3 -``` - -``` -[2,] 30 4 20 3 -> matrix(c(2,30,3,4,7,20,36,3),ncol=2) - [,1] [,2] -[1,] 2 7 -[2,] 30 20 -[3,] 3 36 -[4,] 4 3 -``` - -A primeira parte da função é a lista de números. Depois disso, é possível determinar quantas linhas (nrow=) (número de linhas) ou colunas (ncol=) (número de colunas) a matriz terá. - -A função apply() permite executar a mesma função em cada linha ou coluna de uma matriz. Existem três partes da função apply(), nas quais é preciso selecionar: a matriz que está sendo utilizada, os termos que se deseja usar e a função que se deseja executar na matriz: - -``` -> Crime - Roubos RoubosViolentos -[1,] 2 7 -[2,] 30 20 -[3,] 38 36 -[4,] 13 3 -> apply(Crime,1,mean) -[1] 4.5 25.0 37.0 8.0 -``` - -Este exemplo mostra a função apply utilizada na matriz Crime para calcular a média (*mean*) de cada linha e, portanto, o número médio de roubos e assaltos combinados que foram cometidos em cada década. Se quiser saber a média de cada coluna, use um 2 em vez de um 1 dentro da função: - -``` -> apply(Crime,2,mean) - Roubos RoubosViolentos - 20.75 16.50 -``` - -Isto mostra o número médio de roubos e assaltos entre as décadas. - -### Prática - -1. Criar uma matriz de duas colunas usando os seguintes dados de Quebra da Paz (*Breaking Peace*) e Assassinatos (*Killing*) de 1710 a 1730 da tabela acima do Old Bailey: c(2,3,3,44,51,17) - -2. Usar a função cbind() para juntar QuebraPaz <- c(2,3,3) e Assassinatos <- c(44,51,17). - -3. Calcular a média de cada coluna para a matriz acima usando a função apply(). - -### Soluções - -1. -``` -> matrix(c(2,3,3,44,51,17),ncol=2) - [,1] [,2] -[1,] 2 44 -[2,] 3 51 -[3,] 3 17 -``` - -2. -``` -> QuebraPaz <- c(2,3,3) -> Assassinatos <- c(44,51,17) -> PazAssassinatos <- cbind(QuebraPaz,Assassinatos) -> PazAssassinatos - QuebraPaz Assassinatos -[1,] 2 44 -[2,] 3 51 -[3,] 3 17 -``` - -3. -``` -> apply(PazAssassinatos,2,mean) -> QuebraPaz Assassinatos -> 2.666667 37.333333 -``` - -Matrizes podem ser úteis quando se está trabalhando com pequenas quantidades de dados. No entanto, nem sempre é a melhor opção, porque uma matriz pode ser difícil de ler. Às vezes é mais fácil criar seu próprio ficheiro usando um programa de planilhas como [Excel](https://pt.wikipedia.org/wiki/Microsoft_Excel) ou [Open Office](https://www.openoffice.org/pt/) para garantir que todas as informações que deseja estudar estejam organizadas e importar esse ficheiro para o R. - -## Carregar seu próprio conjunto de dados em R - -Agora que já praticou com dados simples, pode trabalhar com seus próprios dados. Como trabalhar com esses dados em R? Há várias maneiras de se fazer isso. A primeira é carregar a planilha diretamente em R. Outra maneira é importar um ficheiro CSV (*comma-separated values* ou valores separados por vírgula) ou TXT (de texto) para R. - -Para carregar um ficheiro Excel diretamente no console R, é necessário primeiro instalar o pacote readxl (ler o ficheiro Excel). Para fazer isto, digite install.packages("readxl") no console e pressione Enter. Pode ser que seja necessário verificar se o pacote foi instalado no console clicando na guia “Packages&Data” (pacotes e dados) no menu, selecionando “Package Manager” (gerenciador de pacotes) e depois clicando na caixa ao lado do pacote readxl. 
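+
+Note que instalar um pacote não é o mesmo que carregá-lo: depois de instalado, o pacote ainda precisa ser carregado em cada nova sessão de R antes de ser usado, algo que o exemplo a seguir pressupõe:
+
+```
+> library(readxl)
+```
+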
A partir daqui, é possível selecionar um ficheiro e carregá-lo em R. Abaixo está um exemplo de como pode parecer carregar um simples ficheiro Excel: - -``` -> x <- read_excel("Workbook2.xlsx") -> x - a b -1 1 5 -2 2 6 -3 3 7 -4 4 8 -``` - -Após o comando read_excel insere-se o nome do ficheiro que está sendo selecionado. Os números embaixo correspondem aos dados da planilha de amostra que utilizei. Observe como as linhas estão numeradas e as colunas estão etiquetadas como eram na planilha original. - -Quando estiver carregando dados em R, certifique-se de que o ficheiro que está sendo acessado esteja dentro do diretório em seu computador de onde se está trabalhando. Para verificar isso, digite dir() (diretório) ou getwd() (mostrar o caminho do diretório de trabalho) no console. É possível mudar o diretório, se necessário, indo para a aba “Miscellaneous” (diversos) na barra de título em sua tela e, em seguida, selecionando o que se quer definir como diretório para R. Se não fizer isso, R não será capaz de encontrar o ficheiro corretamente. - -Outra maneira de carregar dados em R é usar um ficheiro CSV. Um ficheiro [CSV](https://pt.wikipedia.org/wiki/Comma-separated_values) exibe valores em filas e colunas, separados por vírgulas. É possível salvar qualquer documento criado no Excel como um ficheiro .csv e depois carregá-lo em R. Para usar um ficheiro CSV em R, nomeie o ficheiro usando o comando <- e depois digite read.csv(file="file-name.csv",header=TRUE,sep=",") no console. file-name indica ao R qual ficheiro selecionar, enquanto que definir o cabeçalho ou header= (o ficheiro equivale a), para TRUE (verdadeiro) diz que a primeira linha são cabeçalhos e não variáveis. sep significa que há uma vírgula entre cada número e linha. - -Normalmente, um CSV pode conter muitas informações. Entretanto, para começar, tente criar um ficheiro CSV em Excel usando os dados do *Old Bailey* que usamos para as matrizes. Defina as colunas para as datas entre 1710 e 1730, mais o número de violações de crimes de paz e assassinatos para aquelas décadas. Salve o ficheiro como "OldBailey.csv" e tente carregá-lo em R usando os passos acima. Veremos que: - -``` -> read.csv (file="OldBailey.csv", header=TRUE, sep=",") -Date QuebraPaz Assassinatos -1 1710 2 44 -2 1720 3 51 -3 1730 4 17 -``` - -Agora poderíamos acessar os dados em R e fazer quaisquer cálculos para ajudá-lo a estudar os dados. Os ficheiros CSV também podem ser muito mais complexos do que este exemplo, portanto, qualquer conjunto de dados com os quais trabalhamos em estudos próprios também poderia ser aberto em R. - -TXT (ou ficheiros de texto) podem ser importados para R de maneira semelhante. Usando o comando read.table(), é possível carregar ficheiros de texto em R, seguindo a mesma sintaxe que no exemplo acima. - -## Salvar dados en R - -Agora que carregamos dados em R e conhecemos algumas maneiras de trabalhar com os dados, o que acontece se quisermos salvá-los em outro formato? A função write.xlsx() permite que se faça exatamente isso - pegar os dados de R e salvá-los em um ficheiro Excel. Tente escrever o ficheiro do *Old Bailey* em um ficheiro Excel. 
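+
+Caso o pacote xlsx ainda não esteja instalado e os dados do *Old Bailey* ainda não estejam guardados numa variável, um ponto de partida possível (com nomes e passos hipotéticos) seria:
+
+```
+> install.packages("xlsx")
+> OldBailey <- read.csv(file="OldBailey.csv", header=TRUE, sep=",")
+```
+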
Primeiro, será necessário carregar o pacote e depois será possível criar o ficheiro após criar uma variável para os dados do *Old Bailey*: - -``` -> library(xlsx) -> write.xlsx(x= OldBailey, file= "OldBailey.xlsx", sheetName= "OldBailey", row.names= TRUE) -``` - -Neste caso, e dentro do parêntese desta função [write.xlsx](https://www.rdocumentation.org/packages/xlsx/versions/0.6.5), estamos chamando para processar a variável "OldBailey" com o argumento x= . Ao mesmo tempo, indicamos que o ficheiro salvo deve ser chamado “OldBailey” com a extensão “.xlsx” com o argumento file= . Além disso, damos o nome "OldBailey" à planilha onde estarão os dados com sheetName= . Finalmente, estabelecemos que queremos (TRUE ou verdadeiro) que os nomes da linha em nossa variável sejam salvos no novo ficheiro. [N. da T.] - -## Resumo e passos seguintes - -Este tutorial explorou as bases do uso de R para trabalhar com dados de pesquisa tabular. O R pode ser uma ferramenta muito útil para a pesquisa em ciências humanas e sociais porque a análise de dados é reprodutível e permite analisar dados rapidamente sem ter que montar um sistema complicado. Agora que conhece alguns dos conceitos básicos do R, pode-se explorar algumas das outras funções do programa, incluindo cálculos estatísticos, produção de gráficos e criação de suas próprias funções. - -Para mais informações sobre o R, visite o [R Manual](https://cran.r-project.org/doc/manuals/r-release/R-intro.html) (em inglês). - -Há também uma série de outros tutoriais de R online, inclusive: - -* [R: A self-learn tutorial](https://web.archive.org/web/20191015004305/https://www.nceas.ucsb.edu/files/scicomp/Dloads/RProgramming/BestFirstRTutorial.pdf) (em inglês) - este tutorial passa por uma série de funções e fornece exercícios para praticar competências. - -* [DataCamp Introduction to R](https://www.datacamp.com/courses/free-introduction-to-r) - este é um curso online gratuito que lhe dá feedback sobre seu código para ajudar a identificar erros e aprender como escrever código de forma mais eficiente. - -Finalmente, um grande recurso para historiadores digitais é o [Lincoln Mullen’s Digital History Methods in R](http://dh-r.lincolnmullen.com/). É um rascunho de um livro escrito especificamente sobre como usar R para o trabalho de história digital. - -## Notas - -[^1]: Box, G. E. P., Jenkins, G. M. e Reinsel, G. C. (1976), Time Series Analysis, Forecasting and Control. Third Edition. Holden-Day. Series G. -[^2]: Henderson e Velleman (1981), Building multiple regression models interactively. Biometrics, 37, 391Ð411. -[^3]: Nota da tradutora: Um galão equivale a 3,78 litros e uma milha equivale a 1,6 quilômetros. +--- +title: Noções básicas de R com dados tabulares +layout: lesson +slug: nocoes-basicas-R-dados-tabulares +date: 2016-09-05 +translation_date: 2021-08-28 +authors: +- Taryn Dewar +reviewers: +- James Baker +- John Russell +editors: +- Adam Crymble +translator: +- Diana Rebelo Rodriguez +translation-editor: +- Jimmy Medeiros +translation-reviewer: +- Ivo Veiga +- Romulo Predes +difficulty: 1 +activity: transforming +topics: [data-manipulation, r] +abstract: "Esta lição ensina uma maneira de analisar rapidamente grandes volumes de dados tabulares, tornando a pesquisa mais rápida e eficaz." 
+review-ticket: https://github.com/programminghistorian/ph-submissions/issues/398
+original: r-basics-with-tabular-data
+avatar_alt: Letra R ornamentada e ilustrada
+doi: 10.46430/phpt0015
+---
+
+
+
+{% include toc.html %}
+
+## Objetivos da lição
+
+À medida que mais e mais registros históricos são digitalizados, ter uma maneira de analisar rapidamente grandes volumes de dados tabulares torna a pesquisa mais rápida e eficaz.
+
+[R](https://pt.wikipedia.org/wiki/R_%28linguagem_de_programa%C3%A7%C3%A3o%29) é uma linguagem de programação com pontos fortes nas análises estatísticas. Como tal, ela pode ser usada para realizar análises quantitativas sobre fontes históricas, incluindo, mas não se limitando, a testes estatísticos. Como é possível executar repetidamente o mesmo código nas mesmas fontes, R permite analisar dados rapidamente e produz resultados que podem ser replicados. Além disso, como é possível salvar o código, R permite que se redirecionem ou revejam funções para projetos futuros, tornando-o uma parte flexível de sua caixa de ferramentas.
+
+Este tutorial não pressupõe nenhum conhecimento prévio do R. Ele percorrerá algumas das funções básicas do R e servirá como uma introdução à linguagem. Ele aborda o processo de instalação, explica algumas das ferramentas que se podem usar no R, bem como explica como trabalhar com conjuntos de dados enquanto se faz pesquisa. O tutorial fará isso através de uma série de mini-lições que mostrarão os tipos de fontes com as quais o R funciona bem e exemplos de como fazer cálculos para encontrar informações que possam ser relevantes à pesquisa histórica. A lição também abordará diferentes métodos de entrada de dados para R, tais como matrizes e o uso de ficheiros CSV.
+
+## Para quem isto é útil?
+
+R é ideal para analisar conjuntos de dados de grande dimensão que levariam muito tempo para serem processados manualmente. Depois de entendida a forma como se escrevem algumas funções básicas e como importar ficheiros de dados próprios, é possível analisar e visualizar os dados de forma rápida e eficiente.
+
+Embora R seja uma ótima ferramenta para dados tabulares, pode-se achar mais útil utilizar outras abordagens para analisar fontes não tabulares (tais como transcrições de jornais). Caso possua interesse em estudar estes tipos de fontes, dê uma olhada em algumas das outras grandes lições do [The Programming Historian](/pt/).
+
+## Instalar R
+
+R é uma linguagem de programação e um ambiente para trabalhar com dados. Ele pode ser executado utilizando o console de R, bem como no [command-line](/en/lessons/intro-to-bash) (linha de comandos) ou na interface [R Studio](https://www.rstudio.com/). Este tutorial irá focar no uso do console de R. Para começar com o R, baixe o programa do [The Comprehensive R Archive Network](https://cran.r-project.org/). R é compatível com Linux, Mac e Windows.
+
+Quando se abre o console de R pela primeira vez, a janela aberta se parece com essa:
+![O console R no Mac.](/images/r-basics-with-tabular-data/Intro-to-R-1.png)
+
+## Usar o console de R
+
+O console R é um ótimo lugar para começar a trabalhar quando se é inexperiente em R, porque ele foi projetado especificamente para esta linguagem e tem funções específicas para o R.
+
+O console é onde se digitam os comandos. Para limpar a tela inicial, vá para 'Edit' (editar) na barra de menu e selecione 'Clean Console' (limpar console). Isto iniciará R com uma nova página.
+Também é possível mudar a aparência do console clicando na roda colorida no topo do console em um Mac, ou selecionando 'GUI Preferences' (preferências da Interface Gráfica do Usuário) no menu 'Edit' em um PC. Além disso, também é possível ajustar a cor da tela de fundo e as cores da fonte para as funções.
+
+## Usar conjuntos de dados
+
+Antes de trabalhar com dados próprios, usar os conjuntos de dados já incorporados ajuda a ter uma noção de como R funciona. É possível pesquisar nos conjuntos de dados inserindo data() no console. Isto mostrará a lista de todos os conjuntos de dados disponíveis em uma janela separada. Essa lista inclui os títulos de todos os diferentes conjuntos de dados, bem como uma breve descrição sobre as informações em cada um deles.
+
+No exemplo abaixo iremos primeiro carregar o conjunto de dados AirPassengers na sua sessão R digitando data(AirPassengers) na próxima linha do console[^1] e pressionando Enter. Para visualizar o conjunto de dados, digite apenas AirPassengers na próxima linha e pressione Enter novamente. Isso imprimirá uma tabela mostrando o número de passageiros que voaram em companhias aéreas internacionais entre janeiro de 1949 e dezembro de 1960, em milhares. Deverá aparecer o seguinte:
+
+```
+> data(AirPassengers)
+> AirPassengers
+     Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
+1949 112 118 132 129 121 135 148 148 136 119 104 118
+1950 115 126 141 135 125 149 170 170 158 133 114 140
+1951 145 150 178 163 172 178 199 199 184 162 146 166
+1952 171 180 193 181 183 218 230 242 209 191 172 194
+1953 196 196 236 235 229 243 264 272 237 211 180 201
+1954 204 188 235 227 234 264 302 293 259 229 203 229
+1955 242 233 267 269 270 315 364 347 312 274 237 278
+1956 284 277 317 313 318 374 413 405 355 306 271 306
+1957 315 301 356 348 355 422 465 467 404 347 305 336
+1958 340 318 362 348 363 435 491 505 404 359 310 337
+1959 360 342 406 396 420 472 548 559 463 407 362 405
+1960 417 391 419 461 472 535 622 606 508 461 390 432
+```
+
+Agora, é possível usar R para responder a uma série de perguntas com base nestes dados. Por exemplo, quais foram os meses mais populares para voar? Houve um aumento nas viagens internacionais ao longo do tempo? Provavelmente poderíamos encontrar as respostas a tais perguntas simplesmente escaneando esta tabela, mas não tão rapidamente quanto o computador. E se houvesse muito mais dados?
+
+## Funções básicas
+
+R pode ser usado para calcular uma série de valores que podem ser úteis enquanto se faz pesquisa em um conjunto de dados. Por exemplo, é possível encontrar a [média](https://pt.wikipedia.org/wiki/M%C3%A9dia), a [mediana](https://pt.wikipedia.org/wiki/Mediana_%28estat%C3%ADstica%29) e os valores mínimos e máximos. Para encontrar a média e a mediana no conjunto de dados, insere-se, respectivamente, mean(AirPassengers) e median(AirPassengers) no console. E se quisermos calcular mais de um valor de cada vez? Para produzir um resumo dos dados, digite summary(AirPassengers) (resumo) no console. Isto dará os valores mínimo e máximo dos dados, assim como a média, a mediana e os valores do primeiro e terceiro quartil.
+
+```
+> summary(AirPassengers)
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+  104.0   180.0   265.5   280.3   360.5   622.0
+```
+
+Um resumo nos mostra que o número mínimo de passageiros entre janeiro de 1949 e dezembro de 1960 foi de 104.000 e que o número máximo de passageiros foi de 622.000. O valor médio nos mostra que aproximadamente 280.300 pessoas viajavam por mês durante o período de coleta dos dados.
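+
+Para conferir esses valores um a um, pode-se usar as funções mean() e median() mencionadas acima; a saída deve se parecer com isto:
+
+```
+> mean(AirPassengers)
+[1] 280.2986
+> median(AirPassengers)
+[1] 265.5
+```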
+
+Estes valores podem ser úteis para ver o grau de variação no número de passageiros ao longo do tempo.
+
+Usar a função summary() é uma boa maneira de se obter uma visão geral de todo o conjunto de dados. No entanto, e se quisermos analisar um subconjunto de dados, como um determinado ano ou alguns meses? É possível selecionar diferentes pontos de dados (como um determinado mês) e intervalos (como um determinado ano) em R para calcular muitos valores diferentes. Por exemplo, pode-se adicionar o número de passageiros durante dois meses para determinar o número total de passageiros durante esse período de tempo.
+
+Tente adicionar os dois primeiros valores dos dados AirPassengers no console e, em seguida, pressione 'Enter'. Devem aparecer duas linhas assim:
+
+```
+> 112+118
+[1] 230
+```
+
+Isto lhe daria o número total de passageiros (em milhares) que voaram em janeiro e fevereiro de 1949.
+
+R pode fazer muito mais do que simples aritmética. É possível criar objetos, ou [variáveis](https://pt.wikipedia.org/wiki/Vari%C3%A1vel_%28programa%C3%A7%C3%A3o%29), para representar números e [expressões](https://pt.wikipedia.org/wiki/Express%C3%A3o_%28computa%C3%A7%C3%A3o%29). Por exemplo, pode-se nomear o valor de janeiro de 1949 como variável Jan1949. Digite Jan1949 <- 112 no console e, em seguida, Jan1949 na linha seguinte. A notação <- atribui o valor 112 à variável Jan1949. O que deve aparecer é:
+
+```
+> Jan1949 <- 112
+> Jan1949
+[1] 112
+```
+
+R é sensível a maiúsculas e minúsculas, portanto tenha cuidado para usar a mesma notação quando usar as variáveis que foram atribuídas (ou nomeadas) em outras ações. Veja o artigo de Rasmus Bååth, [The State of Naming Conventions in R](https://journal.r-project.org/archive/2012-2/RJournal_2012-2_Baaaath.pdf) (em inglês), para mais informações sobre como nomear melhor as variáveis.
+
+Para remover uma variável do console, digite rm() (*remove* ou apagar) com a variável que se deseja apagar dentro dos parênteses, e pressione Enter. Para ver todas as variáveis atribuídas, digite ls() (*list objects* ou lista de objetos) no console e pressione Enter. Isto pode ajudar a evitar o uso do mesmo nome para múltiplas variáveis. Isto também é importante porque R armazena todos os objetos que são criados em sua memória, portanto, mesmo que não se consiga ver uma variável nomeada x no console, ela pode ter sido criada antes e acidentalmente poderia sobrescrevê-la ao atribuir outra variável.
+
+Aqui está a lista de variáveis que criamos até agora:
+
+```
+> ls()
+[1] "AirPassengers" "Jan1949"
+```
+
+Temos as variáveis AirPassengers e Jan1949. Se removermos a variável Jan1949 e digitarmos novamente ls(), veremos:
+
+```
+> rm(Jan1949)
+> ls()
+[1] "AirPassengers"
+```
+
+Se a qualquer momento não conseguir corrigir um erro ou ficar preso a uma função, digite help() no console para abrir a página de ajuda. Também é possível encontrar ajuda geral usando o menu ‘Help’ na parte superior do console. Se quiser mudar algo no código que já escreveu, pode-se digitar novamente o código em uma nova linha. Para economizar tempo, também é possível usar as setas do teclado para rolar para cima e para baixo no console para encontrar a linha de código que se deseja mudar.
+
+É possível usar letras como variáveis, mas quando começar a trabalhar com seus próprios dados, pode ser mais fácil atribuir nomes que sejam mais representativos desses dados.
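+
+Um esboço simples, com nomes hipotéticos e os mesmos valores de janeiro e fevereiro de 1949 já usados acima:
+
+```
+> PassageirosJan1949 <- 112
+> PassageirosFev1949 <- 118
+> PassageirosJan1949 + PassageirosFev1949
+[1] 230
+```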
+
+Mesmo com os dados AirPassengers, atribuir variáveis que se correlacionam com meses ou anos específicos tornaria mais fácil saber exatamente com quais valores se está trabalhando.
+
+### Prática
+
+A. Atribuir os valores de janeiro de 1950 e janeiro de 1960 dos dados de AirPassengers() a dois objetos novos. Em seguida, somar os valores dos dois objetos criados em uma nova linha de código.
+
+B. Usar os dois objetos criados para encontrar a diferença entre os viajantes aéreos de janeiro de 1960 e de 1950.
+
+### Soluções
+
+A. Atribuir variáveis para os pontos de janeiro de 1950 e janeiro de 1960 dos dados de AirPassengers(). Adicionar as duas variáveis juntas na linha seguinte.
+
+```
+> Jan1950<- 115
+> Jan1960<- 417
+> Jan1950+Jan1960
+[1] 532
+```
+
+Isto significa que 532.000 pessoas viajaram em voos internacionais em janeiro de 1950 e janeiro de 1960.
+
+B. Usar as variáveis que foram criadas para encontrar a diferença entre os viajantes aéreos de 1960 e 1950.
+
+```
+> Jan1960-Jan1950
+[1] 302
+```
+
+Isto significa que, em janeiro de 1960, mais 302.000 pessoas viajaram em voos internacionais do que em janeiro de 1950.
+
+Definir variáveis para pontos de dados individuais pode ser entediante, especialmente se os nomes atribuídos são bastante longos. Entretanto, o processo é semelhante para atribuir um intervalo de valores a uma variável, como todos os pontos de dados durante um ano. Fazemos isso criando listas chamadas ‘vetores’ usando o comando c. c significa ‘combinar’ e nos permite vincular números em uma variável comum. Por exemplo, pode-se criar um vetor para os dados AirPassengers() de 1949 nomeado Air49:
+
+```
+> Air49<- c(112,118,132,129,121,135,148,148,136,119,104,118)
+```
+
+Cada item é acessível usando o nome da variável e sua posição no índice (a partir de 1). Neste caso, Air49[2] contém o valor que corresponde a fevereiro de 1949 - 118.
+
+```
+> Air49[2]
+[1] 118
+```
+
+É possível criar uma lista de valores consecutivos usando dois pontos. Por exemplo:
+
+```
+> y <- 1:10
+> y
+ [1]  1  2  3  4  5  6  7  8  9 10
+```
+
+Com este conhecimento, podemos usar a seguinte expressão para definir uma variável para os dados AirPassengers de 1949.
+
+```
+> Air49 <- AirPassengers[1:12]
+> Air49
+ [1] 112 118 132 129 121 135 148 148 136 119 104 118
+```
+
+A expressão [1:12] selecionou os primeiros doze termos do conjunto de dados AirPassengers. Isto dá o mesmo resultado que acima, mas leva menos tempo e também reduz a chance de que um valor seja transcrito incorretamente.
+
+Para obter o número total de passageiros para 1949, é possível somar todos os termos no vetor, usando a função sum() (somar).
+
+```
+> sum(Air49)
+[1] 1520
+```
+
+Portanto, o número total de passageiros em 1949 era de aproximadamente 1.520.000.
+
+Finalmente, a função length() (comprimento) torna possível saber o número de objetos em um vetor:
+
+```
+> length(Air49)
+[1] 12
+```
+
+### Prática
+
+1. Criar uma variável para os dados AirPassengers de 1950.
+2. Imprimir ou apresentar o segundo objeto da série de 1950.
+3. Qual é o tamanho (*length*) da sequência na pergunta 2?
+4. Quantos passageiros voaram no total em 1950?
+
+### Soluções
+
+1.
+```
+> Air50 <- AirPassengers[13:24]
+Air50
+[1] 115 126 141 135 125 149 170 170 158 133 114 140
+```
+
+2.
+```
+> Air50[2]
+[1] 126
+```
+
+3.
+```
+> length(Air50)
+[1] 12
+```
+
+4.
+``` +>sum(Air50) +[1] 1676 +``` + +Caso se quisesse criar variáveis para todos os anos no conjunto de dados, seria possível então usar algumas das ferramentas que examinamos para determinar o número de pessoas que viajam de avião ao longo do tempo. Aqui está uma lista de variáveis para 1949 a 1960, seguida pelo número total de passageiros para cada ano: + +``` +> Air49 <- AirPassengers[1:12] +Air50 <- AirPassengers[13:24] +Air51 <- AirPassengers[25:36] +Air52 <- AirPassengers[37:48] +Air53 <- AirPassengers[49:60] +Air54 <- AirPassengers[61:72] +Air55 <- AirPassengers[73:84] +Air56 <- AirPassengers[85:96] +Air57 <- AirPassengers[97:108] +Air58 <- AirPassengers[109:120] +Air59 <- AirPassengers[121:132] +Air60 <- AirPassengers[133:144] +``` + +``` +> sum(Air49) +[1] 1520 +sum(Air50) +[1] 1676 +sum(Air51) +[1] 2042 +sum(Air52) +[1] 2364 +sum(Air53) +[1] 2700 +sum(Air54) +[1] 2867 +sum(Air55) +[1] 3408 +sum(Air56) +[1] 3939 +sum(Air57) +[1] 4421 +sum(Air58) +[1] 4572 +sum(Air59) +[1] 5140 +sum(Air60) +[1] 5714 +``` + +A partir destas informações, podemos ver que o número de passageiros aumenta a cada ano. É possível ir mais longe com estes dados para determinar se havia um interesse crescente em férias em certos períodos do ano, ou mesmo o aumento percentual de passageiros ao longo do tempo. + +## Trabalhar com bases de dados maiores + +Note que o exemplo acima não é bem adequado para conjuntos de dados de grande dimensão: contar pontos de dados para encontrar os corretos seria muito entediante. Pense no que aconteceria se procurássemos informações do ano 96 em um conjunto de dados com 150 anos de dados coletados. + +É possível selecionar linhas e colunas específicas de dados se o conjunto de dados estiver em um formato particular. Carregue os dados de mtcars em seu console: + +``` +> data(mtcars) +> mtcars + mpg cyl disp hp drat wt qsec vs am gear carb +Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 +Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 +Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 +Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 +Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 +Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 +Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 +Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 +Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 +Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 +Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 +Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 +Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 +Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 +Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 +Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 +Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 +Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 +Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 +Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 +Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 +Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 +AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 +Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 +Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 +Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 +Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 +Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 +Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 +Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 
+Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
+Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
+```
+
+Este [conjunto de dados](https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/mtcars.html) fornece uma visão geral dos *Motor Trend Car Road Tests* de automóveis da revista Motor Trend de 1974[^2]. Ele contém informações sobre quantas milhas por galão ou quilômetros por litro um carro poderia percorrer[^3], o número de cilindros do motor em cada carro, potência, relação do eixo traseiro, peso, e outras características de cada modelo. Os dados poderão ser usados para descobrir qual destas características tornou cada tipo de carro mais ou menos seguro para os passageiros ao longo do tempo.
+
+É possível selecionar colunas inserindo o nome do conjunto de dados seguido por colchetes e o número da linha ou coluna de dados que lhe interessa. Para ordenar as linhas e colunas, pense no dataset[x,y], sendo dataset o conjunto de dados com o qual se está trabalhando, x a linha e y a coluna.
+
+Se estivesse interessado na primeira linha de informações no conjunto mtcars, deveria executar o seguinte em seu console:
+
+```
+> mtcars[1,]
+          mpg cyl disp  hp drat   wt  qsec vs am gear carb
+Mazda RX4  21   6  160 110  3.9 2.62 16.46  0  1    4    4
+```
+
+Para ver uma coluna dos dados, podemos digitar:
+
+```
+> mtcars[,2]
+ [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4
+```
+
+Isto mostra todos os valores sob a categoria cyl (cilindrada). A maioria dos modelos de carros tem motores de 4, 6 ou 8 cilindros. Também é possível selecionar pontos de dados individuais inserindo valores tanto para x (linha) quanto para y (coluna):
+
+```
+> mtcars[1,2]
+[1] 6
+```
+
+Isto retorna o valor na primeira linha, segunda coluna. A partir daqui, seria possível executar um resumo em uma linha ou coluna de dados sem ter que contar o número de termos no conjunto de dados. Por exemplo, digitar summary(mtcars[,1]) no console e pressionar 'Enter' daria o resumo para as milhas por galão que os diferentes carros no conjunto de dados mtcars usam:
+
+```
+> summary(mtcars[,1])
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+  10.40   15.42   19.20   20.09   22.80   33.90
+```
+
+O resumo indica que a eficiência máxima de combustível foi de 33,9 milhas por galão (cerca de 54,5 quilômetros por 3,78 litros), obtida pelo Toyota Corolla, e que o carro menos eficiente foi o Lincoln Continental, que só conseguiu 10,4 milhas por galão (cerca de 16,7 quilômetros por 3,78 litros). Podemos encontrar os carros que correspondem aos pontos de valor olhando de volta para a tabela. É muito mais fácil encontrar um valor específico do que tentar fazer as contas em sua cabeça ou pesquisar através de uma planilha.
+
+## Matrizes
+
+Agora que temos uma melhor compreensão de como algumas das funções básicas em R funcionam, podemos analisar maneiras de usar essas funções em nossos próprios dados. Isto inclui a construção de [matrizes](https://pt.wikipedia.org/wiki/Matriz_%28matem%C3%A1tica%29) usando pequenos conjuntos de dados. O benefício de saber como construir matrizes em R é que se tivermos apenas alguns pontos de dados para trabalhar, poderíamos simplesmente criar uma matriz em vez de um CSV que precisaria ser depois importado. Uma das maneiras mais simples de construir uma matriz é criar pelo menos duas variáveis ou vetores e depois ligá-los entre si.
+Por exemplo, vejamos alguns dados do [Old Bailey](https://pt.wikipedia.org/wiki/Old_Bailey) (o Tribunal Penal Central da Inglaterra e do País de Gales):
+
+
+![Conjunto de dados criminais do [The Old Bailey](https://www.oldbaileyonline.org/) nas décadas entre 1670 e 1800.](/images/r-basics-with-tabular-data/Intro-to-R-2.png)
+
+
+O Old Bailey contém estatísticas e informações sobre casos criminais entre 1674 e 1913 que foram mantidos pelo Tribunal Penal Central de Londres. Se quiséssemos analisar o número total de crimes de roubo e furto violento entre 1670 e 1710, poderíamos colocar esses números em uma matriz.
+
+Para isso, vamos criar as variáveis Roubos e RoubosViolentos usando os totais de cada década como pontos de dados:
+
+```
+> Roubos <- c(2,30,38,13)
+RoubosViolentos <- c(7,20,36,3)
+```
+
+Para criar uma matriz podemos usar a função cbind() (*column bind* ou união de colunas). Isto une Roubos e RoubosViolentos em colunas, representadas como Crime aqui:
+
+```
+> Roubos <- c(2,30,38,13)
+RoubosViolentos <- c(7,20,36,3)
+Crime <- cbind(Roubos,RoubosViolentos)
+Crime
+     Roubos RoubosViolentos
+[1,]      2               7
+[2,]     30              20
+[3,]     38              36
+[4,]     13               3
+```
+
+Também é possível estabelecer uma matriz usando rbind(). rbind() une os dados em fileiras (*row bind* ou união de fileiras). Observe a diferença entre Crime e Crime2:
+
+```
+> Crime2 <- rbind(Roubos,RoubosViolentos)
+> Crime2
+                [,1] [,2] [,3] [,4]
+Roubos             2   30   38   13
+RoubosViolentos    7   20   36    3
+```
+
+A segunda matriz também pode ser criada usando a expressão t(Crime) (matriz transposta), que gera o inverso de Crime.
+
+Também é possível construir uma matriz utilizando matrix(). Isto permite transformar uma sequência de números, como o número de roubos e roubos violentos cometidos, em uma matriz se não tiver criado variáveis separadas para estes valores:
+
+```
+> matrix(c(2,30,3,4,7,20,36,3),nrow=2)
+     [,1] [,2] [,3] [,4]
+[1,]    2    3    7   36
+[2,]   30    4   20    3
+```
+
+```
+> matrix(c(2,30,3,4,7,20,36,3),ncol=2)
+     [,1] [,2]
+[1,]    2    7
+[2,]   30   20
+[3,]    3   36
+[4,]    4    3
+```
+
+A primeira parte da função é a lista de números. Depois disso, é possível determinar quantas linhas (nrow=) (número de linhas) ou colunas (ncol=) (número de colunas) a matriz terá.
+
+A função apply() permite executar a mesma função em cada linha ou coluna de uma matriz. Existem três partes da função apply() que é preciso especificar: a matriz que está sendo utilizada, a dimensão em que se deseja operar (1 para linhas, 2 para colunas) e a função que se deseja executar na matriz:
+
+```
+> Crime
+     Roubos RoubosViolentos
+[1,]      2               7
+[2,]     30              20
+[3,]     38              36
+[4,]     13               3
+> apply(Crime,1,mean)
+[1]  4.5 25.0 37.0  8.0
+```
+
+Este exemplo mostra a função apply utilizada na matriz Crime para calcular a média (*mean*) de cada linha e, portanto, o número médio de roubos e assaltos combinados que foram cometidos em cada década. Se quiser saber a média de cada coluna, use um 2 em vez de um 1 dentro da função:
+
+```
+> apply(Crime,2,mean)
+         Roubos RoubosViolentos
+          20.75           16.50
+```
+
+Isto mostra o número médio de roubos e assaltos entre as décadas.
+
+### Prática
+
+1. Criar uma matriz de duas colunas usando os seguintes dados de Quebra da Paz (*Breaking Peace*) e Assassinatos (*Killing*) de 1710 a 1730 da tabela acima do Old Bailey: c(2,3,3,44,51,17)
+
+2. Usar a função cbind() para juntar QuebraPaz <- c(2,3,3) e Assassinatos <- c(44,51,17).
+
+3. Calcular a média de cada coluna para a matriz acima usando a função apply().
+
+### Soluções
+
+1.
+```
+> matrix(c(2,3,3,44,51,17),ncol=2)
+     [,1] [,2]
+[1,]    2   44
+[2,]    3   51
+[3,]    3   17
+```
+
+2.
+```
+> QuebraPaz <- c(2,3,3)
+> Assassinatos <- c(44,51,17)
+> PazAssassinatos <- cbind(QuebraPaz,Assassinatos)
+> PazAssassinatos
+     QuebraPaz Assassinatos
+[1,]         2           44
+[2,]         3           51
+[3,]         3           17
+```
+
+3.
+```
+> apply(PazAssassinatos,2,mean)
+   QuebraPaz Assassinatos
+    2.666667    37.333333
+```
+
+Matrizes podem ser úteis quando se está trabalhando com pequenas quantidades de dados. No entanto, nem sempre é a melhor opção, porque uma matriz pode ser difícil de ler. Às vezes é mais fácil criar seu próprio ficheiro usando um programa de planilhas como [Excel](https://pt.wikipedia.org/wiki/Microsoft_Excel) ou [Open Office](https://www.openoffice.org/pt/) para garantir que todas as informações que deseja estudar estejam organizadas e importar esse ficheiro para o R.
+
+## Carregar seu próprio conjunto de dados em R
+
+Agora que já praticou com dados simples, pode trabalhar com seus próprios dados. Como trabalhar com esses dados em R? Há várias maneiras de se fazer isso. A primeira é carregar a planilha diretamente em R. Outra maneira é importar um ficheiro CSV (*comma-separated values* ou valores separados por vírgula) ou TXT (de texto) para R.
+
+Para carregar um ficheiro Excel diretamente no console R, é necessário primeiro instalar o pacote readxl (ler o ficheiro Excel). Para fazer isto, digite install.packages("readxl") no console e pressione Enter. Pode ser que seja necessário verificar se o pacote foi instalado no console clicando na guia “Packages&Data” (pacotes e dados) no menu, selecionando “Package Manager” (gerenciador de pacotes) e depois clicando na caixa ao lado do pacote readxl. A partir daqui, é possível selecionar um ficheiro e carregá-lo em R. Abaixo está um exemplo de como pode parecer carregar um simples ficheiro Excel:
+
+```
+> x <- read_excel("Workbook2.xlsx")
+> x
+  a b
+1 1 5
+2 2 6
+3 3 7
+4 4 8
+```
+
+Após o comando read_excel insere-se o nome do ficheiro que está sendo selecionado. Os números embaixo correspondem aos dados da planilha de amostra que utilizei. Observe como as linhas estão numeradas e as colunas estão etiquetadas como eram na planilha original.
+
+Quando estiver carregando dados em R, certifique-se de que o ficheiro que está sendo acessado esteja dentro do diretório em seu computador de onde se está trabalhando. Para verificar isso, digite dir() (diretório) ou getwd() (mostrar o caminho do diretório de trabalho) no console. É possível mudar o diretório, se necessário, indo para a aba “Miscellaneous” (diversos) na barra de título em sua tela e, em seguida, selecionando o que se quer definir como diretório para R. Se não fizer isso, R não será capaz de encontrar o ficheiro corretamente.
+
+Outra maneira de carregar dados em R é usar um ficheiro CSV. Um ficheiro [CSV](https://pt.wikipedia.org/wiki/Comma-separated_values) exibe valores em filas e colunas, separados por vírgulas. É possível salvar qualquer documento criado no Excel como um ficheiro .csv e depois carregá-lo em R. Para usar um ficheiro CSV em R, atribua o ficheiro a um objeto usando o operador <- e depois digite read.csv(file="file-name.csv",header=TRUE,sep=",") no console. file-name indica ao R qual ficheiro selecionar, enquanto que definir o cabeçalho, header=, como TRUE (verdadeiro) diz que a primeira linha contém cabeçalhos e não variáveis. sep="," indica que os valores em cada linha estão separados por vírgulas.
+
+Normalmente, um CSV pode conter muitas informações.
+Entretanto, para começar, tente criar um ficheiro CSV em Excel usando os dados do *Old Bailey* que usamos para as matrizes. Defina as colunas para as datas entre 1710 e 1730, mais o número de crimes de quebra da paz e de assassinatos para aquelas décadas. Salve o ficheiro como "OldBailey.csv" e tente carregá-lo em R usando os passos acima. Veremos que:
+
+```
+> read.csv(file="OldBailey.csv", header=TRUE, sep=",")
+  Date QuebraPaz Assassinatos
+1 1710         2           44
+2 1720         3           51
+3 1730         4           17
+```
+
+Agora poderíamos acessar os dados em R e fazer quaisquer cálculos para ajudá-lo a estudar os dados. Os ficheiros CSV também podem ser muito mais complexos do que este exemplo, portanto, qualquer conjunto de dados com os quais trabalhamos em estudos próprios também poderia ser aberto em R.
+
+TXT (ou ficheiros de texto) podem ser importados para R de maneira semelhante. Usando o comando read.table(), é possível carregar ficheiros de texto em R, seguindo a mesma sintaxe que no exemplo acima.
+
+## Salvar dados em R
+
+Agora que carregamos dados em R e conhecemos algumas maneiras de trabalhar com os dados, o que acontece se quisermos salvá-los em outro formato? A função write.xlsx() permite que se faça exatamente isso - pegar os dados de R e salvá-los em um ficheiro Excel. Tente escrever o ficheiro do *Old Bailey* em um ficheiro Excel. Primeiro, será necessário carregar o pacote e depois será possível criar o ficheiro após criar uma variável para os dados do *Old Bailey*:
+
+```
+> library(xlsx)
+> write.xlsx(x= OldBailey, file= "OldBailey.xlsx", sheetName= "OldBailey", row.names= TRUE)
+```
+
+Neste caso, e dentro do parêntese desta função [write.xlsx](https://www.rdocumentation.org/packages/xlsx/versions/0.6.5), estamos pedindo para processar a variável OldBailey com o argumento x= . Ao mesmo tempo, indicamos que o ficheiro salvo deve ser chamado “OldBailey”, com a extensão “.xlsx”, por meio do argumento file= . Além disso, damos o nome "OldBailey" à planilha onde estarão os dados com sheetName= . Finalmente, estabelecemos que queremos (TRUE ou verdadeiro) que os nomes de linha em nossa variável sejam salvos no novo ficheiro. [N. da T.]
+
+## Resumo e passos seguintes
+
+Este tutorial explorou as bases do uso de R para trabalhar com dados tabulares de pesquisa. O R pode ser uma ferramenta muito útil para a pesquisa em ciências humanas e sociais porque a análise de dados é reprodutível e permite analisar dados rapidamente sem ter que montar um sistema complicado. Agora que conhece alguns dos conceitos básicos do R, pode-se explorar algumas das outras funções do programa, incluindo cálculos estatísticos, produção de gráficos e criação de suas próprias funções.
+
+Para mais informações sobre o R, visite o [R Manual](https://cran.r-project.org/doc/manuals/r-release/R-intro.html) (em inglês).
+
+Há também uma série de outros tutoriais de R online, inclusive:
+
+* [R: A self-learn tutorial](https://web.archive.org/web/20191015004305/https://www.nceas.ucsb.edu/files/scicomp/Dloads/RProgramming/BestFirstRTutorial.pdf) (em inglês) - este tutorial passa por uma série de funções e fornece exercícios para praticar competências.
+
+* [DataCamp Introduction to R](https://www.datacamp.com/courses/free-introduction-to-r) - este é um curso online gratuito que lhe dá feedback sobre seu código para ajudar a identificar erros e aprender como escrever código de forma mais eficiente.
+
+Finalmente, um grande recurso para historiadores digitais é o [Lincoln Mullen’s Digital History Methods in R](https://dh-r.lincolnmullen.com/).
É um rascunho de um livro escrito especificamente sobre como usar R para o trabalho de história digital. + +## Notas + +[^1]: Box, G. E. P., Jenkins, G. M. e Reinsel, G. C. (1976), Time Series Analysis, Forecasting and Control. Third Edition. Holden-Day. Series G. +[^2]: Henderson e Velleman (1981), Building multiple regression models interactively. Biometrics, 37, 391Ð411. +[^3]: Nota da tradutora: Um galão equivale a 3,78 litros e uma milha equivale a 1,6 quilômetros. diff --git a/pt/licoes/nocoes-basicas-paginas-web-html.md b/pt/licoes/nocoes-basicas-paginas-web-html.md index e355644598..0ab6ab3380 100644 --- a/pt/licoes/nocoes-basicas-paginas-web-html.md +++ b/pt/licoes/nocoes-basicas-paginas-web-html.md @@ -1,121 +1,121 @@ ---- -title: Noções básicas de páginas web e HTML -layout: lesson -slug: nocoes-basicas-paginas-web-html -date: 2012-07-17 -translation_date: 2021-05-12 -authors: -- William J. Turkel -- Adam Crymble -reviewers: -- Jim Clifford -- Amanda Morton -editors: -- Miriam Posner -translator: -- Aracele Torres -translation-editor: -- Danielle Sanches -translation-reviewer: -- Bruno Martins -- Rômulo Predes -difficulty: 2 -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/318 -activity: presenting -topics: [python] -abstract: "Esta lição é uma introdução ao HTML e às páginas da web que ele estrutura." -next: trabalhando-ficheiros-texto-python -previous: introducao-e-instalacao -original: viewing-html-files -avatar_alt: Uma mulher ouvindo um homem através de uma trombeta de ouvido -doi: 10.46430/phpt0002 ---- - -{% include toc.html %} - - - - -## Visualizando arquivos HTML - -Quando você está trabalhando com fontes online, na maior parte do tempo utiliza -ficheiros contendo anotações em HTML (Hyper Text Markup Language). O seu navegador web já -sabe como interpretar HTML, apresentando a informação de uma forma adequada para leitores humanos. -A maioria dos navegadores também permite que você veja o *código-fonte* HTML de qualquer página que você visitar. -As duas imagens abaixo mostram uma página web típica (do *Old Bailey Online*) e o código -HTML usado para gerar essa página, que você pode ver com a opção do menu do Firefox -`Abrir menu -> Desenvolvimento web -> Código-fonte da página`. - -Quando você está trabalhando no navegador, normalmente não precisa (ou quer) ver o código-fonte HTML de uma página da web. -No entanto, se você está criando uma página própria, pode ser muito útil ver como outras pessoas realizaram um -determinado efeito. Você também vai querer estudar o código HTML enquanto escreve -programas para manipular páginas da web ou extrair informação automaticamente delas. - -{% include figure.html filename="obo.png" caption="Captura de tela do Old Bailey Online" %} - -{% include figure.html filename="obo-page-source.png" caption="Código HTML da página Old Bailey Online" %} - -(Para aprender mais sobre HTML, você pode achar útil nesse momento usar o [W3 Schools HTML Tutorial][]. Um conhecimento detalhado de HTML não é necessário para continuar lendo, mas qualquer tempo que você passe aprendendo HTML será amplamente recompensado no seu trabalho como historiador digital ou humanista digital.) - -## "Olá mundo" em HTML - -A HTML é conhecida como uma linguagem de *marcação*. Em outras palavras, HTML é o texto que foi "marcado" (i.e., anotado), com *tags* que fornecem informações para o interpretador (que geralmente é um navegador web). 
Suponha que está formatando uma entrada bibliográfica e quer indicar o título de um trabalho, colocando-o em itálico. Em HTML, pode utilizar tags `em` ("em" significa ênfase) para este efeito. Portanto, parte do seu ficheiro HTML pode ter a seguinte aparência: - -``` xml -... em Digital History de Cohen e Rosenzweig, por exemplo ... -``` - -O ficheiro HTML mais simples consiste em *tags* que indicam o início e o fim de todo o documento, e *tags* que identificam um `head` e um `body` dentro desse documento. A informação descritiva (i.e., os "meta-dados") sobre o ficheiro geralmente vai para o cabeçalho, enquanto que a informação que será exibida ao leitor humano geralmente vai para o corpo. - -``` xml - - -Olá mundo! - -``` - -Você pode tentar criar algum código HTML. Com o seu editor de texto, crie um novo ficheiro. Copie o código abaixo no editor. A primeira linha diz ao navegador qual o tipo do ficheiro. A *tag* `html` tem a direção do texto definida como `ltr` (da esquerda para a direita), e ainda a propriedade `lang` (idioma) definida como português. A *tag* `title` no cabeçalho do documento HTML contém informação que geralmente é exibida na barra superior de uma janela quando a página está sendo visualizada, e nas abas do Firefox. - - -``` xml - - - - - <!-- Insira seu título aqui --> - - - - - - -``` - -Altere - -``` xml - -``` - -e - -``` xml - -``` - -para - -``` xml -Olá mundo! -``` - -Guarde o ficheiro num diretório `programming-historian` como `ola-mundo.html`. De seguida, vá para o Firefox e escolha `Abrir menu -> Abrir ficheiro...` e -então escolha `ola-mundo.html`. Dependendo do seu editor de texto, você pode ter a opção 'visualizar página no navegador' ou 'abrir no navegador'. Depois de abrir o ficheiro, a sua mensagem deve aparecer no navegador. Observe a diferença entre abrir um ficheiro HTML com um navegador como o Firefox (que o interpreta), ou abrir o mesmo ficheiro com seu editor de texto (que não faz o mesmo). - -## Leituras sugeridas para aprender HTML - -- [W3 Schools HTML Tutorial][] -- [W3 Schools HTML5 Tutorial][] - - [W3 Schools HTML tutorial]: http://www.w3schools.com/html/default.asp - [W3 Schools HTML5 Tutorial]: http://www.w3schools.com/html/html5_intro.asp +--- +title: Noções básicas de páginas web e HTML +layout: lesson +slug: nocoes-basicas-paginas-web-html +date: 2012-07-17 +translation_date: 2021-05-12 +authors: +- William J. Turkel +- Adam Crymble +reviewers: +- Jim Clifford +- Amanda Morton +editors: +- Miriam Posner +translator: +- Aracele Torres +translation-editor: +- Danielle Sanches +translation-reviewer: +- Bruno Martins +- Rômulo Predes +difficulty: 2 +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/318 +activity: presenting +topics: [python] +abstract: "Esta lição é uma introdução ao HTML e às páginas da web que ele estrutura." +next: trabalhando-ficheiros-texto-python +previous: introducao-e-instalacao +original: viewing-html-files +avatar_alt: Uma mulher ouvindo um homem através de uma trombeta de ouvido +doi: 10.46430/phpt0002 +--- + +{% include toc.html %} + + + + +## Visualizando arquivos HTML + +Quando você está trabalhando com fontes online, na maior parte do tempo utiliza +ficheiros contendo anotações em HTML (Hyper Text Markup Language). O seu navegador web já +sabe como interpretar HTML, apresentando a informação de uma forma adequada para leitores humanos. +A maioria dos navegadores também permite que você veja o *código-fonte* HTML de qualquer página que você visitar. 
+As duas imagens abaixo mostram uma página web típica (do *Old Bailey Online*) e o código
+HTML usado para gerar essa página, que você pode ver com a opção do menu do Firefox
+`Abrir menu -> Desenvolvimento web -> Código-fonte da página`.
+
+Quando você está trabalhando no navegador, normalmente não precisa (ou quer) ver o código-fonte HTML de uma página da web.
+No entanto, se você está criando uma página própria, pode ser muito útil ver como outras pessoas realizaram um
+determinado efeito. Você também vai querer estudar o código HTML enquanto escreve
+programas para manipular páginas da web ou extrair informação automaticamente delas.
+
+{% include figure.html filename="obo.png" caption="Captura de tela do Old Bailey Online" %}
+
+{% include figure.html filename="obo-page-source.png" caption="Código HTML da página Old Bailey Online" %}
+
+(Para aprender mais sobre HTML, você pode achar útil nesse momento usar o [W3 Schools HTML Tutorial][]. Um conhecimento detalhado de HTML não é necessário para continuar lendo, mas qualquer tempo que você passe aprendendo HTML será amplamente recompensado no seu trabalho como historiador digital ou humanista digital.)
+
+## "Olá mundo" em HTML
+
+A HTML é conhecida como uma linguagem de *marcação*. Em outras palavras, HTML é o texto que foi "marcado" (i.e., anotado), com *tags* que fornecem informações para o interpretador (que geralmente é um navegador web). Suponha que está formatando uma entrada bibliográfica e quer indicar o título de um trabalho, colocando-o em itálico. Em HTML, pode utilizar tags `em` ("em" significa ênfase) para este efeito. Portanto, parte do seu ficheiro HTML pode ter a seguinte aparência:
+
+``` xml
+... em <em>Digital History</em> de Cohen e Rosenzweig, por exemplo ...
+```
+
+O ficheiro HTML mais simples consiste em *tags* que indicam o início e o fim de todo o documento, e *tags* que identificam um `head` e um `body` dentro desse documento. A informação descritiva (i.e., os "meta-dados") sobre o ficheiro geralmente vai para o cabeçalho, enquanto que a informação que será exibida ao leitor humano geralmente vai para o corpo.
+
+``` xml
+<html>
+<head></head>
+<body>
+Olá mundo!
+</body>
+</html>
+```
+
+Você pode tentar criar algum código HTML. Com o seu editor de texto, crie um novo ficheiro. Copie o código abaixo no editor. A primeira linha diz ao navegador qual o tipo do ficheiro. A *tag* `html` tem a direção do texto definida como `ltr` (da esquerda para a direita), e ainda a propriedade `lang` (idioma) definida como português. A *tag* `title` no cabeçalho do documento HTML contém informação que geralmente é exibida na barra superior de uma janela quando a página está sendo visualizada, e nas abas do Firefox.
+
+
+``` xml
+<!doctype html>
+<html dir="ltr" lang="pt-PT">
+
+<head>
+  <meta charset="utf-8">
+  <title><!-- Insira seu título aqui --></title>
+</head>
+
+<body>
+  <!-- Insira seu conteúdo aqui -->
+</body>
+
+</html>
+```
+
+Altere
+
+``` xml
+<title><!-- Insira seu título aqui --></title>
+```
+
+e
+
+``` xml
+<!-- Insira seu conteúdo aqui -->
+```
+
+para
+
+``` xml
+<title>Olá mundo!</title>
+```
+
+e
+
+``` xml
+Olá mundo!
+```
+
+Guarde o ficheiro num diretório `programming-historian` como `ola-mundo.html`. De seguida, vá para o Firefox e escolha `Abrir menu -> Abrir ficheiro...` e
+então escolha `ola-mundo.html`. Dependendo do seu editor de texto, você pode ter a opção 'visualizar página no navegador' ou 'abrir no navegador'. Depois de abrir o ficheiro, a sua mensagem deve aparecer no navegador. Observe a diferença entre abrir um ficheiro HTML com um navegador como o Firefox (que o interpreta), ou abrir o mesmo ficheiro com seu editor de texto (que não faz o mesmo).
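+Como alternativa, também é possível abrir o ficheiro a partir de um pequeno script em Python, a linguagem usada nas próximas lições desta série. Um esboço mínimo, assumindo que `ola-mundo.html` está no diretório de trabalho atual:
+
+``` python
+# Esboço: abre no navegador padrão o ficheiro HTML criado acima
+import webbrowser
+from pathlib import Path
+
+webbrowser.open(Path('ola-mundo.html').resolve().as_uri())
+```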
+ +## Leituras sugeridas para aprender HTML + +- [W3 Schools HTML Tutorial][] +- [W3 Schools HTML5 Tutorial][] + + [W3 Schools HTML tutorial]: https://www.w3schools.com/html/default.asp + [W3 Schools HTML5 Tutorial]: https://www.w3schools.com/html/html5_intro.asp diff --git a/pt/licoes/normalizacao-dados-textuais-python.md b/pt/licoes/normalizacao-dados-textuais-python.md index b83e651b25..1782f21b0e 100644 --- a/pt/licoes/normalizacao-dados-textuais-python.md +++ b/pt/licoes/normalizacao-dados-textuais-python.md @@ -1,158 +1,158 @@ ---- -title: Normalização de Dados Textuais com Python -layout: lesson -collection: lessons -slug: normalizacao-dados-textuais-python -date: 2012-07-17 -translation_date: 2022-10-27 -authors: -- William J. Turkel -- Adam Crymble -reviewers: -- Jim Clifford -- Frederik Elwert -editors: -- Miriam Posner -translator: -- Felipe Lamarca -translation-editor: -- Jimmy Medeiros -translation-reviewer: -- André Salvo -- Gabriela Kucuruza -difficulty: 2 -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/460 -activity: transforming -topics: [python] -abstract: "Nesta lição tornará a lista criada na lição 'De HTML para Lista de Palavras' mais fácil de ser analisada através da normalização desses dados." -original: normalizing-data -avatar_alt: Mulher alta a arrastar um jovem baixo -doi: 10.46430/phpt0029 ---- - - -{% include toc.html %} - -
    -O site do Old Bailey Online foi recentemente atualizado. Infelizmente, devido às diversas mudanças, muitos (se não todos) os elementos do site de exemplo usado nesta lição não funcionarão conforme descrito. No entanto, as metodologias ensinadas por esta lição permanecem relevantes e podem ser adaptadas pelos leitores para um site de exemplo diferente. Estamos trabalhando na adaptação da lição para o novo site do Old Bailey Online, mas ainda não temos cronograma preciso de quando a lição será atualizada. [Abril de 2024] -
    - -## Objetivos da Lição - -A lista que criámos na lição [De HTML para Lista de Palavras (parte 2)](/pt/licoes/HTML-lista-palavras-2) precisa ser normalizada antes de poder ser utilizada. Faremos isso através da aplicação de alguns métodos de string adicionais, bem como utilizando expressões regulares. Uma vez normalizados, estaremos aptos a analisar os nossos dados mais facilmente. - -## Ficheiros Necessários para esta Lição - -- `html-to-list1.py` -- `obo.py` - -Caso não tenha esses ficheiros das lições anteriores, pode fazer o *download* de um [zip](/assets/python-lessons3.zip). - -## Limpando a Lista - -Na lição [De HTML para Lista de Palavras (parte 2)](/pt/licoes/HTML-lista-palavras-2), escrevemos um programa em Python chamado `html-to-list1.py` que fazia o *download* de uma [página web](https://perma.cc/8LM6-W39K), removia a formatação HTML e os metadados e retornava uma lista de "palavras" como a apresentada abaixo. Tecnicamente, essas entidades são chamadas de "*tokens*" ao invés de "palavras". Elas incluem alguns elementos que, estritamente falando, não são palavras (como a abreviação &c. para "etcetera"). Elas também incluem elementos que podem ser considerados composições de mais de uma palavra. O possessivo "Akerman's", por exemplo, é ocasionalmente analisado por linguistas como duas palavras: "Akerman" e um marcador de posse. "o'clock" é uma palavra ou duas? E assim por diante. - -Volte ao seu programa `html-to-list1.py` e certifique-se de que o seu resultado se assemelha ao seguinte: - - -``` python -['324.', '\xc2\xa0', 'BENJAMIN', 'BOWSEY', '(a' 'blackmoor', ')', 'was', 'indicted', 'for', 'that', 'he', 'together', 'with', 'five', 'hundred', 'other', 'persons', 'and', 'more,', 'did,', 'unlawfully,' 'riotously,', 'and', 'tumultuously', 'assemble', 'on', 'the', '6th', 'of', 'June', 'to', 'the', 'disturbance', 'of', 'the', 'public', 'peace', 'and', 'did', 'begin', 'to', 'demolish', 'and', 'pull', 'down', 'the', 'dwelling', 'house', 'of', '\xc2\xa0', 'Richard', 'Akerman', ',', 'against', 'the', 'form', 'of', 'the', 'statute,', '&c.', '\xc2\xa0', 'ROSE', 'JENNINGS', ',', 'Esq.', 'sworn.', 'Had', 'you', 'any', 'occasion', 'to', 'be', 'in', 'this', 'part', 'of', 'the', 'town,', 'on', 'the', '6th', 'of', 'June', 'in', 'the', 'evening?', '-', 'I', 'dined', 'with', 'my', 'brother', 'who', 'lives', 'opposite', 'Mr.', "Akerman's", 'house.', 'They', 'attacked', 'Mr.', "Akerman's", 'house', 'precisely', 'at', 'seven', "o'clock;", 'they', 'were', 'preceded', 'by', 'a', 'man', 'better', 'dressed', 'than', 'the', 'rest,', 'who'] -``` - -Por si só, a habilidade de separar um documento em palavras não é muito útil, já que somos capazes de ler. Podemos usar o texto, no entanto, para executar tarefas que não são sempre possíveis sem *softwares* especiais. Começaremos calculando as frequências dos *tokens* e outras unidades linguísticas, uma forma clássica de mensurar textos. - -Está claro que a nossa lista precisará de uma limpeza antes de conseguirmos utilizá-la para contar frequências. Em linha com as práticas estabelecidas em [De HTML para Lista de Palavras (parte 1)](/pt/licoes/HTML-lista-palavras-1), vamos tentar descrever o nosso algoritmo em português primeiro. Desejamos saber a frequência de cada palavra com sentido que aparece na transcrição do julgamento. 
Desse modo, as etapas envolvidas podem ser semelhantes a estas: - -- Converter todas as palavras para letras minúsculas de modo que "BENJAMIN" e "benjamin" sejam contabilizadas como a mesma palavra -- Remover quaisquer caracteres estranhos ou incomuns -- Contar o número de vezes que cada palavra aparece -- Remover palavras excessivamente comuns como "it", "the", "and", etc. - -## Converter para Minúsculas - -Tipicamente tokens são convertidos em letras minúsculas ao contar frequências, então faremos isso através do método de string `lower` que foi introduzido em [Manipular strings com Python](/pt/licoes/manipular-strings-python). Já que este é um método de string, devemos aplicá-lo à string `text` no programa `html-to-list1.py`. Ajuste `html-to-list1.py` adicionando a *string tag* `lower()` ao final da string `text`. - - -``` python -#html-to-list1.py -import urllib.request, urllib.error, urllib.parse, obo - -url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33' - -response = urllib.request.urlopen(url) -html = str(response.read().decode('UTF-8')) -text = obo.stripTags(html).lower() #adicione o método de string aqui -wordlist = text.split() - -print(wordlist) -``` - -Agora deve ver a mesma lista de palavras de antes, mas com todos os caracteres minúsculos. - -Ao chamar métodos em sequência como mostrado, torna-se possível manter o nosso código curto e fazer mudanças bastante significativas no nosso programa. - -Como afirmámos anteriormente, o Python torna fácil a execução de muitas tarefas com pouquíssimo código. - -Neste ponto, podemos examinar uma série de outras entradas do *Old Bailey Online* e uma ampla gama de outras fontes em potencial para termos certeza de que não há outros caracteres especiais que causarão problema posteriormente. Também podemos tentar antecipar situações nas quais não desejamos nos livrar de pontuação (por exemplo, para distinguir valores monetários como "$1629” ou “£1295” de datas, ou reconhecer que “1629-40” carrega um significado diferente de “1629 40”). Isso é o que programadores profissionais são pagos para fazer: tentar pensar em tudo que pode dar errado e tratar isso com antecedência. - -Vamos adotar uma abordagem diferente. O nosso objetivo principal é desenvolver técnicas que um historiador em exercício pode utilizar durante o processo de investigação. Isso significa que quase sempre preferiremos soluções aproximadamente corretas que possam ser desenvolvidas rapidamente. Então, ao invés de perder tempo neste momento para tornar o nosso programa robusto em face de exceções, vamos simplesmente nos livrar de tudo que não seja uma letra com ou sem acento ou um algarismo arábico. Programação é tipicamente um processo de "refinamento gradual". Começamos com um problema e parte de uma solução, e depois continuamos refinando a solução até obter um resultado que funcione melhor. - -## Expressões Regulares de Python - -Nós eliminamos as letras maiúsculas. Agora só precisamos nos livrar da pontuação. A pontuação prejudicará as nossas contagens de frequência se as mantivermos lá. Desejamos que "evening?" seja contabilizado como "evening" e "1780." como "1780", claro. - -É possível utilizar o método de string `replace` para remover cada tipo de pontuação: - -``` python -text = text.replace('[', '') -text = text.replace(']', '') -text = text.replace(',', '') -#etc... -``` - -No entanto, isso não é muito eficiente. Em linha com o nosso objetivo de criar programas curtos e poderosos, utilizaremos um mecanismo chamado *expressões regulares*. 
Expressões regulares são fornecidas por muitas linguagens de programação de várias maneiras distintas. - -Expressões regulares permitem que busque por padrões bem definidos e podem diminuir drasticamente o comprimento do código. Por exemplo, se desejasse saber se uma substring corresponde a uma letra do alfabeto, ao invés de usar uma condição `if/else` para verificar se ela representa a letra "a", depois "b", depois "c" e assim por diante, poderia usar uma expressão regular para verificar se a substring se assemelha a uma letra entre "a" e "z". Ou poderia verificar a presença de um dígito, uma letra maiúscula, ou qualquer caractere alfanumérico, ou um [retorno de carro](https://perma.cc/T7DA-RG2L), ou qualquer combinação dos itens acima e muito mais. - -Em Python, expressões regulares estão disponíveis como um módulo. Para acelerar o processamento, ele não é carregado automaticamente porque nem todos os programas o exigem. Então, precisará importar (`import`) o módulo (chamado `re`, abreviação de *regular expressions*) da mesma forma que importou o módulo `obo.py`. - -Como estamos interessados apenas em caracteres alfanuméricos, criaremos uma expressão regular que irá isolá-los e removerá o resto. Copie a função a seguir e cole-a ao final do módulo `obo.py`. Pode manter as outras funções do módulo, já que continuaremos a usá-las. - - -``` python -# Dada uma string de texto, remova todos os caracteres não-alfanuméricos (usando a definição Unicode de alfanumérico) - -def stripNonAlphaNum(text): - import re - return re.compile(r'\W+', re.UNICODE).split(text) -``` - -A expressão regular no código acima é o material dentro da string, em outras palavras: `W+`. `W` é uma abreviatura para a classe de caracteres não-alfanuméricos. Numa expressão regular de Python, o sinal de adição (+) encontra uma ou mais cópias de um determinado caractere. `re.UNICODE` informa ao interpretador que desejamos incluir caracteres de outros idiomas do mundo em nossa definição de alfanumérico, assim como de "A" a "Z", "a" a "z" e 0-9 do português. Expressões regulares devem ser *compiladas* antes de poderem ser utilizadas, que é o que o resto do comando faz. Não se preocupe em compreender a parte da compilação agora. - -Agora que refinamos o nosso programa `html-to-list1.py`, ele se parece com isto: - -``` python -#html-to-list1.py -import urllib.request, urllib.error, urllib.parse, obo - -url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33' - -response = urllib.request.urlopen(url) -html = response.read().decode('UTF-8') -text = obo.stripTags(html).lower() -wordlist = obo.stripNonAlphaNum(text) - -print(wordlist) -``` - -Ao executar o programa e verificar a saída no painel "Saída de Comando", verá que ele fez um bom trabalho. Esse código irá dividir formas hifenizadas como "coach-wheels" em duas palavras e irá transformar o possessivo "s" ou "o'clock" em palavras separadas ao perderem o apóstrofo. Ainda assim, o código faz uma aproximação boa o suficiente para os nossos objetivos e devemos agora passar para a contagem de frequências antes de tentar melhorá-lo. (Caso trabalhe com fontes em mais de um idioma, precisa aprender um pouco mais a respeito do padrão [Unicode](https://perma.cc/7ACH-KCDN) e sobre o [suporte de Python](https://web.archive.org/web/20180502053841/http://www.diveintopython.net/xml_processing/unicode.html) a ele.) 
- -## Leituras Sugeridas - -Para praticar mais as Expressões Regulares, o capítulo 7 de "[Dive into Python](https://web.archive.org/web/20180416143856/http://www.diveintopython.net/regular_expressions/index.html)" de Mark Pilgrim pode ser um tutorial útil. - -## Sincronização de Código - -Para acompanhar as lições futuras, é importante que tenha os ficheiros e programas corretos no seu diretório *programming historian*. Ao final de cada capítulo nesta série pode fazer o *download* do ficheiro zip do programming historian para garantir que possui o código correto. - -- python-lessons4.zip ([zip sync](/assets/python-lessons4.zip)) +--- +title: Normalização de Dados Textuais com Python +layout: lesson +collection: lessons +slug: normalizacao-dados-textuais-python +date: 2012-07-17 +translation_date: 2022-10-27 +authors: +- William J. Turkel +- Adam Crymble +reviewers: +- Jim Clifford +- Frederik Elwert +editors: +- Miriam Posner +translator: +- Felipe Lamarca +translation-editor: +- Jimmy Medeiros +translation-reviewer: +- André Salvo +- Gabriela Kucuruza +difficulty: 2 +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/460 +activity: transforming +topics: [python] +abstract: "Nesta lição tornará a lista criada na lição 'De HTML para Lista de Palavras' mais fácil de ser analisada através da normalização desses dados." +original: normalizing-data +avatar_alt: Mulher alta a arrastar um jovem baixo +doi: 10.46430/phpt0029 +--- + + +{% include toc.html %} + +
    +O site do Old Bailey Online foi recentemente atualizado. Infelizmente, devido às diversas mudanças, muitos (se não todos) os elementos do site de exemplo usado nesta lição não funcionarão conforme descrito. No entanto, as metodologias ensinadas por esta lição permanecem relevantes e podem ser adaptadas pelos leitores para um site de exemplo diferente. Estamos trabalhando na adaptação da lição para o novo site do Old Bailey Online, mas ainda não temos cronograma preciso de quando a lição será atualizada. [Abril de 2024] +
+
+## Objetivos da Lição
+
+A lista que criámos na lição [De HTML para Lista de Palavras (parte 2)](/pt/licoes/HTML-lista-palavras-2) precisa ser normalizada antes de poder ser utilizada. Faremos isso através da aplicação de alguns métodos de string adicionais, bem como utilizando expressões regulares. Uma vez normalizados, estaremos aptos a analisar os nossos dados mais facilmente.
+
+## Ficheiros Necessários para esta Lição
+
+- `html-to-list1.py`
+- `obo.py`
+
+Caso não tenha esses ficheiros das lições anteriores, pode fazer o *download* de um [zip](/assets/python-lessons3.zip).
+
+## Limpando a Lista
+
+Na lição [De HTML para Lista de Palavras (parte 2)](/pt/licoes/HTML-lista-palavras-2), escrevemos um programa em Python chamado `html-to-list1.py` que fazia o *download* de uma [página web](https://perma.cc/8LM6-W39K), removia a formatação HTML e os metadados e retornava uma lista de "palavras" como a apresentada abaixo. Tecnicamente, essas entidades são chamadas de "*tokens*" ao invés de "palavras". Elas incluem alguns elementos que, estritamente falando, não são palavras (como a abreviação &c. para "etcetera"). Elas também incluem elementos que podem ser considerados composições de mais de uma palavra. O possessivo "Akerman's", por exemplo, é ocasionalmente analisado por linguistas como duas palavras: "Akerman" e um marcador de posse. "o'clock" é uma palavra ou duas? E assim por diante.
+
+Volte ao seu programa `html-to-list1.py` e certifique-se de que o seu resultado se assemelha ao seguinte:
+
+
+``` python
+['324.', '\xc2\xa0', 'BENJAMIN', 'BOWSEY', '(a', 'blackmoor', ')', 'was', 'indicted', 'for', 'that', 'he', 'together', 'with', 'five', 'hundred', 'other', 'persons', 'and', 'more,', 'did,', 'unlawfully,', 'riotously,', 'and', 'tumultuously', 'assemble', 'on', 'the', '6th', 'of', 'June', 'to', 'the', 'disturbance', 'of', 'the', 'public', 'peace', 'and', 'did', 'begin', 'to', 'demolish', 'and', 'pull', 'down', 'the', 'dwelling', 'house', 'of', '\xc2\xa0', 'Richard', 'Akerman', ',', 'against', 'the', 'form', 'of', 'the', 'statute,', '&c.', '\xc2\xa0', 'ROSE', 'JENNINGS', ',', 'Esq.', 'sworn.', 'Had', 'you', 'any', 'occasion', 'to', 'be', 'in', 'this', 'part', 'of', 'the', 'town,', 'on', 'the', '6th', 'of', 'June', 'in', 'the', 'evening?', '-', 'I', 'dined', 'with', 'my', 'brother', 'who', 'lives', 'opposite', 'Mr.', "Akerman's", 'house.', 'They', 'attacked', 'Mr.', "Akerman's", 'house', 'precisely', 'at', 'seven', "o'clock;", 'they', 'were', 'preceded', 'by', 'a', 'man', 'better', 'dressed', 'than', 'the', 'rest,', 'who']
+```
+
+Por si só, a habilidade de separar um documento em palavras não é muito útil, já que somos capazes de ler. Podemos usar o texto, no entanto, para executar tarefas que não são sempre possíveis sem *softwares* especiais. Começaremos calculando as frequências dos *tokens* e outras unidades linguísticas, uma forma clássica de mensurar textos.
+
+Está claro que a nossa lista precisará de uma limpeza antes de conseguirmos utilizá-la para contar frequências. Em linha com as práticas estabelecidas em [De HTML para Lista de Palavras (parte 1)](/pt/licoes/HTML-lista-palavras-1), vamos tentar descrever o nosso algoritmo em português primeiro. Desejamos saber a frequência de cada palavra com sentido que aparece na transcrição do julgamento.
Desse modo, as etapas envolvidas podem ser semelhantes a estas: + +- Converter todas as palavras para letras minúsculas de modo que "BENJAMIN" e "benjamin" sejam contabilizadas como a mesma palavra +- Remover quaisquer caracteres estranhos ou incomuns +- Contar o número de vezes que cada palavra aparece +- Remover palavras excessivamente comuns como "it", "the", "and", etc. + +## Converter para Minúsculas + +Tipicamente tokens são convertidos em letras minúsculas ao contar frequências, então faremos isso através do método de string `lower` que foi introduzido em [Manipular strings com Python](/pt/licoes/manipular-strings-python). Já que este é um método de string, devemos aplicá-lo à string `text` no programa `html-to-list1.py`. Ajuste `html-to-list1.py` adicionando a *string tag* `lower()` ao final da string `text`. + + +``` python +#html-to-list1.py +import urllib.request, urllib.error, urllib.parse, obo + +url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33' + +response = urllib.request.urlopen(url) +html = str(response.read().decode('UTF-8')) +text = obo.stripTags(html).lower() #adicione o método de string aqui +wordlist = text.split() + +print(wordlist) +``` + +Agora deve ver a mesma lista de palavras de antes, mas com todos os caracteres minúsculos. + +Ao chamar métodos em sequência como mostrado, torna-se possível manter o nosso código curto e fazer mudanças bastante significativas no nosso programa. + +Como afirmámos anteriormente, o Python torna fácil a execução de muitas tarefas com pouquíssimo código. + +Neste ponto, podemos examinar uma série de outras entradas do *Old Bailey Online* e uma ampla gama de outras fontes em potencial para termos certeza de que não há outros caracteres especiais que causarão problema posteriormente. Também podemos tentar antecipar situações nas quais não desejamos nos livrar de pontuação (por exemplo, para distinguir valores monetários como "$1629” ou “£1295” de datas, ou reconhecer que “1629-40” carrega um significado diferente de “1629 40”). Isso é o que programadores profissionais são pagos para fazer: tentar pensar em tudo que pode dar errado e tratar isso com antecedência. + +Vamos adotar uma abordagem diferente. O nosso objetivo principal é desenvolver técnicas que um historiador em exercício pode utilizar durante o processo de investigação. Isso significa que quase sempre preferiremos soluções aproximadamente corretas que possam ser desenvolvidas rapidamente. Então, ao invés de perder tempo neste momento para tornar o nosso programa robusto em face de exceções, vamos simplesmente nos livrar de tudo que não seja uma letra com ou sem acento ou um algarismo arábico. Programação é tipicamente um processo de "refinamento gradual". Começamos com um problema e parte de uma solução, e depois continuamos refinando a solução até obter um resultado que funcione melhor. + +## Expressões Regulares de Python + +Nós eliminamos as letras maiúsculas. Agora só precisamos nos livrar da pontuação. A pontuação prejudicará as nossas contagens de frequência se as mantivermos lá. Desejamos que "evening?" seja contabilizado como "evening" e "1780." como "1780", claro. + +É possível utilizar o método de string `replace` para remover cada tipo de pontuação: + +``` python +text = text.replace('[', '') +text = text.replace(']', '') +text = text.replace(',', '') +#etc... +``` + +No entanto, isso não é muito eficiente. Em linha com o nosso objetivo de criar programas curtos e poderosos, utilizaremos um mecanismo chamado *expressões regulares*. 
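+De passagem, um laço `for` já encurtaria essas substituições repetidas, embora ainda exigisse enumerar manualmente cada caractere a remover. Um esboço, assumindo a variável `text` criada acima:
+
+``` python
+# Esboço: remove uma lista de sinais de pontuação um a um
+# (assume que a variável `text` já contém o texto da lição)
+for marca in [',', '.', '?', ';', '!', '[', ']', '(', ')']:
+    text = text.replace(marca, '')
+```
+
+As expressões regulares eliminam essa enumeração por completo.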
+Expressões regulares são fornecidas por muitas linguagens de programação de várias maneiras distintas.
+
+Expressões regulares permitem que busque por padrões bem definidos e podem diminuir drasticamente o comprimento do código. Por exemplo, se desejasse saber se uma substring corresponde a uma letra do alfabeto, ao invés de usar uma condição `if/else` para verificar se ela representa a letra "a", depois "b", depois "c" e assim por diante, poderia usar uma expressão regular para verificar se a substring se assemelha a uma letra entre "a" e "z". Ou poderia verificar a presença de um dígito, uma letra maiúscula, ou qualquer caractere alfanumérico, ou um [retorno de carro](https://perma.cc/T7DA-RG2L), ou qualquer combinação dos itens acima e muito mais.
+
+Em Python, expressões regulares estão disponíveis como um módulo. Para acelerar o processamento, ele não é carregado automaticamente porque nem todos os programas o exigem. Então, precisará importar (`import`) o módulo (chamado `re`, abreviação de *regular expressions*) da mesma forma que importou o módulo `obo.py`.
+
+Como estamos interessados apenas em caracteres alfanuméricos, criaremos uma expressão regular que irá isolá-los e removerá o resto. Copie a função a seguir e cole-a ao final do módulo `obo.py`. Pode manter as outras funções do módulo, já que continuaremos a usá-las.
+
+
+``` python
+# Dada uma string de texto, remova todos os caracteres não-alfanuméricos (usando a definição Unicode de alfanumérico)
+
+def stripNonAlphaNum(text):
+    import re
+    return re.compile(r'\W+', re.UNICODE).split(text)
+```
+
+A expressão regular no código acima é o material dentro da string, em outras palavras: `\W+`. `\W` é uma abreviatura para a classe de caracteres não-alfanuméricos. Numa expressão regular de Python, o sinal de adição (`+`) corresponde a uma ou mais ocorrências de um determinado caractere. `re.UNICODE` informa ao interpretador que desejamos incluir caracteres de outros idiomas do mundo em nossa definição de alfanumérico, assim como de "A" a "Z", "a" a "z" e 0-9 do português. Expressões regulares devem ser *compiladas* antes de poderem ser utilizadas, que é o que o resto do comando faz. Não se preocupe em compreender a parte da compilação agora.
+
+Agora que refinamos o nosso programa `html-to-list1.py`, ele se parece com isto:
+
+``` python
+#html-to-list1.py
+import urllib.request, urllib.error, urllib.parse, obo
+
+url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33'
+
+response = urllib.request.urlopen(url)
+html = response.read().decode('UTF-8')
+text = obo.stripTags(html).lower()
+wordlist = obo.stripNonAlphaNum(text)
+
+print(wordlist)
+```
+
+Ao executar o programa e verificar a saída no painel "Saída de Comando", verá que ele fez um bom trabalho. Esse código irá dividir formas hifenizadas como "coach-wheels" em duas palavras e irá transformar o possessivo "s" ou "o'clock" em palavras separadas ao perderem o apóstrofo. Ainda assim, o código faz uma aproximação boa o suficiente para os nossos objetivos e devemos agora passar para a contagem de frequências antes de tentar melhorá-lo. (Caso trabalhe com fontes em mais de um idioma, precisa aprender um pouco mais a respeito do padrão [Unicode](https://perma.cc/7ACH-KCDN) e sobre o [suporte de Python](https://web.archive.org/web/20180502053841/https://www.diveintopython.net/xml_processing/unicode.html) a ele.)
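+Para ganhar intuição sobre o padrão antes de aplicá-lo ao documento inteiro, também é possível testá-lo numa string curta no interpretador. Um esboço com uma frase de exemplo inventada:
+
+``` python
+# Esboço: testa a divisão por caracteres não-alfanuméricos numa string de exemplo
+import re
+
+amostra = "Mr. Akerman's house, precisamente às 19h!"
+print(re.compile(r'\W+', re.UNICODE).split(amostra.lower()))
+# ['mr', 'akerman', 's', 'house', 'precisamente', 'às', '19h', '']
+```
+
+Note que o caractere acentuado em "às" foi preservado e que a pontuação final produz um elemento vazio no fim da lista.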
+ +## Leituras Sugeridas + +Para praticar mais as Expressões Regulares, o capítulo 7 de "[Dive into Python](https://web.archive.org/web/20180416143856/https://www.diveintopython.net/regular_expressions/index.html)" de Mark Pilgrim pode ser um tutorial útil. + +## Sincronização de Código + +Para acompanhar as lições futuras, é importante que tenha os ficheiros e programas corretos no seu diretório *programming historian*. Ao final de cada capítulo nesta série pode fazer o *download* do ficheiro zip do programming historian para garantir que possui o código correto. + +- python-lessons4.zip ([zip sync](/assets/python-lessons4.zip)) diff --git a/pt/licoes/preservar-os-seus-dados-de-investigacao.md b/pt/licoes/preservar-os-seus-dados-de-investigacao.md index 654a3133dd..8cab4af879 100644 --- a/pt/licoes/preservar-os-seus-dados-de-investigacao.md +++ b/pt/licoes/preservar-os-seus-dados-de-investigacao.md @@ -173,7 +173,7 @@ documentada), embora esquemas existentes, como o [Markdown][] estejam disponíve (os ficheiros do Markdown são salvos como .md). Uma excelente página de dicas do Markdown está disponível no GitHub ) para aqueles que desejam seguir - ou adaptar - este esquema existente. O Notepad++ - é recomendado para usuários do Windows, embora + é recomendado para usuários do Windows, embora de modo algum seja essencial para trabalhar com ficheiros .md. Usuários de Mac ou Unix podem achar útil o [Komodo Edit][] ou o [Text Wrangler][]. @@ -237,7 +237,7 @@ frequentemente usadas. O site Homens e Navios do Bacalhau estrutura seu arquivo usando o formato: - *nome do site*/registo/*número de referência* -- +- E o Arquivo Histórico Ultramarino usa o formato: @@ -419,11 +419,11 @@ blog (17 outubro 2013) Hitchcock, Tim, 'Judging a book by its URLs', Historyonics blog (3 janeiro 2014) - + Howard, Sharon, 'Unclean, unclean! What historians can do about sharing our messy research data', Early Modern Notes blog (18 maio 2013) - + Noble, William Stafford, A Quick Guide to Organizing Computational Biology Projects.PLoSComputBiol 5(7): e1000424 (2009) @@ -436,7 +436,7 @@ Information Management: Organising Humanities Material' (2011) Pennock, Maureen, 'The Twelve Principles of Digital Preservation (and a cartridge in a repository…)', British Library Collection Care blog (3 setembro 2013) - + Pritchard, Adam, 'Markdown Cheatsheet' (2013) @@ -445,10 +445,10 @@ Rosenzweig, Roy, 'Scarcity or Abundance? Preserving the Past in a Digital Era', The American Historical Review 108:3 (2003), 735-762. 
UK Data Archive, 'Documenting your Data' - + [PRINCE2]: https://pt.wikipedia.org/wiki/PRINCE2 [multiplataforma]: https://pt.wikipedia.org/wiki/Multiplataforma [Markdown]: https://pt.wikipedia.org/wiki/Markdown - [Komodo Edit]: http://komodoide.com/komodo-edit/ + [Komodo Edit]: https://komodoide.com/komodo-edit/ [Text Wrangler]: https://www.barebones.com/products/textwrangler/ diff --git a/pt/licoes/processamento-basico-texto-r.md b/pt/licoes/processamento-basico-texto-r.md index bf58fa6513..4bca3b0f2f 100644 --- a/pt/licoes/processamento-basico-texto-r.md +++ b/pt/licoes/processamento-basico-texto-r.md @@ -1,1078 +1,1078 @@ ---- -title: Processamento Básico de Texto em R -slug: processamento-basico-texto-r -layout: lesson -date: 2017-03-27 -translation_date: 2021-07-13 -authors: -- Taylor Arnold -- Lauren Tilton -reviewers: -- Brandon Walsh -- John Russell -editors: -- Jeri Wieringa -translator: -- Diana Rebelo Rodriguez -translation-editor: -- Jimmy Medeiros -translation-reviewer: -- Rômulo Predes -- Maria Guedes -difficulty: 2 -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/381 -activity: analyzing -topics: [distant-reading, r, data-visualization] -abstract: "Aprenda a usar o R para analisar padrões de alto nível em textos, aplicar métodos estilométricos ao longo do tempo e entre autores, assim como a usar métodos para resumir informações para descrever um corpus" -original: basic-text-processing-in-r -avatar_alt: Crianças com livros junto a uma biblioteca itinerante -doi: 10.46430/phpt0013 ---- - -{% include toc.html %} - -## Objetivos - -Hoje em dia há uma quantidade substancial de dados históricos disponíveis em forma de texto simples e digitalizado. Alguns exemplos comuns são cartas, artigos de jornal, notas pessoais, diários, documentos legais e transcrições de discursos. Enquanto algumas aplicações de softwares independentes têm ferramentas para analisar dados textuais, o uso de linguagens de programação apresenta uma maior flexibilidade para analisar um corpus de documentos de texto. Neste tutorial, guiaremos os usuários no básico da análise de texto na linguagem de programação R. A nossa abordagem envolve usar apenas a tokenização que produz uma análise sintática do texto, com elementos como palavras, frases e orações. No final da presente lição, os usuários poderão: - -* utilizar análises exploratórias para verificar erros e detectar padrões gerais; -* aplicar métodos básicos de estilometria através do tempo e entre autores; -* conseguir resumir o conteúdo do documento para oferecer uma descrição geral do corpus. - -Para esta lição, será utilizado um conjunto de dados com os textos dos discursos presidenciais dos Estados Unidos da América sobre o [Estado da União](https://pt.wikipedia.org/wiki/Discurso_sobre_o_Estado_da_Uni%C3%A3o)[^1]. - -Assumimos que os usuários possuem um conhecimento básico da linguagem de programação R. A lição [Noções básicas de R com dados tabulares](/en/lessons/r-basics-with-tabular-data)[^2] (em inglês) é um excelente guia que contém todos os conhecimentos em R necessários aqui, tais como instalar e abrir R, instalar e carregar pacotes e importar e trabalhar com dados básicos de R. Os usuários podem fazer o download do R indicado para os seus sistemas operativos em [The Comprehensive R Archive Network](https://cran.r-project.org/). 
Ainda que não seja um pré-requisito, recomendamos que os novos usuários façam o download do [R Studio](https://www.rstudio.com/products/rstudio/#Desktop), um ambiente de desenvolvimento de código aberto para escrever e executar programas em R. - -Todo o código desta lição foi testado em R na versão 4.0.2, mas esperamos que ele rode adequadamente em qualquer versão futura do programa. - -# Um pequeno exemplo - -## Configuração de pacotes - -É necessário instalar dois pacotes de R antes de começar com o tutorial: o **tidyverse**[^3] e o **tokenizers**[^4]. O primeiro proporciona ferramentas convenientes para ler e trabalhar com grupos de dados e o segundo contém funções para dividir os dados do texto em palavras e orações. Para instalá-los, abra o R no seu computador e execute essas duas linhas de código no console: - -```{r} - -install.packages("tidyverse") - -install.packages("tokenizers") - -``` - -Dependendo da configuração do seu sistema, pode ser aberta uma caixa de diálogo solicitando a escolha de um lugar da internet para fazer o download. Caso apareça, escolha a opção mais perto de sua localização atual. O download e a instalação, provavelmente, irão ocorrer automaticamente. - -Agora que esses pacotes estão no seu computador, precisamos de avisar ao R que eles devem ser carregados para o uso. Isso é feito através do comando `library`. Pode ser que apareçam alguns avisos enquanto carregam outras dependências, mas eles podem ser ignorados sem nenhum problema. Execute essas duas linhas de código no console para habilitar o uso dos pacotes: - -```{r} - -library(tidyverse) - -library(tokenizers) - -``` - -O comando `install.packages` (instalar pacotes) só é necessário executar na primeira vez em que iniciar este tutorial, o comando `library` deverá ser executado todas as vezes que se inicia o R[^5]. - -## Segmentação de palavras - -Nesta seção, vamos trabalhar com um único parágrafo. Este exemplo pertence ao início do último discurso de Barack Obama sobre o Estado da União, em 2016. Para facilitar a compreensão do tutorial nesta primeira etapa, estudamos este parágrafo traduzido para português[^6]. - -Para carregar o texto, copie e cole o seguinte no console do R: - -``` - -texto <- paste("Também entendo que, pelo fato de estarmos em temporada eleitoral, as expectativas quanto ao que vamos realizar este ano são baixas. Mesmo assim, senhor presidente da Câmara, aprecio a atitude construtiva que o senhor e os outros líderes assumiram no final do ano passado para aprovar o orçamento e perpetuar a redução dos impostos sobre as famílias trabalhadoras. Desse modo, espero que possamos colaborar este ano sobre questões que são prioritárias para ambos os partidos, como a reforma da justiça criminal e a assistência às pessoas dependentes de drogas vendidas com receita médica. Quem sabe possamos surpreender os cínicos novamente.") - -``` - -Depois de executar o comando (clicando em “Enter”), escreva a palavra `texto` no console e pressione Enter. O R irá mostrar o conteúdo do objeto texto, uma vez que ele contém parte do discurso proferido por Obama. - -O primeiro passo do processamento de texto envolve utilizar a função `tokenize_words` (segmentar palavras) do pacote **tokenizers** para dividir o texto en palavras individuais. 
- -```{r} - -palavras <- tokenize_words(texto) - -``` - -Para apresentar os resultados na janela do console do R, mostrando tanto o resultado tokenizado como a posição de cada elemento na margem esquerda, execute palavras no console: - - -```{r} - -palavras - -``` - -Isso produz o seguinte resultado: - - -``` - -> [[1]] - -[1] "também" "entendo" "que" "pelo" "fato" - -[6] "de" "estarmos" "em" "temporada" "eleitoral" - -[11] "as" "expectativas" "quanto" "ao" "que" - -[16] "vamos" "realizar" "este" "ano" "são" - -[21] "baixas" "mesmo" "assim" "senhor" "presidente" - -[26] "da" "câmara" "aprecio" "a" "atitude" - -[31] "construtiva" "que" "o" "senhor" "e" - -[36] "os" "outros" "líderes" "assumiram" "no" - -[41] "final" "do" "ano" "passado" "para" - -[46] "aprovar" "o" "orçamento" "e" "perpetuar" - -[51] "a" "redução" "dos" "impostos" "sobre" - -[56] "as" "famílias" "trabalhadoras" "desse" "modo" - -[61] "espero" "que" "possamos" "colaborar" "este" - -[66] "ano" "sobre" "questões" "que" "são" - -[71] "prioritárias" "para" "ambos" "os" "partidos" - -[76] "como" "a" "reforma" "da" "justiça" - -[81] "criminal" "e" "a" "assistência" "às" - -[86] "pessoas" "dependentes" "de" "drogas" "vendidas" - -[91] "com" "receita" "médica" "quem" "sabe" - -[96] "possamos" "surpreender" "os" "cínicos" "novamente" - -``` - -Como o texto carregado mudou depois de se executar essa função de R? Ela removeu toda a pontuação, dividiu o texto em palavras individuais e converteu tudo para minúsculas. Em breve, veremos porque todas essas intervenções são úteis para a nossa análise. - -Quantas palavras existem neste fragmento de texto? Se usamos a função `length` (comprimento) diretamente no objeto `palavras`, o resultado não é muito útil. - - - -```{r} - -length(palavras) - -``` - - -O resultado é igual a: - - -```{r} - -[1] 1 - -``` - -O comprimento equivale a 1 porque a função `tokenize_words` retorna uma lista de objetos com uma entrada por documento carregado. O nosso carregamento possui apenas um documento, então a lista também possui apenas um elemento. Para ver as palavras dentro do primeiro documento, utilizamos o símbolo [], da seguinte forma: `[[1]]`. O objetivo é selecionar apenas o primeiro elemento da lista: - - -```{r} - -length(palavras[[1]]) - -``` - -O resultado é `100`, indicando que existem 100 palavras neste parágrafo. - -A separação do documento em palavras individuais torna possível calcular quantas vezes cada palavra foi utilizada durante o texto. Para fazer isso, primeiro aplicamos a função `table` (tabela) nas palavras do primeiro (e, neste caso, único) documento e depois separamos os nomes e os valores da tabela num novo objeto chamado _data frame_. O uso de um quadro de dados em R é semelhante ao uso de uma tabela numa base de dados. Esses passos, em conjunto com a impressão do resultado, são obtidos com as seguintes linhas de código: - - -```{r} - -tabela <- table(palavras[[1]]) - -tabela <- data_frame(palavra = names(tabela), contagem = as.numeric(tabela)) - -tabela - -``` - -O resultado deste comando deve aparecer assim no seu console (*tibble* é um tipo específico de _data frame_ criado no pacote [Tidy Data](https://en.wikipedia.org/wiki/Tidy_data)): - -``` - -# A tibble: 77 x 2 - -palavra contagem - - - -1 a 4. - -2 ambos 1. - -3 ano 3. - -4 ao 1. - -5 aprecio 1. - -6 aprovar 1. - -7 as 2. - -8 às 1. - -9 assim 1. - -10 assistência 1. - -# ... with 67 more rows - -``` - - -Há uma quantidade substancial de informação nesta amostra. 
Vemos que existem 77 palavras únicas, como indica a dimensão da tabela. As 10 primeiras fileiras do conjunto de dados são apresentadas, com a segunda coluna mostrando quantas vezes a palavra da primeira coluna foi utilizada. Por exemplo, “ano” foi usada três vezes, enquanto “aprovar”, apenas uma vez. - - - -Também podemos ordenar a tabela usando a função `arrange` (organizar). Esta função precisa do conjunto de dados a utilizar, aqui `tabela`, e depois o nome da coluna que serve de referência para ordená-lo. A função `desc` no segundo argumento indica que queremos ordenar em ordem decrescente. - - - -```{r} - -arrange(tabela, desc(contagem)) - -``` - - -E agora o resultado será: - - - -```{r} - -# A tibble: 77 x 2 - -palavra contagem - - - -1 que 5. - -2 a 4. - -3 ano 3. - -4 e 3. - -5 os 3. - -6 as 2. - -7 da 2. - -8 de 2. - -9 este 2. - -10 o 2. - -# … with 67 more rows - -``` - - - -As palavras mais comuns são pronomes e palavras funcionais tais como "que", "a", "e" e "os". Observe como a análise é facilitada pelo uso da versão em minúsculas de cada palavra. Qualquer contagem prevê que a palavra possa estar no início ou no meio da frase. - - - -Uma técnica popular é carregar uma lista de palavras frequentemente usadas e eliminá-las antes da análise formal. As palavras em tal lista são chamadas "*stopwords*" ou "palavras vazias" e são geralmente pronomes, conjugações dos verbos mais comuns e conjunções. Neste tutorial, temos uma variação sutil desta técnica. - - - -## Detectar frases - - - -O pacote **tokenizer** também contém a função `tokenize_sentences`, que detecta limites de frases, ao invés de palavras. Ele pode ser executado da seguinte maneira: - - - -```{r} - -frases <- tokenize_sentences(texto) - -frases - -``` - - - -Com o resultado: - - - -```{r} - -> frases - -[[1]] - -[1] "Também entendo que, pelo fato de estarmos em temporada eleitoral, as expectativas quanto ao que vamos realizar este ano são baixas." - -[2] "Mesmo assim, senhor presidente da Câmara, aprecio a atitude construtiva que o senhor e os outros líderes assumiram no final do ano passado para aprovar o orçamento e perpetuar a redução dos impostos sobre as famílias trabalhadoras." - -[3] "Desse modo, espero que possamos colaborar este ano sobre questões que são prioritárias para ambos os partidos, como a reforma da justiça criminal e a assistência às pessoas dependentes de drogas vendidas com receita médica." - -[4] "Quem sabe possamos surpreender os cínicos novamente." - -``` - - - -O resultado é um vetor de caracteres, um objeto unidimensional que consiste apenas em elementos representados como caracteres. Observe que o resultado marcou cada frase como um elemento separado. - - - -É possível conectar o resultado da divisão das frases com o resultado da divisão das palavras. Se executarmos a divisão de frases do parágrafo com a função `tokenize_words`, cada frase será tratada como um único documento. Execute isto usando a seguinte linha de código e veja se o resultado é o esperado, a segunda linha de comando serve para imprimir o resultado. 
- - - -```{r} - -frases_palavras <- tokenize_words(frases[[1]]) - -frases_palavras - -``` - - - -Se olharmos para o tamanho do resultado diretamente, podemos ver que existem quatro “documentos” no objeto `frases_palavras`: - - - -```{r} - -length(frases_palavras) - -``` - - - -Ao acessar cada uma delas diretamente, é possível saber quantas palavras há em cada frase do parágrafo: - - - -```{r} - -length(frases_palavras[[1]]) - -length(frases_palavras[[2]]) - -length(frases_palavras[[3]]) - -length(frases_palavras[[4]]) - -``` - - - -Isto pode demandar um pouco de esforço, mas felizmente existe uma maneira mais simples de o fazer. A função `sapply` executa a função no segundo argumento para cada elemento do primeiro argumento. Como resultado, podemos calcular a extensão de cada frase do primeiro parágrafo com uma única linha de código: - - - -```{r} - -sapply(frases_palavras, length) - -``` - - - -O resultado agora será assim: - - - -```{r} - -[1] 21 37 35 7 - -``` - - - -Podemos ver que existem quatro frases com um comprimento de 21, 37, 35 e 7 palavras. Utilizaremos esta função para trabalharmos com documentos maiores. - - - -# Analisar o discurso sobre o Estado da União de Barack Obama em 2016 - - - -## Análise exploratória - - - -Vamos aplicar as técnicas da seção anterior a um discurso sobre o Estado da União completo, desta vez, usando o original em inglês. Por uma questão de coerência, vamos usar o mesmo discurso de 2016 de Barack Obama. Agora, vamos carregar os dados de um ficheiro, uma vez que a cópia direta é difícil em grande escala. - - - -Para tal, vamos combinar a função `readLines` (ler linhas) para carregar o texto em R e a função `paste` (colar) para combinar todas as linhas num único objeto. Vamos criar a URL do arquivo de texto usando a função `sprintf`, uma vez que este formato permitirá que ele seja facilmente aproveitado para outros recursos online[^7],[^8]. - - - -```{r} - -base_url <- "https://raw.githubusercontent.com/programminghistorian/jekyll/gh-pages/assets/basic-text-processing-in-r/" - -url <- sprintf("%s/sotu_text/236.txt", base_url) - -texto <- paste(readLines(url), collapse = "\n") - -``` - - - -Como antes, vamos segmentar o texto e ver o número de palavras no documento. - - - -```{r} - -palavras <- tokenize_words(texto) - -length(palavras[[1]]) - -``` - - - -Vemos que este discurso contém um total de `6113` palavras. Ao combinar as funções `table` (tabela), `data_frame` e `arrange` (organizar), como fizemos no exemplo anterior, obtemos as palavras mais frequentes em todo o discurso. Ao fazer isso, observe como é fácil reutilizar o código anterior para repetir a análise num novo conjunto de dados. Este é um dos maiores benefícios de usar uma linguagem de programação para realizar uma análise baseada em dados [^9]. - - - -```{r} - -tabela <- table(palavras[[1]]) - -tabela <- data_frame(word = names(tabela), count = as.numeric(tabela)) - -tabela <- arrange(tabela, desc(count)) - -tabela - -``` - -O resultado deve ser: - - -```{r} - ->#A tibble: 1,590 x 2 - -word count - - - -1 the 281. - -2 to 209. - -3 and 189. - -4 of 148. - -5 that 125. - -6 we 124. - -7 a 120. - -8 in 105. - -9 our 96. - -10 is 72. - ->#... with 1,580 more rows - -``` - -Mais uma vez, palavras extremamente comuns como *the* ("o" ou "a"), *to* ("para") e *and* ("e") estão no topo da tabela. Estes termos não são particularmente esclarecedores se quisermos conhecer o assunto do discurso. 
Na realidade, queremos encontrar palavras que se destaquem mais neste texto do que num grande corpus externo em inglês. Para conseguir isso, precisamos de um conjunto de dados que forneça essas frequências. Aqui está o conjunto de dados de Peter Norviq usando o *Google Web Trillion Word Corpus* (Corpus de um trilhão de palavras da web do Google), coletado a partir dos dados compilados através do rastreamento de sites populares em inglês pelo Google [^10]: - - -```{r} - -palavras_frequentes <- read_csv(sprintf("%s/%s", base_url, "word_frequency.csv")) - -palavras_frequentes - -``` - - -A primeira coluna indica o idioma (sempre "en" para inglês neste caso), a segunda coluna - frequency - fornece a palavra em questão e a terceira coluna indica a percentagem com a qual ela aparece no *Corpus de um trilhão de palavras do Google*. Por exemplo, a palavra "for" aparece quase exatamente 1 vez a cada 100 palavras, pelo menos nos textos dos sites indexados pelo Google. - - - -Para combinar estas palavras frequentes com o conjunto de dados na `tabela` construída a partir do discurso do Estado da União, podemos usar a função `inner_join` (união interna). Esta função toma dois conjuntos de dados e combina-os em todas as colunas que têm o mesmo nome. Neste caso, a coluna comum é a chamada _word_ ("palavra"). - - - -```{r} - -tabela <- inner_join(tabela, palavras_frequentes) - -tabela - -``` - - - -Note que agora o nosso conjunto de dados tem duas colunas extras que fornecem o idioma (aqui relativamente pouco útil já que é sempre "en") e a frequência da palavra no corpus externo. Esta segunda nova coluna será muito útil, porque podemos filtrar linhas que têm uma frequência inferior a 0,1%, ou seja, que aparecem mais de uma vez em cada 1000 palavras: - - - -```{r} - -filter(tabela, frequency < 0.1) - -``` - - -Isto produz: - - -```{r} - ->#A tibble: 1,457 x 4 - -word count language frequency - - - -1 america 28. en 0.0232 - -2 people 27. en 0.0817 - -3 just 25. en 0.0787 - -4 world 23. en 0.0734 - -5 american 22. en 0.0387 - -6 work 22. en 0.0713 - -7 make 20. en 0.0689 - -8 want 19. en 0.0440 - -9 change 18. en 0.0358 - -10 years 18. en 0.0574 - ->#... with 1,447 more rows - -``` - - - -Esta lista está começando a se tornar mais interessante. Um termo como "america" aparece no topo da lista porque, podemos pensar, é muito usado nos discursos dos políticos e menos em outros campos. Ao estabelecer o limiar ainda mais baixo, em 0.002, obtemos um melhor resumo do discurso. Como seria útil ver mais do que as dez linhas padrão, vamos usar a função `print` (imprimir) junto com a opção `n` (de número) definida como 15 para que possamos ver mais linhas. - - - -```{r} - -print(filter(tabela, frequency < 0.002), n = 15) - -``` - - - -Isto agora nos mostra o seguinte resultado: - - - -```{r} - ->#A tibble: 463 x 4 - -word count language frequency - - - -1 laughter 11. en 0.000643 - -2 voices 8. en 0.00189 - -3 allies 4. en 0.000844 - -4 harder 4. en 0.00152 - -5 qaida 4. en 0.000183 - -6 terrorists 4. en 0.00122 - -7 bipartisan 3. en 0.000145 - -8 generations 3. en 0.00123 - -9 stamp 3. en 0.00166 - -10 strongest 3. en 0.000591 - -11 syria 3. en 0.00136 - -12 terrorist 3. en 0.00181 - -13 tougher 3. en 0.000247 - -14 weaken 3. en 0.000181 - -15 accelerate 2. en 0.000544 - ->#... with 448 more rows - -``` - -Os resultados parecem sugerir alguns dos temas principais deste discurso, como “syria” (Síria), “terrorist” (terrorista) e “qaida” (Qaeda) (o nome al-qaida foi dividido em “al” e “qaida” pelo tokenizador). 
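+Antes de avançar, também se poderia normalizar a contagem pelo total de palavras do discurso, de modo a comparar diretamente a proporção interna de cada palavra com a sua frequência no corpus externo. Um esboço, assumindo os objetos `tabela` e `palavras` criados acima (a coluna `proporcao` é um nome hipotético):
+
+```{r}
+# Esboço: percentagem de cada palavra dentro do próprio discurso
+tabela <- mutate(tabela, proporcao = count / length(palavras[[1]]) * 100)
+# ordena pela diferença entre a proporção interna e a frequência externa
+arrange(tabela, desc(proporcao - frequency))
+```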
- - - -## Sumarizar o documento - - - -Para fornecer informações contextuais para o conjunto de dados que estamos analisando, temos uma tabela com metadados sobre cada um dos discursos do Estado da União. Vamos carregá-la em R: - - ```{r} - -metadados <- read_csv(sprintf("%s/%s", base_url, "metadata.csv")) - -metadados - -``` - - -As primeiras dez linhas do grupo de dados aparecem assim: - - -```{r} - ->#A tibble: 236 x 4 - -president year party sotu_type - - - -1 George Washington 1790 Nonpartisan speech - -2 George Washington 1790 Nonpartisan speech - -3 George Washington 1791 Nonpartisan speech - -4 George Washington 1792 Nonpartisan speech - -5 George Washington 1793 Nonpartisan speech - -6 George Washington 1794 Nonpartisan speech - -7 George Washington 1795 Nonpartisan speech - -8 George Washington 1796 Nonpartisan speech - -9 John Adams 1797 Federalist speech - -10 John Adams 1798 Federalist speech - ->#... with 226 more rows - -``` - - -Temos o nome do presidente, o ano, o partido político do presidente e o formato de discurso do Estado da União (oral ou escrito) para cada discurso no conjunto. O discurso de 2016 está na linha 236 dos metadados que, por acaso, é a última linha. - - - -Na próxima seção, pode ser útil resumir os dados para um discurso numa única linha de texto. Podemos fazer isto extraindo as cinco palavras mais frequentes com uma frequência inferior a 0,002% no *Corpus de um trilhão de palavras do Google* e combinando isso com dados sobre o presidente e o ano. - - -```{r} - -tabela <- filter(tabela, frequency < 0.002) - -resultado <- c(metadados$president[236], metadados$year[236], tabela$word[1:5]) - -paste(resultado, collapse = "; ") - -``` - - - -Isto deveria dar-nos o seguinte resultado: - - - -```{r} - -[1] "Barack Obama; 2016; laughter; voices; allies; harder; qaida" - -[1] “Barack Obama; 2016; risadas; vozes; aliados; mais duro; qaeda” - -``` - -Esta linha capta tudo sobre o discurso? É evidente que não. O processamento de texto nunca substituirá a leitura atenta de um texto, mas ajuda a dar um resumo de alto nível das questões discutidas ("risadas" aparecem aqui porque as reações do público são anotadas no texto do discurso). Este resumo é útil de várias maneiras. Pode fornecer um título ad-hoc ou resumo para um documento que não tenha estas informações; pode servir para lembrar aos leitores que leram ou ouviram o discurso quais foram os principais temas discutidos; e compilar vários resumos com uma única ação pode mostrar padrões em grande escala que muitas vezes se perdem em grandes corpus. É a este último uso que recorremos agora ao aplicar as técnicas desta seção a um grupo maior de discursos do Estado da União. - - -# Análise dos discursos do Estado da União de 1790 a 2016 - -## Carregar o corpus - - -A primeira coisa a fazer para analisar o corpus de discursos do Estado da União é carregá-los em R. Isto envolve as mesmas funções `paste` (colar) e `readLines` (ler linhas) como antes, mas temos que gerar um loop `for` (para) que executa as funções nos 236 ficheiros de texto. Estas são combinadas com a função `c`. - - - -```{r} - -ficheiros <- sprintf("%s/sotu_text/%03d.txt", base_url, 1:236) - -texto <- c() - -for (f in ficheiros) { - -texto <- c(texto, paste(readLines(f), collapse = "\n")) - -} - -``` - -Esta técnica carrega todos os ficheiros um a um do Github. Opcionalmente, é possível baixar um arquivo zip (comprimido) com o corpus completo e carregar os ficheiros manualmente. Esta técnica é descrita na próxima seção. 
- - -## Forma alternativa de carregar o corpus (opcional) - -Pode fazer o download do corpus aqui: [sotu_text.zip](/assets/basic-text-processing-in-r/sotu_text.zip). Descompacte o repositório em algum lugar no seu computador e defina a variável `input_loc` (local de upload) para o caminho do diretório onde o arquivo foi descompactado. Por exemplo, se os ficheiros estão na área de trabalho de um computador macOS e o usuário é o stevejobs, `input_loc` deve ser: - - ```{r} - -input_loc <- "/Users/stevejobs/Desktop/sotu_text" - -``` - -Uma vez feito, pode usar o seguinte bloco de código para carregar todos os textos: - - ```{r} - -ficheiros <- dir(input_loc, full.names = TRUE) - -texto <- c() - -for (f in ficheiros) { - -texto <- c(texto, paste(readLines(f), collapse = "\n")) - -} - -``` - - -É possível usar esta mesma técnica para carregar seu próprio corpus de textos. - - -## Análise exploratória - - -Uma vez mais, com a função `tokenize_words`, podemos calcular o comprimento de cada discurso em número de palavras. - - ```{r} - -palavras <- tokenize_words(texto) - -sapply(palavras, length) - -``` - -Existe um padrão temporal na duração dos discursos? Como se compara a duração dos discursos de outros presidentes com os de Franklin D. Roosevelt, Abraham Lincoln e George Washington? - - -A melhor maneira de descobrir é criando um gráfico de dispersão. É possível construir um usando a função `qplot` (gráfico), com o ano (year) no eixo x ou horizontal e o número de palavras (lenght) no eixo y ou vertical. - -```{r} - -qplot(metadados$year, sapply(palavras, length)) + labs(x = "Ano", y = "Número de palavras") - -``` - -Isto cria um gráfico como este: - -![Number of words in each State of the Union Address plotted by year.](/images/basic-text-processing-in-r/sotu-number-of-words.jpg)Número de palavras em cada discurso do Estado da União por ano. - -Parece que a maioria dos discursos aumentaram de 1790 a 1850 e depois aumentaram novamente no final do século XIX. A duração diminuiu drasticamente em torno da Primeira Guerra Mundial, com alguns pontos discrepantes espalhados ao longo do século XX. - - -Existe alguma razão por trás dessas mudanças? Para explicar esta variação, podemos definir a cor dos pontos para denotar se são discursos que foram apresentados por escrito ou falados. O comando para fazer este gráfico envolve apenas uma pequena mudança no comando do gráfico: - - -```{r} - -qplot(metadados$year, sapply(palavras, length), color = metadados$sotu_type) + labs(x = "Ano", y = "Número de palavras", color = "Modalidade do discurso") - -``` - -Isto produz o seguinte gráfico: - -![Number of words in each State of the Union Address plotted by year, with color denoting whether it was a written or oral message.](/images/basic-text-processing-in-r/sotu-number-of-words-and-type.jpg)Número de palavras em cada discurso do Estado da União organizado por ano e com a cor denotando se se tratava de um discurso escrito ou oral. - - -Vemos que o aumento no século XIX foi quando os discursos se tornaram documentos escritos e que a queda drástica foi quando Woodrow Wilson (28º Presidente dos Estados Unidos, entre 1913 e 1921) rompeu com a tradição e deu o seu discurso sobre o Estado da União oralmente no Congresso. Os pontos discrepantes que vimos anteriormente eram discursos proferidos por escrito após a Segunda Guerra Mundial. - - - -## Análise estilométrica - - -A estilometria, o estudo linguístico do estilo, faz uso extensivo de métodos computacionais para descrever o estilo de escrita de um autor. 
Com o nosso corpus, é possível detectar mudanças no estilo de escrita ao longo dos séculos XIX e XX. Um estudo estilométrico mais formal, geralmente, envolve o uso de código de análise sintática ou de reduções dimensionais algorítmicas complexas, tais como a análise dos principais componentes a serem estudados ao longo do tempo e entre autores. Neste tutorial, continuaremos a nos concentrar no estudo do comprimento das frases. - -O corpus pode ser dividido em frases usando a função `tokenize_sentences`. Neste caso, o resultado é uma lista com 236 objetos, cada um representando um documento específico. - - - -```{r} - -frases <- tokenize_sentences(texto) - -``` - - - -Em seguida, queremos dividir cada frase em palavras. A função `tokenize_words` pode ser utilizada, mas não diretamente sobre a lista de objetos `frases`. Poderíamos fazer isso com um loop `for` de novo, mas há uma forma mais simples de o fazer. A função `sapply` oferece uma aproximação mais direta. Aqui, queremos aplicar a segmentação de palavras individualmente a cada documento e, para isso, esta função é perfeita. - - - -```{r} - -frases_palavras <- sapply(frases, tokenize_words) - -``` - -Agora, temos uma lista (com cada elemento representando um documento) de listas (com cada elemento representando as palavras de uma dada frase). O resultado que precisamos é uma lista de objetos que forneça o comprimento de cada frase num dado documento. Para isto, combinamos o loop `for` com a função `sapply`. - - - -```{r} - -comprimento_frases <- list() - -for (i in 1:nrow(metadados)) { - -comprimento_frases[[i]] <- sapply(frases_palavras[[i]], length) - -} - -``` - - -O resultado de `comprimento_frases` pode ser visualizado numa linha temporal. Primeiro, precisamos de resumir o comprimento de todas as frases de um documento a um único número. A função `median` (mediana), que encontra o 50º percentil dos dados inseridos, é uma boa opção para resumir as frases, porque não será muito afectada por possíveis erros de segmentação que podem ter criado uma frase artificialmente longa [^11]. - - - -```{r} - -mediana_comprimento_frases <- sapply(comprimento_frases, median) - -``` - - -Agora, criamos um diagrama com essa variável junto com os anos dos discursos utilizando, mais uma vez, a função `qplot`. - - - -```{r} - -qplot(metadados$year, mediana_comprimento_frases) + labs(x = "Ano", y = "Mediana do comprimento das frases") - -``` - - ![Median sentence length for each State of the Union Address.](/images/basic-text-processing-in-r/sotu-sentence-length.jpg)Duração mediana das frases por discurso do Estado da União. - -O gráfico mostra-nos uma forte tendência geral de frases mais curtas nos dois séculos do corpus. Lembre-se que alguns discursos no final da segunda metade do século XX eram longos e escritos, muito parecidos com os do século XIX. É particularmente interessante que estes não se destaquem em se tratando de mediana do comprimento das frases. - - -Para tornar esse padrão ainda mais explícito, é possível adicionar uma linha de tendência no gráfico com a função `geom_smooth` (geometrização suave). - - -```{r} - -qplot(metadados$year, mediana_comprimento_frases) + geom_smooth() + labs(x = "Ano", y = "Mediana do comprimento das frases") - -``` - ![Median sentence length for each State of the Union Address, with a smoothing line.](/images/basic-text-processing-in-r/sotu-sentence-length-smooth.jpg)Comprimento mediano de cada discurso do Estado da União com uma linha de tendência. 
- - -As linhas de tendência são um ótimo complemento aos gráficos. Elas possuem a função dupla de mostrar a tendência geral dos dados no tempo, enquanto destacam pontos atípicos ou periféricos. - - - -## Resumo do documento - - - -Como tarefa final, queremos aplicar a função de resumo simples que utilizamos na seção anterior a cada um dos documentos desse corpus mais amplo. Precisamos utilizar um loop outra vez, mas o código interno permanece quase o mesmo, com a exceção de que precisamos guardar os resultados como um elemento do vetor `description` (descrição). - - - -```{r} - -description <- c() - -``` - -```{r} - -for (i in 1:length(palavras)) { - -tabela <- table(palavras[[i]]) - -tabela <- data_frame(word = names(tabela), count = as.numeric(tabela)) - -tabela <- arrange(tabela, desc(count)) - -tabela <- inner_join(tabela, palavras_frequentes) - -tabela <- filter(tabela, frequency < 0.002) - -resultado <- c(metadados$president[i], metadados$year[i], tabela$word[1:5]) - -description <- c(description, paste(resultado, collapse = "; ")) - -} - -``` - - - -Enquanto se processa cada ficheiro como resultado da função `inner_join`, é possível ver uma linha que diz **Joining, by = “word”**. Como o loop pode demorar um ou mais minutos o processamento da função, esta linha serve para assegurar que o código está processando os ficheiros. Podemos ver o resultado do loop escrevendo `description` no console, mas, com a função `cat`, obtemos uma visão mais nítida dos resultados. - - ```{r} - -cat(description, sep = "\n") - -``` - - -Os resultados oferecem uma linha para cada discurso do Estado da União. Aqui, por exemplo, estão as linhas dos presidentes Bill Clinton, George W. Bush e Barack Obama: - - ``` - ->William J. Clinton; 1993; deficit; propose; incomes; invest; decade - -William J. Clinton; 1994; deficit; renew; ought; brady; cannot - -William J. Clinton; 1995; ought; covenant; deficit; bureaucracy; voted - -William J. Clinton; 1996; bipartisan; gangs; medicare; deficit; harder - -William J. Clinton; 1997; bipartisan; cannot; balanced; nato; immigrants - -William J. Clinton; 1998; bipartisan; deficit; propose; bosnia; millennium - -William J. Clinton; 1999; medicare; propose; surplus; balanced; bipartisan - -William J. Clinton; 2000; propose; laughter; medicare; bipartisan; prosperity - -George W. Bush; 2001; medicare; courage; surplus; josefina; laughter - -George W. Bush; 2002; terrorist; terrorists; allies; camps; homeland - -George W. Bush; 2003; hussein; saddam; inspectors; qaida; terrorists - -George W. Bush; 2004; terrorists; propose; medicare; seniors; killers - -George W. Bush; 2005; terrorists; iraqis; reforms; decades; generations - -George W. Bush; 2006; hopeful; offensive; retreat; terrorists; terrorist - -George W. Bush; 2007; terrorists; qaida; extremists; struggle; baghdad - -George W. 
Bush; 2008; terrorists; empower; qaida; extremists; deny - -Barack Obama; 2009; deficit; afford; cannot; lending; invest - -Barack Obama; 2010; deficit; laughter; afford; decade; decades - -Barack Obama; 2011; deficit; republicans; democrats; laughter; afghan - -Barack Obama; 2012; afford; deficit; tuition; cannot; doubling - -Barack Obama; 2013; deficit; deserve; stronger; bipartisan; medicare - -Barack Obama; 2014; cory; laughter; decades; diplomacy; invest - -Barack Obama; 2015; laughter; childcare; democrats; rebekah; republicans - -Barack Obama; 2016; laughter; voices; allies; harder; qaida - -``` - -Como já foi referido, estes resumos temáticos não são, de forma alguma, um substituto para uma leitura atenta de cada documento. Eles servem, no entanto, como um resumo geral e de alto nível de cada presidência. Vemos, por exemplo, o foco inicial no déficit durante os primeiros anos da presidência de Bill Clinton, sua mudança em direção ao bipartidarismo enquanto a Câmara e o Senado se inclinavam para os republicanos em meados dos anos 1990, e uma mudança em direção à reforma do Medicare no final de sua presidência. Os discursos de George W. Bush concentraram-se, principalmente, no terrorismo, com exceção do discurso de 2001 proferido antes dos ataques terroristas de 11 de setembro. Barack Obama voltou a preocupar-se com a economia sob a sombra da recessão de 2008. A palavra "riso" aparece frequentemente porque é adicionada às transcrições quando o riso do público faz com que o orador pare. - - - -# Próximos passos - - - -Neste pequeno tutorial exploramos algumas maneiras básicas de analisar dados textuais com a linguagem de programação R. Há várias direções que se pode tomar para se aprofundar nas novas técnicas de análise de texto. Aqui estão três exemplos particularmente interessantes: - - - -* conduzir uma análise completa com base em processamento de linguagem natural (NLP) num texto para extrair características tais como nomes de entidades, categorias gramaticais e relações de dependência. Estes estão disponíveis em vários pacotes R, incluindo o **cleanNLP**[^12], e para vários idiomas. - -* realizar uma modelagem por tópicos (*topic models*) para detectar discursos específicos no corpus usando pacotes como **mallet**[^13] e **topicmodels**[^14]. - -* aplicar técnicas de redução de dimensionalidade para traçar tendências estilísticas ao longo do tempo ou entre diferentes autores. Por exemplo, o pacote **tsne** [^15] realiza uma poderosa forma de redução de dimensionalidade particularmente favorável a gráficos detalhados. - - -Existem muitos tutoriais genéricos para estes três exemplos, assim como uma documentação detalhada dos pacotes[^16]. Esperamos oferecer tutoriais focados em aplicações históricas deles no futuro. - - - -# Notas - -[^1]: O nosso corpus contém 236 discursos sobre o Estado da União. Dependendo do que for contado, este número pode ser ligeiramente superior ou inferior. - -[^2]: Taryn Dewar, “R Basics with Tabular Data,” Programming Historian (05 September 2016), [/lessons/r-basics-with-tabular-data](/en/lessons/r-basics-with-tabular-data). - -[^3]: Hadley Wickham. “tidyverse: Easily Install and Load ‘Tidyverse’ Packages”. R Package, Version 1.1.1. https://cran.r-project.org/web/packages/tidyverse/index.html - -[^4]: Lincoln Mullen and Dmitriy Selivanov. “tokenizers: A Consistent Interface to Tokenize Natural Language Text Convert”. R Package, Version 0.1.4. 
https://cran.r-project.org/web/packages/tokenizers/index.html - -[^5]: Tenha em mente que os nomes das funções, como `library` e `install.packages`, sempre estarão em inglês. Apesar disso, colocamos uma tradução do significado para facilitar a compreensão e traduzimos os nomes das variáveis [N. de T.]. - -[^6]: Tradução publicada pela Folha em português (13 de janeiro de 2016) [https://www1.folha.uol.com.br/mundo/2016/01/1729011-leia-a-integra-do-ultimo-discurso-do-estado-da-uniao-de-obama.shtml](https://www1.folha.uol.com.br/mundo/2016/01/1729011-leia-a-integra-do-ultimo-discurso-do-estado-da-uniao-de-obama.shtml) [N. de T.] - -[^7]: Foi feito o download de todos os discursos presidenciais do The American Presidency Project da University of California Santa Barbara (acesso em 11 de novembro de 2016) [http://www.presidency.ucsb.edu/sou.php](http://www.presidency.ucsb.edu/sou.php) - -[^8]: Aqui, voltamos para a versão original do discurso, em inglês, para dar prosseguimento à análise e, particularmente, para observarmos a lista de palavras mais utilizadas em inglês. Continuaremos a traduzir os nomes das variáveis e das funções para facilitar a compreensão em português [N. de T.]. - -[^9]: Aqui, optamos por nomear as colunas da tabela em inglês, como *word* (palavra) e *count* (contagem), para facilitar a interação com o conjunto de dados que será introduzido depois com a função `inner_join` [N. de T.]. - -[^10]: Peter Norvig. “Google Web Trillion Word Corpus”. (Accedido el 11 de noviembre de 2016) http://norvig.com/ngrams/. - -[^11]: Isto ocorre em alguns discursos escritos do Estado da União, quando uma lista com numeração é segmentada numa única frase longa. - -[^12]: Taylor Arnold. “cleanNLP: A Tidy Data Model for Natural Language Processing”. R Package, Version 0.24. https://cran.r-project.org/web/packages/cleanNLP/index.html - -[^13]: David Mimno. “mallet: A wrapper around the Java machine learning tool MALLET”. R Package, Version 1.0. https://cran.r-project.org/web/packages/mallet/index.html - -[^14]: Bettina Grün and Kurt Hornik. “https://cran.r-project.org/web/packages/topicmodels/index.html”. R Package, Version 0.2-4. https://cran.r-project.org/web/packages/topicmodels/index.html - -[^15]: Ver o artigo" t-distributed stochastic neighbor embedding" na Wikipedia (em inglês). https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding [N. de T.] - -[^16]: Ver, por exemplo, o livro dos autores Taylor Arnold and Lauren Tilton. *Humanities Data in R: Exploring Networks, Geospatial Data, Images, and Text.* Springer, 2015. 
+--- +title: Processamento Básico de Texto em R +slug: processamento-basico-texto-r +layout: lesson +date: 2017-03-27 +translation_date: 2021-07-13 +authors: +- Taylor Arnold +- Lauren Tilton +reviewers: +- Brandon Walsh +- John Russell +editors: +- Jeri Wieringa +translator: +- Diana Rebelo Rodriguez +translation-editor: +- Jimmy Medeiros +translation-reviewer: +- Rômulo Predes +- Maria Guedes +difficulty: 2 +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/381 +activity: analyzing +topics: [distant-reading, r, data-visualization] +abstract: "Aprenda a usar o R para analisar padrões de alto nível em textos, aplicar métodos estilométricos ao longo do tempo e entre autores, assim como a usar métodos para resumir informações para descrever um corpus" +original: basic-text-processing-in-r +avatar_alt: Crianças com livros junto a uma biblioteca itinerante +doi: 10.46430/phpt0013 +--- + +{% include toc.html %} + +## Objetivos + +Hoje em dia há uma quantidade substancial de dados históricos disponíveis em forma de texto simples e digitalizado. Alguns exemplos comuns são cartas, artigos de jornal, notas pessoais, diários, documentos legais e transcrições de discursos. Enquanto algumas aplicações de softwares independentes têm ferramentas para analisar dados textuais, o uso de linguagens de programação apresenta uma maior flexibilidade para analisar um corpus de documentos de texto. Neste tutorial, guiaremos os usuários no básico da análise de texto na linguagem de programação R. A nossa abordagem envolve usar apenas a tokenização que produz uma análise sintática do texto, com elementos como palavras, frases e orações. No final da presente lição, os usuários poderão: + +* utilizar análises exploratórias para verificar erros e detectar padrões gerais; +* aplicar métodos básicos de estilometria através do tempo e entre autores; +* conseguir resumir o conteúdo do documento para oferecer uma descrição geral do corpus. + +Para esta lição, será utilizado um conjunto de dados com os textos dos discursos presidenciais dos Estados Unidos da América sobre o [Estado da União](https://pt.wikipedia.org/wiki/Discurso_sobre_o_Estado_da_Uni%C3%A3o)[^1]. + +Assumimos que os usuários possuem um conhecimento básico da linguagem de programação R. A lição [Noções básicas de R com dados tabulares](/en/lessons/r-basics-with-tabular-data)[^2] (em inglês) é um excelente guia que contém todos os conhecimentos em R necessários aqui, tais como instalar e abrir R, instalar e carregar pacotes e importar e trabalhar com dados básicos de R. Os usuários podem fazer o download do R indicado para os seus sistemas operativos em [The Comprehensive R Archive Network](https://cran.r-project.org/). Ainda que não seja um pré-requisito, recomendamos que os novos usuários façam o download do [R Studio](https://www.rstudio.com/products/rstudio/#Desktop), um ambiente de desenvolvimento de código aberto para escrever e executar programas em R. + +Todo o código desta lição foi testado em R na versão 4.0.2, mas esperamos que ele rode adequadamente em qualquer versão futura do programa. + +# Um pequeno exemplo + +## Configuração de pacotes + +É necessário instalar dois pacotes de R antes de começar com o tutorial: o **tidyverse**[^3] e o **tokenizers**[^4]. O primeiro proporciona ferramentas convenientes para ler e trabalhar com grupos de dados e o segundo contém funções para dividir os dados do texto em palavras e orações. 
Para instalá-los, abra o R no seu computador e execute essas duas linhas de código no console: + +```{r} + +install.packages("tidyverse") + +install.packages("tokenizers") + +``` + +Dependendo da configuração do seu sistema, pode ser aberta uma caixa de diálogo solicitando a escolha de um lugar da internet para fazer o download. Caso apareça, escolha a opção mais perto de sua localização atual. O download e a instalação, provavelmente, irão ocorrer automaticamente. + +Agora que esses pacotes estão no seu computador, precisamos de avisar ao R que eles devem ser carregados para o uso. Isso é feito através do comando `library`. Pode ser que apareçam alguns avisos enquanto outras dependências são carregadas, mas eles podem ser ignorados sem nenhum problema. Execute essas duas linhas de código no console para habilitar o uso dos pacotes: + +```{r} + +library(tidyverse) + +library(tokenizers) + +``` + +O comando `install.packages` (instalar pacotes) só precisa ser executado na primeira vez em que se inicia este tutorial; já o comando `library` deverá ser executado todas as vezes que se inicia o R[^5]. + +## Segmentação de palavras + +Nesta seção, vamos trabalhar com um único parágrafo. Este exemplo pertence ao início do último discurso de Barack Obama sobre o Estado da União, em 2016. Para facilitar a compreensão do tutorial nesta primeira etapa, estudamos este parágrafo traduzido para português[^6]. + +Para carregar o texto, copie e cole o seguinte no console do R: + +``` + +texto <- paste("Também entendo que, pelo fato de estarmos em temporada eleitoral, as expectativas quanto ao que vamos realizar este ano são baixas. Mesmo assim, senhor presidente da Câmara, aprecio a atitude construtiva que o senhor e os outros líderes assumiram no final do ano passado para aprovar o orçamento e perpetuar a redução dos impostos sobre as famílias trabalhadoras. Desse modo, espero que possamos colaborar este ano sobre questões que são prioritárias para ambos os partidos, como a reforma da justiça criminal e a assistência às pessoas dependentes de drogas vendidas com receita médica. Quem sabe possamos surpreender os cínicos novamente.") + +``` + +Depois de executar o comando (clicando em “Enter”), escreva a palavra `texto` no console e pressione Enter. O R irá mostrar o conteúdo do objeto `texto`, uma vez que ele contém parte do discurso proferido por Obama. + +O primeiro passo do processamento de texto envolve utilizar a função `tokenize_words` (segmentar palavras) do pacote **tokenizers** para dividir o texto em palavras individuais.
+ +```{r} + +palavras <- tokenize_words(texto) + +``` + +Para apresentar os resultados na janela do console do R, mostrando tanto o resultado tokenizado como a posição de cada elemento na margem esquerda, execute palavras no console: + + +```{r} + +palavras + +``` + +Isso produz o seguinte resultado: + + +``` + +> [[1]] + +[1] "também" "entendo" "que" "pelo" "fato" + +[6] "de" "estarmos" "em" "temporada" "eleitoral" + +[11] "as" "expectativas" "quanto" "ao" "que" + +[16] "vamos" "realizar" "este" "ano" "são" + +[21] "baixas" "mesmo" "assim" "senhor" "presidente" + +[26] "da" "câmara" "aprecio" "a" "atitude" + +[31] "construtiva" "que" "o" "senhor" "e" + +[36] "os" "outros" "líderes" "assumiram" "no" + +[41] "final" "do" "ano" "passado" "para" + +[46] "aprovar" "o" "orçamento" "e" "perpetuar" + +[51] "a" "redução" "dos" "impostos" "sobre" + +[56] "as" "famílias" "trabalhadoras" "desse" "modo" + +[61] "espero" "que" "possamos" "colaborar" "este" + +[66] "ano" "sobre" "questões" "que" "são" + +[71] "prioritárias" "para" "ambos" "os" "partidos" + +[76] "como" "a" "reforma" "da" "justiça" + +[81] "criminal" "e" "a" "assistência" "às" + +[86] "pessoas" "dependentes" "de" "drogas" "vendidas" + +[91] "com" "receita" "médica" "quem" "sabe" + +[96] "possamos" "surpreender" "os" "cínicos" "novamente" + +``` + +Como o texto carregado mudou depois de se executar essa função de R? Ela removeu toda a pontuação, dividiu o texto em palavras individuais e converteu tudo para minúsculas. Em breve, veremos porque todas essas intervenções são úteis para a nossa análise. + +Quantas palavras existem neste fragmento de texto? Se usamos a função `length` (comprimento) diretamente no objeto `palavras`, o resultado não é muito útil. + + + +```{r} + +length(palavras) + +``` + + +O resultado é igual a: + + +```{r} + +[1] 1 + +``` + +O comprimento equivale a 1 porque a função `tokenize_words` retorna uma lista de objetos com uma entrada por documento carregado. O nosso carregamento possui apenas um documento, então a lista também possui apenas um elemento. Para ver as palavras dentro do primeiro documento, utilizamos o símbolo [], da seguinte forma: `[[1]]`. O objetivo é selecionar apenas o primeiro elemento da lista: + + +```{r} + +length(palavras[[1]]) + +``` + +O resultado é `100`, indicando que existem 100 palavras neste parágrafo. + +A separação do documento em palavras individuais torna possível calcular quantas vezes cada palavra foi utilizada durante o texto. Para fazer isso, primeiro aplicamos a função `table` (tabela) nas palavras do primeiro (e, neste caso, único) documento e depois separamos os nomes e os valores da tabela num novo objeto chamado _data frame_. O uso de um quadro de dados em R é semelhante ao uso de uma tabela numa base de dados. Esses passos, em conjunto com a impressão do resultado, são obtidos com as seguintes linhas de código: + + +```{r} + +tabela <- table(palavras[[1]]) + +tabela <- data_frame(palavra = names(tabela), contagem = as.numeric(tabela)) + +tabela + +``` + +O resultado deste comando deve aparecer assim no seu console (*tibble* é um tipo específico de _data frame_ criado no pacote [Tidy Data](https://en.wikipedia.org/wiki/Tidy_data)): + +``` + +# A tibble: 77 x 2 + +palavra contagem + + + +1 a 4. + +2 ambos 1. + +3 ano 3. + +4 ao 1. + +5 aprecio 1. + +6 aprovar 1. + +7 as 2. + +8 às 1. + +9 assim 1. + +10 assistência 1. + +# ... with 67 more rows + +``` + + +Há uma quantidade substancial de informação nesta amostra. 
Vemos que existem 77 palavras únicas, como indica a dimensão da tabela. As 10 primeiras linhas do conjunto de dados são apresentadas, com a segunda coluna mostrando quantas vezes a palavra da primeira coluna foi utilizada. Por exemplo, “ano” foi usada três vezes, enquanto “aprovar”, apenas uma vez. + + + +Também podemos ordenar a tabela usando a função `arrange` (organizar). Esta função precisa do conjunto de dados a utilizar, aqui `tabela`, e depois o nome da coluna que serve de referência para ordená-lo. A função `desc` no segundo argumento indica que queremos ordenar em ordem decrescente. + + + +```{r} + +arrange(tabela, desc(contagem)) + +``` + + +E agora o resultado será: + + + +```{r} + +# A tibble: 77 x 2 + +palavra contagem + + + +1 que 5. + +2 a 4. + +3 ano 3. + +4 e 3. + +5 os 3. + +6 as 2. + +7 da 2. + +8 de 2. + +9 este 2. + +10 o 2. + +# … with 67 more rows + +``` + + + +As palavras mais comuns são pronomes e palavras funcionais tais como "que", "a", "e" e "os". Observe como a análise é facilitada pelo uso da versão em minúsculas de cada palavra: assim, uma palavra é contada da mesma forma, quer apareça no início, quer no meio de uma frase. + + + +Uma técnica popular é carregar uma lista de palavras frequentemente usadas e eliminá-las antes da análise formal. As palavras em tal lista são chamadas "*stopwords*" ou "palavras vazias" e são geralmente pronomes, conjugações dos verbos mais comuns e conjunções. Neste tutorial, temos uma variação sutil desta técnica. + + + +## Detectar frases + + + +O pacote **tokenizers** também contém a função `tokenize_sentences`, que detecta limites de frases, ao invés de palavras. Ela pode ser executada da seguinte maneira: + + + +```{r} + +frases <- tokenize_sentences(texto) + +frases + +``` + + + +Com o resultado: + + + +```{r} + +> frases + +[[1]] + +[1] "Também entendo que, pelo fato de estarmos em temporada eleitoral, as expectativas quanto ao que vamos realizar este ano são baixas." + +[2] "Mesmo assim, senhor presidente da Câmara, aprecio a atitude construtiva que o senhor e os outros líderes assumiram no final do ano passado para aprovar o orçamento e perpetuar a redução dos impostos sobre as famílias trabalhadoras." + +[3] "Desse modo, espero que possamos colaborar este ano sobre questões que são prioritárias para ambos os partidos, como a reforma da justiça criminal e a assistência às pessoas dependentes de drogas vendidas com receita médica." + +[4] "Quem sabe possamos surpreender os cínicos novamente." + +``` + + + +O resultado é um vetor de caracteres, um objeto unidimensional que consiste apenas em elementos representados como caracteres. Observe que o resultado marcou cada frase como um elemento separado. + + + +É possível conectar o resultado da divisão das frases com o resultado da divisão das palavras. Se executarmos a divisão de frases do parágrafo com a função `tokenize_words`, cada frase será tratada como um único documento. Execute isto usando a seguinte linha de código e veja se o resultado é o esperado; a segunda linha de comando serve para imprimir o resultado.
+ + + +```{r} + +frases_palavras <- tokenize_words(frases[[1]]) + +frases_palavras + +``` + + + +Se olharmos para o tamanho do resultado diretamente, podemos ver que existem quatro “documentos” no objeto `frases_palavras`: + + + +```{r} + +length(frases_palavras) + +``` + + + +Ao acessar cada uma delas diretamente, é possível saber quantas palavras há em cada frase do parágrafo: + + + +```{r} + +length(frases_palavras[[1]]) + +length(frases_palavras[[2]]) + +length(frases_palavras[[3]]) + +length(frases_palavras[[4]]) + +``` + + + +Isto pode demandar um pouco de esforço, mas felizmente existe uma maneira mais simples de o fazer. A função `sapply` executa a função no segundo argumento para cada elemento do primeiro argumento. Como resultado, podemos calcular a extensão de cada frase do primeiro parágrafo com uma única linha de código: + + + +```{r} + +sapply(frases_palavras, length) + +``` + + + +O resultado agora será assim: + + + +```{r} + +[1] 21 37 35 7 + +``` + + + +Podemos ver que existem quatro frases com um comprimento de 21, 37, 35 e 7 palavras. Utilizaremos esta função para trabalharmos com documentos maiores. + + + +# Analisar o discurso sobre o Estado da União de Barack Obama em 2016 + + + +## Análise exploratória + + + +Vamos aplicar as técnicas da seção anterior a um discurso sobre o Estado da União completo, desta vez, usando o original em inglês. Por uma questão de coerência, vamos usar o mesmo discurso de 2016 de Barack Obama. Agora, vamos carregar os dados de um ficheiro, uma vez que a cópia direta é difícil em grande escala. + + + +Para tal, vamos combinar a função `readLines` (ler linhas) para carregar o texto em R e a função `paste` (colar) para combinar todas as linhas num único objeto. Vamos criar a URL do arquivo de texto usando a função `sprintf`, uma vez que este formato permitirá que ele seja facilmente aproveitado para outros recursos online[^7],[^8]. + + + +```{r} + +base_url <- "https://raw.githubusercontent.com/programminghistorian/jekyll/gh-pages/assets/basic-text-processing-in-r/" + +url <- sprintf("%s/sotu_text/236.txt", base_url) + +texto <- paste(readLines(url), collapse = "\n") + +``` + + + +Como antes, vamos segmentar o texto e ver o número de palavras no documento. + + + +```{r} + +palavras <- tokenize_words(texto) + +length(palavras[[1]]) + +``` + + + +Vemos que este discurso contém um total de `6113` palavras. Ao combinar as funções `table` (tabela), `data_frame` e `arrange` (organizar), como fizemos no exemplo anterior, obtemos as palavras mais frequentes em todo o discurso. Ao fazer isso, observe como é fácil reutilizar o código anterior para repetir a análise num novo conjunto de dados. Este é um dos maiores benefícios de usar uma linguagem de programação para realizar uma análise baseada em dados [^9]. + + + +```{r} + +tabela <- table(palavras[[1]]) + +tabela <- data_frame(word = names(tabela), count = as.numeric(tabela)) + +tabela <- arrange(tabela, desc(count)) + +tabela + +``` + +O resultado deve ser: + + +```{r} + +>#A tibble: 1,590 x 2 + +word count + + + +1 the 281. + +2 to 209. + +3 and 189. + +4 of 148. + +5 that 125. + +6 we 124. + +7 a 120. + +8 in 105. + +9 our 96. + +10 is 72. + +>#... with 1,580 more rows + +``` + +Mais uma vez, palavras extremamente comuns como *the* ("o" ou "a"), *to* ("para") e *and* ("e") estão no topo da tabela. Estes termos não são particularmente esclarecedores se quisermos conhecer o assunto do discurso. 
Na realidade, queremos encontrar palavras que se destaquem mais neste texto do que num grande corpus externo em inglês. Para conseguir isso, precisamos de um conjunto de dados que forneça essas frequências. Aqui está o conjunto de dados de Peter Norvig usando o *Google Web Trillion Word Corpus* (Corpus de um trilhão de palavras da web do Google), coletado a partir dos dados compilados através do rastreamento de sites populares em inglês pelo Google [^10]: + + +```{r} + +palavras_frequentes <- read_csv(sprintf("%s/%s", base_url, "word_frequency.csv")) + +palavras_frequentes + +``` + + +A primeira coluna indica o idioma (sempre "en" para inglês neste caso), a segunda coluna fornece a palavra em questão e a terceira coluna - frequency - indica a percentagem com a qual ela aparece no *Corpus de um trilhão de palavras do Google*. Por exemplo, a palavra "for" aparece quase exatamente 1 vez a cada 100 palavras, pelo menos nos textos dos sites indexados pelo Google. + + + +Para combinar estas palavras frequentes com o conjunto de dados na `tabela` construída a partir do discurso do Estado da União, podemos usar a função `inner_join` (união interna). Esta função toma dois conjuntos de dados e combina-os em todas as colunas que têm o mesmo nome. Neste caso, a coluna comum é a chamada _word_ ("palavra"). + + + +```{r} + +tabela <- inner_join(tabela, palavras_frequentes) + +tabela + +``` + + + +Note que agora o nosso conjunto de dados tem duas colunas extras que fornecem o idioma (aqui relativamente pouco útil já que é sempre "en") e a frequência da palavra no corpus externo. Esta segunda nova coluna será muito útil, porque podemos filtrar linhas que têm uma frequência inferior a 0,1%, ou seja, que aparecem menos de uma vez em cada 1000 palavras: + + + +```{r} + +filter(tabela, frequency < 0.1) + +``` + + +Isto produz: + + +```{r} + +>#A tibble: 1,457 x 4 + +word count language frequency + + + +1 america 28. en 0.0232 + +2 people 27. en 0.0817 + +3 just 25. en 0.0787 + +4 world 23. en 0.0734 + +5 american 22. en 0.0387 + +6 work 22. en 0.0713 + +7 make 20. en 0.0689 + +8 want 19. en 0.0440 + +9 change 18. en 0.0358 + +10 years 18. en 0.0574 + +>#... with 1,447 more rows + +``` + + + +Esta lista está começando a se tornar mais interessante. Um termo como "america" aparece no topo da lista porque, podemos pensar, é muito usado nos discursos dos políticos e menos em outros campos. Ao estabelecer o limiar ainda mais baixo, em 0,002, obtemos um melhor resumo do discurso. Como seria útil ver mais do que as dez linhas padrão, vamos usar a função `print` (imprimir) junto com a opção `n` (de número) definida como 15 para que possamos ver mais linhas. + + + +```{r} + +print(filter(tabela, frequency < 0.002), n = 15) + +``` + + + +Isto agora nos mostra o seguinte resultado: + + + +```{r} + +>#A tibble: 463 x 4 + +word count language frequency + + + +1 laughter 11. en 0.000643 + +2 voices 8. en 0.00189 + +3 allies 4. en 0.000844 + +4 harder 4. en 0.00152 + +5 qaida 4. en 0.000183 + +6 terrorists 4. en 0.00122 + +7 bipartisan 3. en 0.000145 + +8 generations 3. en 0.00123 + +9 stamp 3. en 0.00166 + +10 strongest 3. en 0.000591 + +11 syria 3. en 0.00136 + +12 terrorist 3. en 0.00181 + +13 tougher 3. en 0.000247 + +14 weaken 3. en 0.000181 + +15 accelerate 2. en 0.000544 + +>#... with 448 more rows + +``` + +Os resultados parecem sugerir alguns dos temas principais deste discurso, como “syria” (Síria), “terrorist” (terrorista) e “qaida” (Qaeda) (o nome al-qaida foi dividido em “al” e “qaida” pelo tokenizador).
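+ +Antes de passarmos ao resumo, pode ser útil reunir os passos anteriores numa pequena função reutilizável. O bloco abaixo é um esboço nosso e não faz parte do código original da lição: ele assume que os pacotes **tidyverse** e **tokenizers** já estão carregados e que o objeto `palavras_frequentes` existe; o nome `palavras_distintivas` e o argumento `limiar` são escolhas nossas, apenas ilustrativas. + +```{r} + +# Esboço: reúne a segmentação, a contagem e o filtro de frequência num só passo + +palavras_distintivas <- function(texto, limiar = 0.002) { + +palavras <- tokenize_words(texto) + +tabela <- table(palavras[[1]]) + +tabela <- data_frame(word = names(tabela), count = as.numeric(tabela)) + +tabela <- arrange(tabela, desc(count)) + +tabela <- inner_join(tabela, palavras_frequentes) + +filter(tabela, frequency < limiar) + +} + +``` + +Chamar `palavras_distintivas(texto)` deve reproduzir a tabela acima; a mesma lógica reaparecerá, na forma de um loop, quando analisarmos o corpus completo.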
+ + + +## Sumarizar o documento + + + +Para fornecer informações contextuais para o conjunto de dados que estamos analisando, temos uma tabela com metadados sobre cada um dos discursos do Estado da União. Vamos carregá-la em R: + +```{r} + +metadados <- read_csv(sprintf("%s/%s", base_url, "metadata.csv")) + +metadados + +``` + + +As primeiras dez linhas do grupo de dados aparecem assim: + + +```{r} + +>#A tibble: 236 x 4 + +president year party sotu_type + + + +1 George Washington 1790 Nonpartisan speech + +2 George Washington 1790 Nonpartisan speech + +3 George Washington 1791 Nonpartisan speech + +4 George Washington 1792 Nonpartisan speech + +5 George Washington 1793 Nonpartisan speech + +6 George Washington 1794 Nonpartisan speech + +7 George Washington 1795 Nonpartisan speech + +8 George Washington 1796 Nonpartisan speech + +9 John Adams 1797 Federalist speech + +10 John Adams 1798 Federalist speech + +>#... with 226 more rows + +``` + + +Temos o nome do presidente, o ano, o partido político do presidente e o formato de discurso do Estado da União (oral ou escrito) para cada discurso no conjunto. O discurso de 2016 está na linha 236 dos metadados, que, por acaso, é a última linha. + + + +Na próxima seção, pode ser útil resumir os dados para um discurso numa única linha de texto. Podemos fazer isto extraindo as cinco palavras mais frequentes com uma frequência inferior a 0,002% no *Corpus de um trilhão de palavras do Google* e combinando isso com dados sobre o presidente e o ano. + + +```{r} + +tabela <- filter(tabela, frequency < 0.002) + +resultado <- c(metadados$president[236], metadados$year[236], tabela$word[1:5]) + +paste(resultado, collapse = "; ") + +``` + + + +Isto deveria dar-nos o seguinte resultado: + + + +```{r} + +[1] "Barack Obama; 2016; laughter; voices; allies; harder; qaida" + +# Em português: "Barack Obama; 2016; risadas; vozes; aliados; mais duro; qaeda" + +``` + +Esta linha capta tudo sobre o discurso? É evidente que não. O processamento de texto nunca substituirá a leitura atenta de um texto, mas ajuda a dar um resumo de alto nível das questões discutidas ("risadas" aparecem aqui porque as reações do público são anotadas no texto do discurso). Este resumo é útil de várias maneiras. Pode fornecer um título ad-hoc ou resumo para um documento que não tenha estas informações; pode servir para lembrar aos leitores que leram ou ouviram o discurso quais foram os principais temas discutidos; e compilar vários resumos com uma única ação pode mostrar padrões em grande escala que muitas vezes se perdem em grandes corpus. É a este último uso que recorremos agora ao aplicar as técnicas desta seção a um grupo maior de discursos do Estado da União. + + +# Análise dos discursos do Estado da União de 1790 a 2016 + +## Carregar o corpus + + +A primeira coisa a fazer para analisar o corpus de discursos do Estado da União é carregá-los em R. Isto envolve as mesmas funções `paste` (colar) e `readLines` (ler linhas) como antes, mas temos que gerar um loop `for` (para) que executa as funções nos 236 ficheiros de texto. Os resultados de cada iteração são combinados num único vetor com a função `c`. + + + +```{r} + +ficheiros <- sprintf("%s/sotu_text/%03d.txt", base_url, 1:236) + +texto <- c() + +for (f in ficheiros) { + +texto <- c(texto, paste(readLines(f), collapse = "\n")) + +} + +``` + +Esta técnica carrega todos os ficheiros um a um do GitHub. Opcionalmente, é possível baixar um arquivo zip (comprimido) com o corpus completo e carregar os ficheiros manualmente. Esta técnica é descrita na próxima seção.
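+ +Seja qual for a forma de carregamento escolhida (a alternativa é descrita a seguir), vale a pena conferir se tudo correu bem antes de prosseguir. A verificação abaixo é uma sugestão nossa, não parte da lição original: como o corpus contém 236 discursos, o vetor `texto` deve ter 236 elementos. + +```{r} + +# Cada elemento de texto corresponde a um discurso; esperamos 236 + +length(texto) + +``` + +Se o resultado for diferente de `236`, é provável que algum ficheiro não tenha sido carregado corretamente.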
+ + +## Forma alternativa de carregar o corpus (opcional) + +Pode fazer o download do corpus aqui: [sotu_text.zip](/assets/basic-text-processing-in-r/sotu_text.zip). Descompacte o arquivo em algum lugar no seu computador e defina a variável `input_loc` (local de entrada) para o caminho do diretório onde o arquivo foi descompactado. Por exemplo, se os ficheiros estão na área de trabalho de um computador macOS e o usuário é o stevejobs, `input_loc` deve ser: + +```{r} + +input_loc <- "/Users/stevejobs/Desktop/sotu_text" + +``` + +Uma vez feito, pode usar o seguinte bloco de código para carregar todos os textos: + +```{r} + +ficheiros <- dir(input_loc, full.names = TRUE) + +texto <- c() + +for (f in ficheiros) { + +texto <- c(texto, paste(readLines(f), collapse = "\n")) + +} + +``` + + +É possível usar esta mesma técnica para carregar seu próprio corpus de textos. + + +## Análise exploratória + + +Uma vez mais, com a função `tokenize_words`, podemos calcular o comprimento de cada discurso em número de palavras. + +```{r} + +palavras <- tokenize_words(texto) + +sapply(palavras, length) + +``` + +Existe um padrão temporal na duração dos discursos? Como se compara a duração dos discursos de outros presidentes com os de Franklin D. Roosevelt, Abraham Lincoln e George Washington? + + +A melhor maneira de descobrir é criando um gráfico de dispersão. É possível construir um usando a função `qplot` (gráfico), com o ano (year) no eixo x ou horizontal e o número de palavras (length) no eixo y ou vertical. + +```{r} + +qplot(metadados$year, sapply(palavras, length)) + labs(x = "Ano", y = "Número de palavras") + +``` + +Isto cria um gráfico como este: + +![Number of words in each State of the Union Address plotted by year.](/images/basic-text-processing-in-r/sotu-number-of-words.jpg)Número de palavras em cada discurso do Estado da União por ano. + +Parece que o comprimento dos discursos aumentou de forma constante de 1790 a 1850 e voltou a aumentar no final do século XIX. A duração diminuiu drasticamente em torno da Primeira Guerra Mundial, com alguns pontos discrepantes espalhados ao longo do século XX. + + +Existe alguma razão por trás dessas mudanças? Para explicar esta variação, podemos definir a cor dos pontos para denotar se são discursos que foram apresentados por escrito ou falados. O comando para fazer este gráfico envolve apenas uma pequena mudança no comando do gráfico: + + +```{r} + +qplot(metadados$year, sapply(palavras, length), color = metadados$sotu_type) + labs(x = "Ano", y = "Número de palavras", color = "Modalidade do discurso") + +``` + +Isto produz o seguinte gráfico: + +![Number of words in each State of the Union Address plotted by year, with color denoting whether it was a written or oral message.](/images/basic-text-processing-in-r/sotu-number-of-words-and-type.jpg)Número de palavras em cada discurso do Estado da União organizado por ano e com a cor denotando se se tratava de um discurso escrito ou oral. + + +Vemos que o aumento no século XIX foi quando os discursos se tornaram documentos escritos e que a queda drástica foi quando Woodrow Wilson (28º Presidente dos Estados Unidos, entre 1913 e 1921) rompeu com a tradição e deu o seu discurso sobre o Estado da União oralmente no Congresso. Os pontos discrepantes que vimos anteriormente eram discursos proferidos por escrito após a Segunda Guerra Mundial. + + + +## Análise estilométrica + + +A estilometria, o estudo linguístico do estilo, faz uso extensivo de métodos computacionais para descrever o estilo de escrita de um autor.
Com o nosso corpus, é possível detectar mudanças no estilo de escrita ao longo dos séculos XIX e XX. Um estudo estilométrico mais formal geralmente envolve o uso de código de análise sintática ou de reduções de dimensionalidade algorítmicas complexas, tais como a análise de componentes principais, para estudo ao longo do tempo e entre autores. Neste tutorial, continuaremos a nos concentrar no estudo do comprimento das frases. + +O corpus pode ser dividido em frases usando a função `tokenize_sentences`. Neste caso, o resultado é uma lista com 236 objetos, cada um representando um documento específico. + + + +```{r} + +frases <- tokenize_sentences(texto) + +``` + + + +Em seguida, queremos dividir cada frase em palavras. A função `tokenize_words` pode ser utilizada, mas não diretamente sobre a lista de objetos `frases`. Poderíamos fazer isso com um loop `for` de novo, mas há uma forma mais simples de o fazer. A função `sapply` oferece uma abordagem mais direta. Aqui, queremos aplicar a segmentação de palavras individualmente a cada documento e, para isso, esta função é perfeita. + + + +```{r} + +frases_palavras <- sapply(frases, tokenize_words) + +``` + +Agora, temos uma lista (com cada elemento representando um documento) de listas (com cada elemento representando as palavras de uma dada frase). O resultado de que precisamos é uma lista de objetos que forneça o comprimento de cada frase num dado documento. Para isto, combinamos o loop `for` com a função `sapply`. + + + +```{r} + +comprimento_frases <- list() + +for (i in 1:nrow(metadados)) { + +comprimento_frases[[i]] <- sapply(frases_palavras[[i]], length) + +} + +``` + + +O resultado de `comprimento_frases` pode ser visualizado numa linha temporal. Primeiro, precisamos de resumir o comprimento de todas as frases de um documento a um único número. A função `median` (mediana), que encontra o 50º percentil dos dados inseridos, é uma boa opção para resumir as frases, porque não será muito afectada por possíveis erros de segmentação que podem ter criado uma frase artificialmente longa [^11]. + + + +```{r} + +mediana_comprimento_frases <- sapply(comprimento_frases, median) + +``` + + +Agora, criamos um diagrama com essa variável junto com os anos dos discursos utilizando, mais uma vez, a função `qplot`. + + + +```{r} + +qplot(metadados$year, mediana_comprimento_frases) + labs(x = "Ano", y = "Mediana do comprimento das frases") + +``` + +![Median sentence length for each State of the Union Address.](/images/basic-text-processing-in-r/sotu-sentence-length.jpg)Duração mediana das frases por discurso do Estado da União. + +O gráfico mostra-nos uma forte tendência geral de frases mais curtas nos dois séculos do corpus. Lembre-se que alguns discursos no final da segunda metade do século XX eram longos e escritos, muito parecidos com os do século XIX. É particularmente interessante que estes não se destaquem no que diz respeito à mediana do comprimento das frases. + + +Para tornar esse padrão ainda mais explícito, é possível adicionar uma linha de tendência no gráfico com a função `geom_smooth` (geometrização suave). + + +```{r} + +qplot(metadados$year, mediana_comprimento_frases) + geom_smooth() + labs(x = "Ano", y = "Mediana do comprimento das frases") + +``` + +![Median sentence length for each State of the Union Address, with a smoothing line.](/images/basic-text-processing-in-r/sotu-sentence-length-smooth.jpg)Comprimento mediano de cada discurso do Estado da União com uma linha de tendência.
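+ +Além da inspeção visual, pode ser interessante localizar diretamente os pontos extremos do gráfico. O bloco abaixo é um esboço nosso, ausente da lição original: usa a função `which.max`, do R de base, para identificar o discurso com a maior mediana de comprimento de frases. + +```{r} + +# which.max devolve a posição do maior valor do vetor + +i <- which.max(mediana_comprimento_frases) + +c(metadados$president[i], metadados$year[i], mediana_comprimento_frases[i]) + +``` + +O mesmo raciocínio vale para `which.min`, caso queira encontrar o discurso com as frases mais curtas.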
+ + +As linhas de tendência são um ótimo complemento aos gráficos. Elas possuem a função dupla de mostrar a tendência geral dos dados no tempo, enquanto destacam pontos atípicos ou periféricos. + + + +## Resumo do documento + + + +Como tarefa final, queremos aplicar a função de resumo simples que utilizamos na seção anterior a cada um dos documentos desse corpus mais amplo. Precisamos utilizar um loop outra vez, mas o código interno permanece quase o mesmo, com a exceção de que precisamos guardar os resultados como um elemento do vetor `description` (descrição). + + + +```{r} + +description <- c() + +``` + +```{r} + +for (i in 1:length(palavras)) { + +tabela <- table(palavras[[i]]) + +tabela <- data_frame(word = names(tabela), count = as.numeric(tabela)) + +tabela <- arrange(tabela, desc(count)) + +tabela <- inner_join(tabela, palavras_frequentes) + +tabela <- filter(tabela, frequency < 0.002) + +resultado <- c(metadados$president[i], metadados$year[i], tabela$word[1:5]) + +description <- c(description, paste(resultado, collapse = "; ")) + +} + +``` + + + +Enquanto cada ficheiro é processado, a função `inner_join` exibe uma linha que diz **Joining, by = "word"**. Como o loop pode demorar um minuto ou mais para ser executado, esta linha serve para assegurar que o código está, de fato, processando os ficheiros. Podemos ver o resultado do loop escrevendo `description` no console, mas, com a função `cat`, obtemos uma visão mais nítida dos resultados. + +```{r} + +cat(description, sep = "\n") + +``` + + +Os resultados oferecem uma linha para cada discurso do Estado da União. Aqui, por exemplo, estão as linhas dos presidentes Bill Clinton, George W. Bush e Barack Obama: + +``` + +>William J. Clinton; 1993; deficit; propose; incomes; invest; decade + +William J. Clinton; 1994; deficit; renew; ought; brady; cannot + +William J. Clinton; 1995; ought; covenant; deficit; bureaucracy; voted + +William J. Clinton; 1996; bipartisan; gangs; medicare; deficit; harder + +William J. Clinton; 1997; bipartisan; cannot; balanced; nato; immigrants + +William J. Clinton; 1998; bipartisan; deficit; propose; bosnia; millennium + +William J. Clinton; 1999; medicare; propose; surplus; balanced; bipartisan + +William J. Clinton; 2000; propose; laughter; medicare; bipartisan; prosperity + +George W. Bush; 2001; medicare; courage; surplus; josefina; laughter + +George W. Bush; 2002; terrorist; terrorists; allies; camps; homeland + +George W. Bush; 2003; hussein; saddam; inspectors; qaida; terrorists + +George W. Bush; 2004; terrorists; propose; medicare; seniors; killers + +George W. Bush; 2005; terrorists; iraqis; reforms; decades; generations + +George W. Bush; 2006; hopeful; offensive; retreat; terrorists; terrorist + +George W. Bush; 2007; terrorists; qaida; extremists; struggle; baghdad + +George W.
Bush; 2008; terrorists; empower; qaida; extremists; deny + +Barack Obama; 2009; deficit; afford; cannot; lending; invest + +Barack Obama; 2010; deficit; laughter; afford; decade; decades + +Barack Obama; 2011; deficit; republicans; democrats; laughter; afghan + +Barack Obama; 2012; afford; deficit; tuition; cannot; doubling + +Barack Obama; 2013; deficit; deserve; stronger; bipartisan; medicare + +Barack Obama; 2014; cory; laughter; decades; diplomacy; invest + +Barack Obama; 2015; laughter; childcare; democrats; rebekah; republicans + +Barack Obama; 2016; laughter; voices; allies; harder; qaida + +``` + +Como já foi referido, estes resumos temáticos não são, de forma alguma, um substituto para uma leitura atenta de cada documento. Eles servem, no entanto, como um resumo geral e de alto nível de cada presidência. Vemos, por exemplo, o foco inicial no déficit durante os primeiros anos da presidência de Bill Clinton, sua mudança em direção ao bipartidarismo enquanto a Câmara e o Senado se inclinavam para os republicanos em meados dos anos 1990, e uma mudança em direção à reforma do Medicare no final de sua presidência. Os discursos de George W. Bush concentraram-se, principalmente, no terrorismo, com exceção do discurso de 2001 proferido antes dos ataques terroristas de 11 de setembro. Barack Obama voltou a preocupar-se com a economia sob a sombra da recessão de 2008. A palavra "riso" aparece frequentemente porque é adicionada às transcrições quando o riso do público faz com que o orador pare. + + + +# Próximos passos + + + +Neste pequeno tutorial exploramos algumas maneiras básicas de analisar dados textuais com a linguagem de programação R. Há várias direções que se pode tomar para se aprofundar nas novas técnicas de análise de texto. Aqui estão três exemplos particularmente interessantes: + + + +* conduzir uma análise completa com base em processamento de linguagem natural (NLP) num texto para extrair características tais como nomes de entidades, categorias gramaticais e relações de dependência. Estes estão disponíveis em vários pacotes R, incluindo o **cleanNLP**[^12], e para vários idiomas. + +* realizar uma modelagem por tópicos (*topic models*) para detectar discursos específicos no corpus usando pacotes como **mallet**[^13] e **topicmodels**[^14]. + +* aplicar técnicas de redução de dimensionalidade para traçar tendências estilísticas ao longo do tempo ou entre diferentes autores. Por exemplo, o pacote **tsne** [^15] realiza uma poderosa forma de redução de dimensionalidade particularmente favorável a gráficos detalhados. + + +Existem muitos tutoriais genéricos para estes três exemplos, assim como uma documentação detalhada dos pacotes[^16]. Esperamos oferecer tutoriais focados em aplicações históricas deles no futuro. + + + +# Notas + +[^1]: O nosso corpus contém 236 discursos sobre o Estado da União. Dependendo do que for contado, este número pode ser ligeiramente superior ou inferior. + +[^2]: Taryn Dewar, “R Basics with Tabular Data,” Programming Historian (05 September 2016), [/lessons/r-basics-with-tabular-data](/en/lessons/r-basics-with-tabular-data). + +[^3]: Hadley Wickham. “tidyverse: Easily Install and Load ‘Tidyverse’ Packages”. R Package, Version 1.1.1. https://cran.r-project.org/web/packages/tidyverse/index.html + +[^4]: Lincoln Mullen and Dmitriy Selivanov. “tokenizers: A Consistent Interface to Tokenize Natural Language Text”. R Package, Version 0.1.4.
https://cran.r-project.org/web/packages/tokenizers/index.html + +[^5]: Tenha em mente que os nomes das funções, como `library` e `install.packages`, sempre estarão em inglês. Apesar disso, colocamos uma tradução do significado para facilitar a compreensão e traduzimos os nomes das variáveis [N. de T.]. + +[^6]: Tradução publicada pela Folha em português (13 de janeiro de 2016) [https://www1.folha.uol.com.br/mundo/2016/01/1729011-leia-a-integra-do-ultimo-discurso-do-estado-da-uniao-de-obama.shtml](https://www1.folha.uol.com.br/mundo/2016/01/1729011-leia-a-integra-do-ultimo-discurso-do-estado-da-uniao-de-obama.shtml) [N. de T.] + +[^7]: Foi feito o download de todos os discursos presidenciais do The American Presidency Project da University of California Santa Barbara (acesso em 11 de novembro de 2016) [https://www.presidency.ucsb.edu/sou.php](https://www.presidency.ucsb.edu/sou.php) + +[^8]: Aqui, voltamos para a versão original do discurso, em inglês, para dar prosseguimento à análise e, particularmente, para observarmos a lista de palavras mais utilizadas em inglês. Continuaremos a traduzir os nomes das variáveis e das funções para facilitar a compreensão em português [N. de T.]. + +[^9]: Aqui, optamos por nomear as colunas da tabela em inglês, como *word* (palavra) e *count* (contagem), para facilitar a interação com o conjunto de dados que será introduzido depois com a função `inner_join` [N. de T.]. + +[^10]: Peter Norvig. “Google Web Trillion Word Corpus”. (Acesso em 11 de novembro de 2016) https://norvig.com/ngrams/. + +[^11]: Isto ocorre em alguns discursos escritos do Estado da União, quando uma lista com numeração é segmentada numa única frase longa. + +[^12]: Taylor Arnold. “cleanNLP: A Tidy Data Model for Natural Language Processing”. R Package, Version 0.24. https://cran.r-project.org/web/packages/cleanNLP/index.html + +[^13]: David Mimno. “mallet: A wrapper around the Java machine learning tool MALLET”. R Package, Version 1.0. https://cran.r-project.org/web/packages/mallet/index.html + +[^14]: Bettina Grün and Kurt Hornik. “topicmodels: Topic Models”. R Package, Version 0.2-4. https://cran.r-project.org/web/packages/topicmodels/index.html + +[^15]: Ver o artigo "t-distributed stochastic neighbor embedding" na Wikipedia (em inglês). https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding [N. de T.] + +[^16]: Ver, por exemplo, o livro dos autores Taylor Arnold and Lauren Tilton. *Humanities Data in R: Exploring Networks, Geospatial Data, Images, and Text.* Springer, 2015. diff --git a/pt/licoes/qgis-camadas.md b/pt/licoes/qgis-camadas.md index f86990fb8b..8312313357 100644 --- a/pt/licoes/qgis-camadas.md +++ b/pt/licoes/qgis-camadas.md @@ -68,11 +68,11 @@ Estaremos utilizando alguns dados governamentais da província de Prince Edward Entre nos links abaixo em seu navegador. Desenvolvemos os dois últimos *shapefiles*, então eles devem baixar automaticamente: -1. -2. -3. -4. -5. +1. +2. +3. +4. +5. 6. [PEI Highways](/assets/qgis-layers/PEI_highway.zip) 7. [PEI Places](/assets/qgis-layers/PEI_placenames.zip) @@ -103,7 +103,7 @@ Selecione Propriedades do Projeto. - Perceba que a projeção mudou no canto inferior direito da janela do QGIS. Próximo a ela, verá a localização geográfica do seu cursor em metros. - Na janela **Projeto**, selecione _Salvar Projeto_ (é recomendado salvar seu projeto após cada etapa).
-Agora está pronto para trabalhar no projeto de tutorial, mas pode ser que tenha algumas perguntas sobre qual SRC utilizar para o seu próprio projeto. O WGS83 pode funcionar a curto prazo, principalmente se estiver trabalhando em uma escala consideravelmente maior, mas apresentará dificuldades em trabalhar com precisão em mapas locais. Uma dica é saber quais SRC ou Projeções são utilizados para os mapas em papel da região. Caso digitalize um mapa físico de alta qualidade para utilizar como camada base, pode ser uma boa ideia utilizar a mesma projeção. Pode-se também tentar buscar na internet quais os SRC mais comuns para determinada região. Para aqueles trabalhando em projetos norte americanos, identificar o NAD83 correto par a sua região vai ser, geralmente, o melhor SRC. Aqui estão alguns links para outros recursos que lhe ajudarão a escolher um SRC para o seu próprio projeto: [Tutorial: Trabalhando com Projeções no QGIS](http://web.archive.org/web/20180715071501/http://www.qgistutorials.com/pt_BR/docs/working_with_projections.html) (em inglês).
+Agora está pronto para trabalhar no projeto de tutorial, mas pode ser que tenha algumas perguntas sobre qual SRC utilizar para o seu próprio projeto. O WGS84 pode funcionar a curto prazo, principalmente se estiver trabalhando em uma escala consideravelmente maior, mas apresentará dificuldades em trabalhar com precisão em mapas locais. Uma dica é saber quais SRC ou Projeções são utilizados para os mapas em papel da região. Caso digitalize um mapa físico de alta qualidade para utilizar como camada base, pode ser uma boa ideia utilizar a mesma projeção. Pode-se também tentar buscar na internet quais os SRC mais comuns para determinada região. Para aqueles trabalhando em projetos norte-americanos, identificar o NAD83 correto para a sua região vai ser, geralmente, o melhor SRC. Aqui estão alguns links para outros recursos que lhe ajudarão a escolher um SRC para o seu próprio projeto: [Tutorial: Trabalhando com Projeções no QGIS](https://web.archive.org/web/20180715071501/https://www.qgistutorials.com/pt_BR/docs/working_with_projections.html) (em inglês).

### Construindo um mapa base

@@ -275,4 +275,4 @@ Após criar um mapa utilizando camadas vetoriais, agora nós iremos adicionar ou

Aprendeu a instalar o QGIS e a adicionar camadas. Certifique-se de salvar o seu trabalho!

-*Essa lição é parte do [Geospatial Historian](http://geospatialhistorian.wordpress.com/).*
+*Essa lição é parte do [Geospatial Historian](https://geospatialhistorian.wordpress.com/).*
diff --git a/pt/licoes/som-dados-sonificacao-historiadores.md b/pt/licoes/som-dados-sonificacao-historiadores.md
index 6a3a84cb2f..d7cfed0879 100644
--- a/pt/licoes/som-dados-sonificacao-historiadores.md
+++ b/pt/licoes/som-dados-sonificacao-historiadores.md
@@ -1,487 +1,487 @@
----
-title: Sonificação de dados (uma introdução à sonificação para historiadores)
-layout: lesson
-slug: som-dados-sonificacao-historiadores
-date: 2016-06-07
-translation_date: 2021-03-26
-authors:
-- Shawn Graham
-reviewers:
-- Jeff Veitch
-- Tim Compeau
-editors:
-- Ian Milligan
-translator:
-- Gabriela Kucuruza
-translation-editor:
-- Jimmy Medeiros
-translation-reviewer:
-- Samuel Van Ransbeeck
-- Juliana Marques da Silva
-difficulty: 2
-review-ticket: https://github.com/programminghistorian/ph-submissions/issues/429
-activity: transforming
-topics: [distant-reading]
-abstract: "Existem inúmeras lições que o ajudarão a visualizar o passado, mas esta lição o ajudará a ouvir o passado."
-original: sonification -avatar_alt: Um violino -doi: 10.46430/phpt0020 ---- - -{% include toc.html %} - -# Introdução - -ποίησις - fabricação, criação, produção - -Eu estou muito cansado de ver o passado. Existem diversos guias que irão ajudar a _visualizar_ o passado que não podemos ver, mas muitas vezes nós esquecemos que a visualização é um ato de criatividade. Nós talvez estejamos muito ligados às nossas telas, muito focados em "ver". Ao invés disso, deixe-me ouvir algo do passado. - -Enquanto existe uma história e uma literatura profundas sobre arqueoacústica e paisagens sonoras que tentam capturar o som de um lugar _como ele era_ ([veja por exemplo a Virtual St. Paul's](https://www.digitalstudies.org/articles/10.16995/dscn.58) ou o trabalho de [Jeff Veitch em Ostia antiga](https://jeffdveitch.wordpress.com/)), eu tenho interesse em 'sonificar' o que eu tenho _agora_, os dados eles mesmos. Eu quero descobrir uma gramática para representar dados em som que seja apropriada para História. [Drucker](#Drucker) [notoriamente nos lembra](http://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html) que ‘dados’ não são coisas dadas, mas ao invés disso, coisas capturadas, coisas transformadas. Na sonificação de dados, eu literalmente realizo o passado no presente, e então as suposições e as transformações que faço estão em primeiro plano. A experiência auditiva resultante é uma "deformação" literal que nos faz ouvir as camadas modernas do passado de uma nova maneira. - -Eu quero ouvir os significados do passado, mas eu sei que não posso. No entanto, quando ouço um instrumento, posso imaginar a materialidade do músico tocando; posso discernir o espaço físico em seus ecos e ressonâncias. Eu posso sentir o som, eu posso me mover no ritmo. A música engaja o meu corpo inteiro, minha imaginação inteira. As suas associações com sons, música e tons que eu ouvi antes criam uma experiência temporal profunda, um sistema de relações incorporadas entre eu e o passado. Visual? Nós temos representações visuais do passado há tanto tempo, que nós quase nos esquecemos dos aspectos artístico e performativo dessas gramáticas de expressão. - -Nesse tutorial, você aprenderá a fazer um pouco de barulho a partir dos seus dados sobre o passado. O _significado_ desse barulho, bem... isso depende de você. Parte do objetivo desse tutorial é te fazer estranhar os seus dados. Traduzindo-o, transcodificando-o, [remediando-o](http://blog.taracopplestone.co.uk/making-things-photobashing-as-archaeological-remediation/) (em inglês), nós começaremos a ver elementos dos dados que a nossa familiaridade com modelos visuais nos impediu de enxergar. Essa deformação está de acordo com os argumentos apresentados por, por exemplo, Mark Sample sobre [quebrar coisas](http://www.samplereality.com/2012/05/02/notes-towards-a-deformed-humanities/) (em inglês), ou Bethany Nowviskie sobre a '[resistência nos materiais](http://nowviskie.org/2013/resistance-in-the-materials/)' (em inglês). Sonificação nos move através do continuum de dados para captação, ciências sociais para arte, [falha para estética](http://nooart.org/post/73353953758/temkin-glitchhumancomputerinteraction) (em inglês). Então vamos ver como isso tudo soa. - -## Objetivos - -Nesse tutorial, apresentarei três maneiras diferentes de gerar som ou música a partir de seus dados. 
-
-Na primeira, usaremos um sistema desenvolvido por Jonathan Middleton, disponível gratuitamente para uso, chamado _Musicalgorithms_ (Algoritmos Musicais) a fim de introduzir algumas das questões e termos-chaves envolvidos. Na segunda, usaremos uma pequena biblioteca do Python para 'mapear por parâmetro' os nossos dados contra o teclado de 88 teclas e introduzir um pouco de arte em nosso trabalho. Finalmente, aprenderemos como carregar nossos dados no ambiente de codificação ao vivo de código aberto para som e música, _Sonic Pi_, momento em que te deixarei para que explore os abundantes tutoriais e recursos desse projeto.
-
-Você verá que "sonificação" nos movimenta através do espectro partindo de simples 'visualização/auralização' para performance real.
-
-### Ferramentas
-+ Musicalgorithms [http://musicalgorithms.org/](http://musicalgorithms.org/)
-+ MIDITime [https://github.com/cirlabs/miditime](https://github.com/cirlabs/miditime) (Eu bifurquei uma cópia no GitHub [aqui](https://github.com/shawngraham/miditime))
-+ Sonic Pi [http://sonic-pi.net/](http://sonic-pi.net/)
-
-### Dados de Exemplo
-
-+ [Dados sobre artefatos romanos](/assets/sonification/sonification-roman-data.csv)
-+ [Excerto do modelo de tópicos do diário de John Adams](/assets/sonification/sonification-diary.csv)
-+ [Excerto do modelo de tópicos das relações jesuíticas](/assets/sonification/sonification-jesuittopics.csv)
-
-# Um pouco de contexto sobre sonificação
-
-Sonificação é a prática de mapear aspectos dos dados para produzir sinais sonoros. Em geral, uma técnica pode ser chamada de "sonificação" se cumprir certas condições. Elas incluem reprodutibilidade (os mesmos dados podem ser transformados da mesma maneira por outros pesquisadores de forma que produzam os mesmos resultados) e o que pode ser chamado de inteligibilidade - que os elementos "objetivos" dos dados originais sejam sistematicamente refletidos no som resultante (veja [Hermann (2008)](http://www.icad.org/Proceedings/2008/Hermann2008.pdf) (em inglês) para uma taxonomia da sonificação). [Last e Usyskin (2015)](https://www.researchgate.net/publication/282504359_Listen_to_the_Sound_of_Data) (em inglês) realizaram uma série de experimentos para determinar quais tarefas de análise de dados poderiam ser realizadas quando os dados eram sonificados. Os seus resultados experimentais mostraram que mesmo um grupo de ouvintes não-treinados (sem treinamento formal em música) pode fazer distinções úteis nos dados. Eles encontraram ouvintes que conseguiam distinguir tarefas comuns de exploração de dados nos dados sonificados, como classificação e agrupamento. Os seus resultados sonificados mapearam os dados subjacentes para a escala musical ocidental.
-
-Last e Usyskin focaram em dados de séries temporais. Eles argumentam que dados de séries temporais são particularmente bons para sonificação, pois há paralelos naturais com sons musicais. Música é sequencial, ela tem duração e ela se desenvolve ao longo do tempo, assim como dados de séries temporais [(Last e Usyskin 2015, p. 424)](https://www.researchgate.net/publication/282504359_Listen_to_the_Sound_of_Data). Torna-se um problema combinar os dados com as saídas sônicas apropriadas. Em muitas aplicações de sonificação, uma técnica chamada "mapeamento de parâmetros" é usada para combinar aspectos dos dados ao longo de várias dimensões da audição, como [tom](#tom), variação, brilho e início.
O problema com esta abordagem é que onde não há relação temporal (ou melhor, nenhuma relação não linear) entre os pontos de dados originais, o som resultante pode ser "confuso" (2015, p. 422).
-
-## Escutando as lacunas
-Há também o modo como preenchemos as lacunas do som com as nossas expectativas. Considere esse vídeo em que [mp3](#mp3) foi convertido para [MIDI](#midi) e de volta para mp3; a música foi 'achatada' para que todas as informações sonoras sejam tocadas por apenas um instrumento. (Gerar esse efeito é como salvar uma página da web como .txt, abri-la no Word e, então, salvá-la novamente como .html). Todos os sons (inclusive vocais) foram traduzidos para os seus valores de nota correspondentes e, em seguida, transformados de volta em mp3.
-
-É barulhento, entretanto percebemos o significado. Considere o vídeo abaixo:
-
-
-
-O que está acontecendo aqui? Se já conhecia essa música, provavelmente ouviu as 'palavras'. No entanto, nenhuma palavra está presente na música! Se você não conhecia essa música, deve ter soado como um absurdo inaudível (veja mais exemplos no website de [Andy Baio](http://waxy.org/2015/12/if_drake_was_born_a_piano/)). Esse efeito é, às vezes, chamado de 'alucinação auditiva' (cf. [Koebler, 2015](#Koebler)). Esses exemplos mostram como qualquer representação de dados que podemos ouvir/ver não está lá, estritamente falando. Nós preenchemos as lacunas com as nossas próprias expectativas.
-
-Considere as implicações para a História. Se sonificarmos nossos dados e começarmos a ouvir padrões no som, ou pontos fora da curva, nossas expectativas culturais sobre como a música funciona (nossas memórias de fragmentos musicais semelhantes, ouvidos em contextos específicos) irão colorir nossa interpretação. Isso, eu argumentaria, é verdadeiro para todas as representações do passado, mas sonificar é apenas estranho o suficiente em relação aos nossos métodos regulares, de forma que essa autoconsciência nos ajudará a identificar ou comunicar os padrões críticos nos dados do passado.
-
-Iremos progredir por meio de três ferramentas diferentes para sonificação de dados, observando como as escolhas em uma ferramenta afetam o resultado e podem ser atenuadas imaginando novamente os dados por meio de outra ferramenta. No fim das contas, não há nada mais objetivo em 'sonificação' do que há em 'visualização', então quem pesquisa deve estar preparado para justificar as suas escolhas, e fazer escolhas transparentes e reprodutíveis para outros. E para que não pensemos que a sonificação e a música gerada por algoritmos são de alguma forma algo "novo", indico ao leitor interessado [Hedges (1978)](http://www.jstor.org/stable/734136).
-
-Em cada seção, irei dar uma introdução conceitual, seguida por um passo a passo usando dados arqueológicos ou históricos de amostra.
-
-# Musicalgorithms
-
-Há uma grande variedade de ferramentas para sonificar dados. Algumas, por exemplo, são pacotes amplamente usados do [ambiente de estatística R](https://cran.r-project.org/), como ‘[playitbyR](https://cran.r-project.org/web/packages/playitbyr/index.html)’ e ‘[AudiolyzR](https://cran.r-project.org/web/packages/audiolyzR/index.html)’. O primeiro desses pacotes, entretanto, não tem sido mantido ou atualizado para as versões atuais do R (sua última atualização foi muitos anos atrás) e o segundo precisa de um número considerável de configurações adicionais de software para que funcione adequadamente.
-
-Por outro lado, o site [Musicalgorithms](http://musicalgorithms.org/) é bem fácil de usar. O site Musicalgorithms está online há mais de uma década. Embora não seja código aberto, ele é um projeto de pesquisa de longa duração em música computacional do seu criador, Jonathan Middleton. Ele está atualmente em sua terceira grande iteração (iterações anteriores permanecem disponíveis para uso online). Começaremos com o Musicalgorithms porque ele nos permite entrar e ajustar os nossos dados para produzir um ficheiro de representação MIDI. Tenha atenção e selecione a '[Versão 3](http://musicalgorithms.org/3.0/index.html)'.
-
-{% include figure.html filename="sonification-musicalgorithms-main-site-1.png" caption="O site Musicalgorithms como aparecia em 2 de agosto de 2016" %}
-
-> Nota da tradução: há novas versões disponíveis para uso, mas de forma a seguir o tutorial, seguimos a versão 3 do Musicalgorithms, usada em 2016, e ainda disponível no site para uso.
-
-O Musicalgorithms efetua uma série de transformações nos dados. Nos dados de amostra abaixo (o padrão do próprio site), há apenas uma linha de dados, mesmo que pareça várias linhas. Os dados de amostra são compostos de campos separados por vírgula que são delimitados por espaço.
-
-```
-# Of Voices, Text Area Name, Text Area Data
-1,morphBox,
-,areaPitch1,2 7 1 8 2 8 1 8 2 8 4 5 9 0 4 5 2 3 5 3 6 0 2 8
-,dAreaMap1,2 7 1 8 2 8 1 8 2 8 4 5 9 0 4 5 2 3 5 3 6 0 2 8
-,mapArea1,20 69 11 78 20 78 11 78 20 78 40 49 88 1 40 49 20 30 49 30 59 1 20 78
-,dMapArea1,1 5 1 5 1 5 1 5 1 5 3 3 6 0 3 3 1 2 3 2 4 0 1 5
-,so_text_area1,20 69 11 78 20 78 11 78 20 78 40 49 88 1 40 49 20 30 49 30 59 1 20 78
-```
-
-Esses dados representam os dados de origem e as suas transformações; compartilhar esses dados permitiria a outro pesquisador replicar ou estender a sonificação usando outras ferramentas. No entanto, quando se começa, apenas os dados básicos abaixo são necessários (uma lista de pontos de dados):
-
-```
-# Of Voices, Text Area Name, Text Area Data
-1,morphBox,
-,areaPitch1,24 72 12 84 21 81 14 81 24 81 44 51 94 01 44 51 24 31 5 43 61 04 21 81
-```
-
-O campo-chave para nós é ‘areaPitch1’, que contém os dados de entrada delimitados por espaço. Os outros campos serão preenchidos à medida que avançamos pelas várias configurações do Musicalgorithms. Nos dados acima (por exemplo, 24 72 12 84 etc.), os valores são contagens brutas de inscrições de uma série de locais ao longo de uma estrada romana na Grã-Bretanha. (Vamos praticar com outros dados em breve, abaixo).
-
-{% include figure.html filename="sonification-musicalgorithms-pitch-mapping-2.png" caption="Depois de carregar seus dados, é possível selecionar as diferentes operações na barra de menu superior do site. Na captura de tela, o mouseover de informações está explicando o que acontece com o dimensionamento de seus dados se você selecionar a operação de divisão para dimensionar os seus dados para o intervalo de notas selecionado." %}
-
-Agora, conforme se percorre as várias guias da interface ‘duration input’ (entrada de duração), ‘pitch mapping' (mapeamento de tom), ‘duration mapping’ (mapeamento de duração), ‘scale options’ (opções de escala musical), é possível realizar várias transformações. Em ‘pitch mapping’ (mapeamento de tom), há uma série de opções matemáticas para mapear os dados contra as 88 teclas/tons completos de um teclado de piano (em um mapeamento linear, a _média_ dos dados de alguém seria mapeada para o dó médio, ou 40).
Também é possível escolher o tipo de escala, se é um tom maior ou menor. Nesse ponto, uma vez que se tenha selecionado várias transformações, salve o ficheiro de texto. No menu 'play' é possível realizar o download de um ficheiro MIDI. O seu programa de áudio padrão pode tocar ficheiros MIDI (geralmente padronizando para um tom de piano). Uma instrumentação mais complicada pode ser atribuída abrindo o ficheiro MIDI em programas de mixagem de música, como GarageBand (Mac) ou [LMMS](https://lmms.io/) (Windows, Mac, Linux). (O uso do Garageband ou LMMS está fora do escopo desse tutorial. Um tutorial em vídeo sobre LMMS está disponível [aqui](https://youtu.be/4dYxV3tqTUc), enquanto há muitos tutoriais do Garageband online. Lynda.com tem [um tutorial excelente](http://www.lynda.com/GarageBand-tutorials/Importing-audio-tracks/156620/164050-4.html)). - -Se tivesse várias colunas de dados para os mesmos pontos - digamos, em nosso exemplo da Grã-Bretanha romana, também queríamos sonificar contagens de um tipo de cerâmica para essas mesmas cidades - é possível recarregar sua próxima série de dados, efetuar as transformações e mapeamentos, e gerar outro ficheiro MIDI. Como o Garageband e o LMMS permitem a sobreposição de vozes, você pode começar a criar sequências musicais complicadas. - -{% include figure.html filename="sonification-garageband-john-adams-3.png" caption="Captura de tela do Garageband, onde os ficheiros MIDI são tópicos sonorizados do Diário de John Adams. Na interface do Garageband (o LMMS é semelhante), cada ficheiro MIDI é arrastado e solto no lugar. A instrumentação para cada ficheiro MIDI (ou seja, trilha) pode ser selecionada nos menus do Garageband. Os rótulos de cada faixa foram alterados aqui para refletir as palavras-chave em cada tópico. A área verde à direita representa uma visualização das notas em cada faixa. Você pode ver esta interface em ação e ouvir a música [aqui](https://youtu.be/ikqRXtI3JeA) (em inglês)" %} - -Quais transformações devem ser usadas? Se tiver duas colunas de dados, terá duas vozes. Pode fazer sentido, em nossos dados hipotéticos, tocar a primeira voz bem alto, em uma tonalidade maior: as inscrições 'falam' conosco, afinal de contas. (As inscrições romanas de fato se dirigem ao leitor, o transeunte, literalmente: 'Ó tu que passas ...'). Então, se acaso as cerâmicas de interesse forem mercadorias mais despretensiosas, talvez elas possam ser mapeadas em relação à extremidade inferior da escala ou receberem notas de duração mais longas para representar sua onipresença nas classes nessa região. - -_Não há forma 'certa' de representar os seus dados como som, ao menos não por enquanto_, mas mesmo com essa amostra de exemplo, começamos a ver como sombras de significado e interpretação podem ser atribuídas aos nossos dados e à nossa experiência dos dados. - -Mas e o tempo? Dados históricos usualmente têm um ponto de inflexão, um distinto "tempo quando" algo aconteceu. Então, a quantidade de tempo entre dois pontos de dados precisa ser considerada. É nesse ponto que a nossa próxima ferramenta se torna bem útil, para quando nossos pontos de dados tiverem uma relação com outro espaço temporal. Começamos a nos mover de sonificação (pontos de dados) para música (relações entre pontos). 
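-
-Antes de passar à prática, vale ter uma intuição do que o 'pitch mapping' por divisão, descrito acima, faz com os números. O trecho abaixo é um esboço mínimo da ideia geral em Python - não é o algoritmo exato do Musicalgorithms, e o nome da função e as contagens são hipotéticos:
-
-```python
-# Esboço: escala contagens brutas para um intervalo de teclas do piano,
-# dividindo cada valor pelo maior valor da série (mapeamento por 'division').
-def mapear_tons(valores, tecla_min=25, tecla_max=60):
-    maior = float(max(valores))
-    intervalo = tecla_max - tecla_min
-    return [tecla_min + int(round(v / maior * intervalo)) for v in valores]
-
-contagens = [80, 128, 1, 40, 495]   # contagens hipotéticas de inscrições
-print(mapear_tons(contagens))       # [31, 34, 25, 28, 60]
-```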
- -### Prática -O [conjunto de dados de amostra](/assets/sonification/sonification-roman-data.csv) apresentado contém a contagem de moedas romanas na sua primeira coluna e a contagem de materiais romanos dos mesmos locais, conforme contido no banco de dados do Portable Antiquities Scheme (Esquema de Antiguidades Portáveis) do British Museum. A sonificação desses dados pode revelar ou acentuar aspectos da situação econômica ao longo da rua Watling, uma grande rota através da Britânia Romana. Esses pontos de dados estão organizados geograficamente do Noroeste ao Sudeste; então, na medida em que o som toca, nós estamos escutando movimento através do espaço. Cada nota representa outro passo no caminho. - -1. Abra o [dados-sonificação-romana.csv](/assets/sonification/sonification-roman-data.csv) em uma tabela. Copie a primeira coluna em um editor de texto. Delete os finais das linhas de forma que os dados fiquem todos em uma linha única. -2. Adicione a seguinte informação de coluna assim: -``` -# Of Voices, Text Area Name, Text Area Data -1,morphBox, -,areaPitch1, -``` -...para que os seus dados sigam imediatamente depois da última vírgula (como [esse exemplo](/assets/sonification/sonification-romancoin-data-music.csv)). Salve o ficheiro com um nome útil como `sonsdasmoedas1.csv`. - -3. Acesse o site do [Musicalgorithms](http://musicalgorithms.org/3.0/index.html) (versão 3) e clique no botão "load" (carregar). No pop-up, clique no botão azul "load" (carregar) e selecione o ficheiro salvo no passo 2. O site carregará os seus materiais e exibirá uma marca de seleção verde se tiver sido carregado com êxito. Caso contrário, certifique-se de que os seus valores estejam separados por espaços e que sigam imediatamente a última vírgula no bloco de código na etapa 2. Também é possível tentar carregar o [ficheiro de demonstração desse tutorial](/assets/sonification/sonification-romancoin-data-music.csv) ao invés. - -{% include figure.html filename="sonification-musicalgorithms-upload-4.png" caption="Clique em 'load' na tela principal para acessar essa caixa de diálogo. Então 'load csv'. (carregue o csv) Selecione o ficheiro; ele aparecerá na caixa. Então clique no botão 'load' (carregar)." %} - -4. Clique em 'Pitch Input'. Os valores dos seus dados serão exibidos. Por enquanto, **não selecione** nenhuma outra opção nesse página (consequentemente, usaremos os valores padrão do site). - -5. Clique em 'Duration Input'. **Não selecione nenhuma opção aqui por enquanto**. As opções aqui irão mapear várias transformações em relação aos dados que alterarão a duração para cada nota. Não se preocupe com as opções por enquanto: siga adiante. -6. Clique em 'Pitch Mapping'. Essa é a escolha mais crucial, pois irá transformar (isso é, escalar) os seus dados brutos para um mapeamento em relação às teclas do teclado. Deixe a configuração de `mapping` em 'division'. (As outras opções são módulo e logarítmico). A opção `Range` 1 a 88 usa todas as 88 teclas do teclado, assim, seu valor mais baixo estaria de acordo com a nota mais profunda do piano e seu valor mais alto com a nota mais alta. Em vez disso, você pode restringir sua música em torno de dó médio, então insira 25 a 60 como seu intervalo. O resultado deveria mudar para: `31,34,34,34,25,28,30,60,28,25,26,26,25,25,60,25,25,38,33,26,25,25,25` Essas não são mais suas contagens; são as notas do teclado. - -{% include figure.html filename="sonification-musicalgorithms-settings-for-pitch-mapping-5.png" caption="Clique na caixa 'range' e defina-o para 25. 
Os valores abaixo serão alterados automaticamente. Clique na caixa 'to' e defina-o para 60. Clique novamente na outra caixa; os valores serão atualizados." %}
-
-7. Clique em 'Duration Mapping'. Como Pitch Mapping, isso pega o intervalo de tempo especificado e usa várias opções matemáticas para mapear o intervalo de possibilidade contra as suas notas. Se passar o seu cursor por cima de `i`, verá como os números correspondem a notas inteiras, semínimas, colcheias e assim por diante. Deixe os valores padrão por enquanto.
-8. Clique em 'Scale Options'. Aqui nós podemos começar a selecionar o que pode ser chamado de aspecto 'emocional' do som. Nós geralmente pensamos que escalas maiores são 'alegres' enquanto escalas menores são 'tristes'; para uma discussão acessível, acesse esse [post de blog](http://www.ethanhein.com/wp/2010/scales-and-emotions/) (em inglês). Por enquanto, escolha 'scale by: major' (escala maior). Deixe a 'scale' (escala) como `C`.
-
-Agora sonificamos uma coluna de dados! Clique no botão 'save' (salvar), então 'save csv' (salvar csv).
-
-{% include figure.html filename="sonification-musicalgorithms-save-6.png" caption="A caixa de diálogo salvar dados." %}
-Haverá um ficheiro que se parecerá com isso:
-
-```
-# Of Voices, Text Area Name, Text Area Data
-1,morphBox,
-,areaPitch1,80 128 128 128 1 40 77 495 48 2 21 19 1 1 500 1 3 190 115 13 5 1 3
-,dAreaMap1,2 7 1 8 2 8 1 8 2 8 4 5 9 0 4 5 2 3 5 3 6 0 2
-,mapArea1,31 34 34 34 25 28 30 60 28 25 26 26 25 25 60 25 25 38 33 26 25 25 25
-,dMapArea1,1 5 1 5 1 5 1 5 1 5 3 3 6 0 3 3 1 2 3 2 4 0 1
-,so_text_area1,32 35 35 35 25 28 30 59 28 25 27 27 25 25 59 25 25 39 33 27 25 25 25
-```
-
-É possível ver os dados originais no campo 'areaPitch1' e os subsequentes mapeamentos. O site permite que sejam geradas até quatro vozes por vez em um ficheiro MIDI; dependendo de como se quer adicionar instrumentação depois, pode-se querer gerar um ficheiro MIDI por vez. Vamos tocar a música - clique em 'Play'. É possível selecionar o tempo aqui, e um instrumento. É possível ouvir os seus dados no navegador, ou salvá-los como um ficheiro MIDI clicando no botão azul 'Save MIDI file'.
-
-Retorne ao começo e carregue as duas colunas de dados nesse modelo:
-```
-# Of Voices, Text Area Name, Text Area Data
-2,morphBox,
-,areaPitch1,
-,areaPitch2,
-```
-
-{% include figure.html filename="sonification-2voices-7.png" caption="Coloque 2 na caixa de vozes no topo da interface. Quando você for para qualquer uma das páginas de opção - aqui, nós estamos em 'pitch input' - dois monitores abrem para mostrar os dados das duas vozes. Carregue os seus dados do csv como antes, mas formate o seu csv para ter o 'areaPitch1' e o 'areaPitch2' como descrito no texto principal. Os dados para a primeira voz irão aparecer na esquerda, e a segunda voz na direita." %}
-
-Quando se tem dados com várias vozes, o que se destaca? Observe que, nessa abordagem, a distância entre os pontos no mundo real não é considerada em nossa sonificação. Essa distância, se fosse considerada, poderia ser crucial. A distância, é claro, não precisa ser geográfica - pode ser temporal. A próxima ferramenta que exploraremos nos permite abordar isso em nossa sonificação explicitamente.
-
-# Algumas palavras sobre configurar o Python
-
-A próxima seção desse tutorial precisa de Python. Se não usou Python ainda, será preciso passar algum tempo [se familiarizando com a linha de comando (PC) ou terminal (OS)](/en/lessons/intro-to-bash) (em inglês).
Você pode achar esse rápido [guia de instalação dos módulos do python](/pt/licoes/instalacao-modulos-python-pip) útil (mas retorne para ele depois de ler o resto da seção).
-
-Usuários do Mac já possuirão o Python instalado na máquina deles. É possível testar isso apertando o botão COMMAND e a barra de espaço; na janela de pesquisa, digite `terminal` e clique na aplicação do terminal. No prompt de comando, por exemplo, no cursor piscando em `$`, digite `python --version` e o computador responderá com a versão do python existente no seu computador. _A próxima seção desse tutorial usa a versão Python 2.7; ela não foi testada em Python 3_.
-
-Para usuários do Windows, Python não é instalado por padrão na sua máquina, então [essa página](http://docs.python-guide.org/en/latest/starting/install/win/) te ajudará a iniciar, apesar de as coisas serem um pouco mais complicadas do que a página faz parecer (nota de tradução: pode usar também a [lição de instalação do Python](/pt/licoes/introducao-instalacao-python) do _Programming Historian em português_, mas tenha em atenção que nessa lição é instalada a versão 3 do Python). Primeiro, realize o download do ficheiro `.msi` que a página recomenda (Python 2.7). Clique duas vezes no ficheiro e ele deve se instalar em um novo diretório, por exemplo, `C:\Python27\`. Então, nós temos de dizer para o Windows onde buscar pelo Python sempre que um programa em python for executado; ou seja, colocaremos a localização do diretório no seu 'path', a variável de ambiente que o Windows consulta sempre que é confrontado com um novo comando. Existem algumas formas de fazer isso, mas talvez a mais fácil seja buscar no seu computador pelo programa `Powershell` (digite 'powershell' na janela de pesquisa do seu computador). Abra o Powershell e, no prompt `>`, copie essa linha inteira:
-
-`[Environment]::SetEnvironmentVariable("Path", "$env:Path;C:\Python27\;C:\Python27\Scripts\", "User")`
-
-Feche o Powershell quando terminar. Você saberá que funcionou se nada acontecer quando apertar 'enter'. Para testar se tudo está funcionando, abra o prompt de comando (aqui há [10 formas de fazer isso](http://www.howtogeek.com/235101/10-ways-to-open-the-command-prompt-in-windows-10/)) (em inglês) e digite no prompt `>`, `python --version`. Ele deve retornar `Python 2.7.10` ou algo similar.
-
-A última peça do quebra-cabeça que todos os usuários precisarão é um programa chamado `Pip`. Os usuários de Mac podem instalá-lo digitando no terminal: `sudo easy_install pip`. Usuários do Windows terão um pouco mais de dificuldade (nota de tradução: pode usar também a [lição de instalação de módulos Python com pip](/pt/licoes/instalacao-modulos-python-pip) do _Programming Historian em português_, mas tenha em atenção que nessa lição é usada a versão 3 do Python). Primeiro, clique com o botão direito do mouse e salve esse link: [https://bootstrap.pypa.io/get-pip.py](https://bootstrap.pypa.io/get-pip.py) (se apenas clicar no link, ele irá te mostrar o código no seu navegador). Salve em algum lugar útil. Abra o prompt de comando no diretório em que salvou `get-pip.py`. Então, digite no prompt de comando: `python get-pip.py`. Convencionalmente, nos tutoriais, verá `>` ou `$` em lugares em que é preciso digitar algo no prompt de comando ou no terminal. Nunca é necessário digitar esses dois caracteres.
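-
-Antes de prosseguir, pode valer a pena confirmar qual interpretador o comando `python` está de fato chamando. Um esboço mínimo (o nome do ficheiro aqui é apenas ilustrativo):
-
-```python
-# Esboço: salve como versao.py e execute `python versao.py`.
-# Deve imprimir algo como 2.7.x, a versão usada neste tutorial.
-import sys
-print(sys.version)
-```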
- -Finalmente, quando você tem um código python que deseja executar, pode inseri-lo em seu editor de texto e salvá-lo com a extensão `.py` (nota de tradução: pode também seguir as indicações das lições “Configurar um ambiente de desenvolvimento integrado para Python”, do _Programming Historian em português_, nas suas versões [Windows](/pt/licoes/instalacao-windows) ou [Mac](/pt/licoes/instalacao-mac), mas tenha em atenção que nessas lições é usada a versão 3 do Python). O seu ficheiro é um ficheiro de texto, mas a **extensão** do ficheiro diz para o seu computador para usar o Python para interpretá-lo; mas lembre, digite `python` no prompt primeiro, por exemplo: `$ python meu-script-legal.py`. - -# MIDITime - -MIDITime é um pacote do python desenvolvido por [Reveal News (antes, Centro de Reportagens Investigativas)](https://www.revealnews.org/). O seu [repositório no Github está aqui](https://github.com/cirlabs/miditime). Miditime foi construído explicitamente para dados de séries temporais (ou seja, uma sequencia de observações coletadas ao longo do tempo). - -Enquanto a ferramenta Musicalgorithms tem uma interface mais ou menos intuitiva, quem pesquisa sacrifica a possibilidade de saber o que, exatamente, está acontecendo internamente. -Em princípio, alguém poderia examinar o código subjacente para o pacote MIDITime para saber o que está acontecendo. Mais importante ainda, na ferramenta anterior não há nenhuma habilidade de contabilizar os dados em que os pontos estão distantes uns dos outros no tempo do relógio. MIDITime nos permite considerar que os nossos dados podem ser agrupados pelo tempo. - -Vamos supor que você tenha um diário histórico no qual você encaixou um [modelo de tópicos](/en/lessons/topic-modeling-and-mallet). A saída resultante pode ter entradas de diário como linhas, e a composição percentual de cada tópico contribui para essa entrada como colunas. Nesse caso, _ouvir_ esses valores pode te ajudar a entender os padrões de pensamento no diário de uma forma que a visualização como um gráfico pode não permitir. Outliers ou padrões musicais recorrentes poderiam se destacar ao serem ouvidos de um modo que a gramática dos gráficos obscurece. - -### Instalando o MIDITime -Instalar MIDItime é simples com o [pip](/pt/licoes/instalacao-modulos-python-pip): - -`$ pip install miditime` ou `$ sudo pip install miditime` para uma máquina Mac ou Linux ; -`> pip install miditime` em uma máquina Windows. (Usuários Windows, se as instruções acima não funcionaram muito bem, talvez queira tentar [esse programa de ajuda](https://sites.google.com/site/pydatalog/python/pip-for-windows) para fazer o Pip funcionar adequadamente na sua máquina ou então seguir as instruções da [lição sobre pip](/pt/licoes/instalacao-modulos-python-pip) do _Programming Historian em português_). - -### Prática -Vamos olhar para o exemplo de script providenciado. Abra o seu editor de texto, e copie e cole o script de exemplo em: - -```python -#!/usr/bin/python - -from miditime.miditime import MIDITime - -# Instancie a classe com uma frequência (120bpm é o padrão) e o destino do ficheiro resultante. -mymidi = MIDITime(120, 'meuficheiro.mid') - -# Crie uma lista de notas. 
Cada nota é uma lista: [tempo, tom, ataque, duração]
-midinotes = [
-    [0, 60, 200, 3],  #Na batida 0 (o começo), C Médio com ataque 200, para 3 batidas
-    [10, 61, 200, 4]  #Em 10 batidas (12 segundos a partir do começo), C#5 com ataque 200, para 4 batidas
-]
-
-# Adicione uma faixa com essas notas
-mymidi.add_track(midinotes)
-
-# Resultado do ficheiro .mid
-mymidi.save_midi()
-```
-
-Salve o script como `musica1.py`. No seu terminal ou prompt de comando, execute o script:
-
-`$ python musica1.py`
-
-O novo ficheiro, `meuficheiro.mid`, será registrado no seu diretório. Para ouvir esse ficheiro, é possível abri-lo com Quicktime ou Windows Media Player. (É possível adicionar instrumentação abrindo o ficheiro no Garageband ou [LMMS](https://lmms.io/)).
-
-`musica1.py` importa miditime (lembre, é preciso realizar o `pip install miditime` antes de executar o script). Então, ele define um ficheiro de destino para o resultado e configura o tempo. Todas as notas são listadas individualmente, onde o primeiro número é o tempo em que a nota deve ser tocada, o tom da nota (ou seja, a nota de fato!), o quão forte ou ritmicamente a nota é atingida (o ataque), e a duração da nota. As notas musicais são então registradas na faixa e a faixa é registrada no `meuficheiro.mid`.
-
-Agora, execute o script e adicione mais notas. As notas para 'A barata diz que tem' são:
-
-```
-C7, F, Gm, Am, Bb, C, F, Dm, Gm, C, F
-A ... Barata diz que tem sete saias de filó
-```
-
-Você consegue fazer o seu computador tocar essa música? (Esse [material](http://www.electronics.dit.ie/staff/tscarff/Music_technology/midi/midi_note_numbers_for_octaves.html) (em inglês) irá ajudar).
-
-**A propósito**, há uma especificação de ficheiro de texto para descrever música chamada [Notação ABC](https://pt.wikipedia.org/wiki/ABC_(nota%C3%A7%C3%A3o_musical)). Por enquanto, ela está além do nosso escopo, mas alguém poderia escrever um script de sonificação em, por exemplo, uma planilha, mapeando valores para nomes de notas na especificação ABC (se você já usou um IF - THEN no Excel para converter notas percentuais em notas alfabéticas, terá uma noção de como isso pode ser feito) e então usar um site como [esse](http://trillian.mit.edu/~jc/music/abc/ABCcontrib.html) (em inglês) para converter a notação ABC em um ficheiro .mid.
-
-### Inserindo os seus próprios dados
-[Esse ficheiro](/assets/sonification/sonification-diary.csv) é uma seleção do modelo de tópicos dos Diários de John Adams do [The Macroscope](http://themacroscope.org) (Explorando Grandes Dados Históricos: O Macroscópio do Historiador). Apenas os sinais mais fortes foram preservados através do arredondamento dos valores nas colunas para duas casas decimais (lembrando que 0.25, por exemplo, indica que aquele tópico está contribuindo para um quarto da composição daquela entrada do diário). Para obter esses dados em seu script de Python, eles devem ser formatados de uma maneira específica. A parte complicada é acertar o campo de data.
-
-_Para os propósitos desse tutorial, nós iremos deixar os nomes das variáveis sem alterações em relação ao script de amostra.
O script de amostra foi desenvolvido com dados de um terremoto em mente; então onde diz 'magnitude' podemos pensar como '% de composição do tópico'._
-
-```
-meus_dados = [
-    {'data_evento': <objeto datetime>, 'magnitude': 3.4},
-    {'data_evento': <objeto datetime>, 'magnitude': 3.2},
-    {'data_evento': <objeto datetime>, 'magnitude': 3.6},
-    {'data_evento': <objeto datetime>, 'magnitude': 3.0},
-    {'data_evento': <objeto datetime>, 'magnitude': 5.6},
-    {'data_evento': <objeto datetime>, 'magnitude': 4.0}
-]
-```
-
-Alguém poderia abordar o problema de obter os nossos dados nesse formato usando expressões regulares; pode ser mais fácil abrir o modelo de tópicos em uma planilha. Copie os dados dos tópicos para uma nova planilha e deixe colunas livres à esquerda e à direita dos dados. No exemplo abaixo, eu os coloquei na coluna D e, então, preenchi o resto dos dados ao redor dela, assim:
-
-| | A | B | C | D | E |
-|---|---|---|---|---|---|
-|1 | {'data_evento': datetime |(1753,6,8) |, 'magnitude': |0.0024499630 |}, |
-|2 | | | | | |
-|3 | | | | | |
-
-Então copie e cole os elementos que não mudam para preencher a coluna inteira. O elemento de data tem de ser (ano, mês, dia). Uma vez que tenha preenchido a planilha, copie e cole o resultado no seu editor de texto de forma que se torne parte do arranjo `meus_dados`, como abaixo. (Mais adiante, depois da explicação dos parâmetros do MIDITime, há também um esboço de script que monta essa lista diretamente a partir do csv.)
-
-Nota da tradução: note que a ordem do _datetime_ segue o padrão em inglês estadunidense.
-```
-meus_dados = [
-{'data_evento': datetime(1753,6,8), 'magnitude':0.0024499630},
-{'data_evento': datetime(1753,6,9), 'magnitude':0.0035766320},
-{'data_evento': datetime(1753,6,10), 'magnitude':0.0022171550},
-{'data_evento': datetime(1753,6,11), 'magnitude':0.0033220150},
-{'data_evento': datetime(1753,6,12), 'magnitude':0.0046445900},
-{'data_evento': datetime(1753,6,13), 'magnitude':0.0035766320},
-{'data_evento': datetime(1753,6,14), 'magnitude':0.0042241550}
-]
-```
-
-Note que a última linha não tem uma vírgula no seu fim.
-
-O seu script final será similar a este, usando o exemplo da página do Miditime (as seções de código abaixo foram intercaladas com comentários, mas devem ser coladas no seu editor de texto como um ficheiro único):
-
-```python
-from miditime.miditime import MIDITime
-from datetime import datetime
-import random
-
-meumidi = MIDITime(108, 'johnadams1.mid', 3, 4, 1)
-```
-
-Os valores após MIDITime, `MIDITime(108, 'johnadams1.mid', 3, 4, 1)`, configuram:
-+ as batidas por minuto (108),
-+ o ficheiro resultante ('johnadams1.mid'),
-+ o número de segundos para representar o ano na música (3 segundos por ano no calendário, então todas as notas para as entradas desse diário de 1753 serão escaladas contra 3 segundos; há 50 anos nos dados, então a música final terá duração de 50 x 3, ou um pouco mais de dois minutos),
-+ a oitava base para a música (C médio é convencionalmente representado como C5, então aqui 4 representa uma oitava abaixo do C médio),
-+ o nº de oitavas em que os tons são mapeados.
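-
-Este é o esboço mencionado acima: como alternativa à montagem manual em planilha, um pequeno script pode construir a lista `meus_dados` diretamente a partir do csv. O esboço assume, hipoteticamente, um ficheiro com colunas chamadas `data` (no formato AAAA-MM-DD) e `valor` - ajuste ambos os nomes às colunas reais do seu ficheiro:
-
-```python
-# Esboço: monta o arranjo meus_dados a partir de um csv.
-# Os nomes de colunas 'data' e 'valor' são hipotéticos; adapte-os.
-import csv
-from datetime import datetime
-
-meus_dados = []
-with open('sonification-diary.csv') as f:
-    for linha in csv.DictReader(f):
-        meus_dados.append({
-            'data_evento': datetime.strptime(linha['data'], '%Y-%m-%d'),
-            'magnitude': float(linha['valor'])
-        })
-```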
- -Agora passamos os seus dados para o script inserindo-o no arranjo `meus_dados` (isso será colado em seguida): - -```python -meus_dados = [ -{'data_evento': datetime(1753,6,8), 'magnitude':0.0024499630}, -{'data_evento': datetime(1753,6,9), 'magnitude':0.0035766320}, -``` - -...tenha os seus dados aqui, lembrando-se de terminar a linha final data_evento **sem** uma vírgula, e finalizando os dados com um `]` na sua própria linha, por exemplo - -```python -{'data_evento': datetime(1753,6,14), 'magnitude':0.0042241550} -] -``` - -e então copie: - -```python -meus_dados_epoca = [{'dias_desde_epoca': meumidi.days_since_epoch(d['data_evento']), 'magnitude': d['magnitude']} for d in meus_dados] - -meus_dados_tempo = [{'beat': meumidi.beat(d['dias_desde_epoca']), 'magnitude': d['magnitude']} for d in meus_dados_epoca] - -tempo_inicio = meus_dados_tempo[0]['beat'] -``` - -Esta parte calcula o tempo entre as diferentes entradas do diário; diários que estão próximos no tempo terão, portanto, suas notas soando mais próximas. Finalmente, nós definimos como os dados serão mapeados em relação ao tom. Lembre-se que os nossos dados são porcentagens variando de 0.01 (ou seja, 1%) a 0.99 (99%), em `escala_pct` entre 0 e 1. Se não estiver lidando com porcentagens, seria usado o menor valor e o maior valor (se, por exemplo, os seus dados fossem contagens de algum elemento de interesse, como nos dados arqueológicos usados anteriormente). Então, nós colamos: - -```python -def sintonia_mag_para_tom(magnitude): - escala_pct = meumidi.linear_scale_pct(0, 1, magnitude) - # Pick a range of notes. This allows you to play in a key. - c_major = ['C', 'C#', 'D', 'D#', 'E', 'E#', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B', 'B#'] - - #Encontre as notas que correspondem com os pontos dos seus dados - nota = meumidi.scale_to_note(escala_pct, c_major) - - #Traduza essa nota em um tom MIDI - midi_tom = meumidi.note_to_midi_pitch(nota) - - return midi_tom - -lista_notas = [] - -for d in meus_dados_tempo: - lista_notas.append([ - d['beat'] - tempo_inicio, - sintonia_mag_para_tom(d['magnitude']), - random.randint(0,200), # ataque - random.randint(1,4) # duration, in beats - ]) -``` - -e então cole nessa parte final do código para escrever os seus valores de som no ficheiro: - -``` -# Adicione uma faixa com essas notas -meumidi.add_track(lista_notas) - -# Ficheiro .mid resultante -meumidi.save_midi() -``` - -Salve esse ficheiro com um novo nome e a extensão de ficheiro `.py`. - -Para cada coluna de dados nos seus dados originais, **tenha um script único e lembre-se de mudar o nome do ficheiro de saída**, pois, caso contrário, você irá sobrescrever seus dados. Então, você pode carregar os ficheiros individuais midi no Garageband ou LMMS para instrumentação. Aqui está a íntegra do [Diário de John Adams](https://www.youtube.com/watch?v=ikqRXtI3JeA). - -# Sonic Pi - -Harmonizar ficheiros MIDI únicos (no Garageband ou em algum outro programa de composição musical) nos leva de sonificação para composição e arte sonora. Nessa seção final, não será oferecido um tutorial completo sobre como usar o [Sonic Pi](http://sonic-pi.net), mas um direcionamento para um ambiente que permite a performance da codificação dos seus dados ao vivo (veja [esse vídeo](https://www.youtube.com/watch?v=oW-3HVOeUQA) para uma performance ao vivo real de codificação). 
Os tutoriais do próprio Sonic Pi mostrarão o potencial do uso do computador como um instrumento musical (em que você digita código em Ruby no editor interno enquanto o interpretador toca o que está sendo codificado).
-
-Por que alguém iria querer fazer isso? Como progressivamente ficou evidente no tutorial, quando os seus dados são sonificados, escolhas passam a ser feitas sobre como mapear os dados em som, e essas escolhas refletem implícita ou explicitamente decisões sobre quais dados importam. Existe um _continuum_ de 'objetividade', se quiser. Em uma extremidade, uma sonificação que apoia uma discussão sobre o passado; na outra, uma apresentação sobre o passado tão fascinante e pessoal quanto qualquer palestra pública bem-feita. A sonificação tira nossos dados das páginas e os leva aos ouvidos de nossos ouvintes: é uma espécie de história pública. Apresentando nossos dados ... imagine só!
-
-Aqui, eu ofereço simplesmente um trecho de código que possibilitará a importação dos seus dados, que aqui são simplesmente uma lista de valores salvos como csv. Estou em dívida com a bibliotecária da George Washington University, Laura Wrubel, que postou em [gist.github.com](https://gist.github.com/lwrubel) os experimentos dela de sonificação das transações de circulação de sua biblioteca.
-
-Nesse [ficheiro de amostra](/assets/sonification/sonification-jesuittopics.csv) (um modelo de tópicos gerado das [Jesuit Relations](http://puffin.creighton.edu/jesuit/relations/) (Relações Jesuítas)), há dois tópicos. A primeira linha contém os cabeçalhos: topic1 (em PT-BR, tópico1), topic2 (em PT-BR, tópico2).
-
-### Prática
-
-Siga os tutoriais iniciais que o Sonic Pi oferece até se sentir confortável com a interface e algumas das suas possibilidades. (Esses tutoriais também estão agrupados [aqui](https://gist.github.com/jwinder/e59be201082cca694df9); também é possível escutar uma entrevista com Sam Aaron, o criador do Sonic Pi, [aqui](https://devchat.cachefly.net/rubyrogues/RR215SonicPi.mp3?rss=true)). Então, em uma nova janela de edição, copie o seguinte (ao final, os trechos de código a seguir serão agrupados em um script único na sua janela do Sonic Pi):
-
-```
-require 'csv'
-dados = CSV.parse(File.read("/path/to/your/directory/dados.csv"), headers: true, header_converters: :symbol)
-use_bpm 100
-```
-
-Lembre, `path/to/your/directory/` é a localização real dos seus dados na sua máquina. Tenha certeza de que eles estão nomeados como `dados.csv` ou altere a linha acima de forma que o seu ficheiro seja carregado!
-
-Agora, vamos carregar esses dados na nossa música:
-
-```
-#esse pedaço de código será executado apenas uma vez, a menos que você tire o comentário da linha com
-#'live_loop' e também do 'end' final na parte inferior
-# desse bloco de código
-#'tirar o comentário' significa remover o sinal #.
-
-# live_loop :jesuit do
-dados.each do |line|
-  topic1 = line[:topic1].to_f
-  topic2 = line[:topic2].to_f
-
-  use_synth :piano
-  play topic1*100, attack: rand(0.5), decay: rand(1), amp: rand(0.25)
-  use_synth :piano
-  play topic2*100, attack: rand(0.5), decay: rand(1), amp: rand(0.25)
-  sleep (0.5)
-
-  end
-# end
-```
-
-As primeiras linhas carregam as colunas de dados; então dizemos qual amostra de som desejamos usar (piano) e, em seguida, dizemos ao Sonic Pi para tocar o tópico 1 de acordo com os seguintes critérios (um valor aleatório menor que 0,5 para o ataque; um decaimento usando um valor aleatório menor que 1; e uma [amplitude](#amplitude) com um valor aleatório menor que 0,25). Vê o x 100 na linha? Isso pega os valores dos nossos dados (que são decimais, lembre) e transforma-os em números inteiros. Nessa parte do código (do modo que eu escrevi), aquele número equivale diretamente à nota. Se 88 é a nota mais alta e 1 é a mais baixa, é possível ver que essa abordagem é um pouco problemática: nós não fizemos nenhum mapeamento de tom aqui! Nesse caso, é possível usar o Musicalgorithms para fazer o seu mapeamento de tom, e então inserir esses valores no Sonic Pi. Alternativamente, uma vez que esse código é, essencialmente, Ruby, é possível buscar como normalizar os dados e então realizar um mapeamento linear dos valores entre 1 - 88. Um bom lugar para começar seria estudar [essa tabela do Steve Lloyd](https://github.com/stevelloyd/Learn-sonification-with-Sonic-Pi) sobre sonificação de dados de clima com Sonic Pi. Finalmente, outra coisa a se notar é que o valor 'rand' (random, aleatório) permite que se adicione um pouco de 'humanidade' à música em termos de dinâmicas. Então nós faremos a mesma coisa novamente para o topic2 (tópico2).
-
-É possível adicionar batidas, loops, amostras, e toda a parafernália que o Sonic Pi permite. Onde você coloca os seus pedaços de código afeta a reprodução: se os loops forem colocados antes dos dados acima, eles serão reproduzidos primeiro. Por exemplo, se o trecho a seguir for inserido depois da linha `use_bpm 100`,
-
-```
-#pedaço de intro
-sleep 2
-sample :ambi_choir, attack: 2, sustain: 4, rate: 0.25, release: 1
-sleep 6
-```
-
-haverá uma pequena introdução na sua obra. Há uma pausa de 2 segundos, a amostra 'ambi_choir' é reproduzida, e então há uma pausa de mais 6 segundos antes de os seus dados serem tocados. Se quiser adicionar um som de bateria sinistro ao longo da sua obra, insira esse trecho a seguir (antes de seus próprios dados):
-
-```
-#trecho que continua tocando ao longo da música
-live_loop :boom do
-  with_fx :reverb, room: 0.5 do
-    sample :bd_boom, rate: 1, amp: 1
-  end
-  sleep 2
-end
-```
-
-O código é bem simples: realize um loop da amostra 'bd_boom' com um efeito de reverberação, em um ritmo particular. Pause por 2 segundos entre os loops.
-
-A propósito, 'codificação ao vivo'? O que torna esse ambiente um espaço de 'codificação ao vivo' é a possibilidade de se fazer alterações no código _enquanto o Sonic Pi o transforma em música_. Não gosta do que está ouvindo? Altere o código na hora!
-
-Para mais sobre o Sonic Pi, [esse site de workshop](https://www.miskatonic.org/music/access2015/) (em inglês) é um bom lugar para começar. Veja também o [relatório de Laura Wrubel sobre participar desse workshop, e o trabalho dela e de seus colegas na área](http://library.gwu.edu/scholarly-technology-group/posts/sound-library-work) (em inglês).
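-
-Voltando ao problema do mapeamento de tom mencionado acima: uma maneira simples de resolvê-lo é normalizar os dados antes de levá-los ao Sonic Pi, escalando-os para o intervalo 1 - 88 em um pequeno script de pré-processamento. Segue um esboço mínimo em Python, apenas ilustrativo: os nomes de colunas seguem o ficheiro de amostra, mas o ficheiro de saída `dados-escalados.csv` é hipotético.
-
-```python
-# Esboço: escala as colunas topic1/topic2 para inteiros entre 1 e 88
-# e grava um novo csv; o Sonic Pi pode então carregar esse novo ficheiro
-# no lugar do original (ajuste o nome dentro de File.read no seu script).
-import csv
-
-def escalar(valores, minimo=1, maximo=88):
-    v_min, v_max = min(valores), max(valores)
-    if v_max == v_min:
-        return [minimo for _ in valores]
-    return [int(round(minimo + (v - v_min) * (maximo - minimo) / (v_max - v_min)))
-            for v in valores]
-
-with open('sonification-jesuittopics.csv') as f:
-    linhas = list(csv.DictReader(f))
-
-t1 = escalar([float(l['topic1']) for l in linhas])
-t2 = escalar([float(l['topic2']) for l in linhas])
-
-with open('dados-escalados.csv', 'w') as f:
-    escritor = csv.writer(f)
-    escritor.writerow(['topic1', 'topic2'])
-    escritor.writerows(zip(t1, t2))
-```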
- -# Nihil Novi Sub Sole - -Mais uma vez, para que não pensemos que estamos na vanguarda através da nossa geração algorítmica de música, um lembrete foi publicado em 1978 sobre 'jogos de música de dados' no século XVIII, em que o lançamento de dados determinava a recombinação de trechos pré-escritos de música. [Alguns desses jogos foram explorados e recodificados para o Sonic-Pi por Robin Newman](https://rbnrpi.wordpress.com/project-list/mozart-dice-generated-waltz-revisited-with-sonic-pi/). Newman também usa uma ferramenta que poderia ser descrita como um Markdown+Pandoc da notação musical, [Lilypond](http://www.lilypond.org/) para pontuar essas composições. Os antecedentes para tudo que pode ser encontrado no _The Programming Historian_ são mais profundos do que se pode suspeitar! - -# Conclusão - -Sonificar os nossos dados nos faz confrontar os modos como os nossos dados são, muitas vezes, não sobre o passado, mas sobre o que construímos dele. Isso ocorre em parte em virtude de sua novidade, e da arte e do artifício necessários para mapear os dados para o som. Mas isso também acontece pelo contraste com as nossas noções pré-concebidas sobre visualização de dados. Pode ser que os sons gerados por alguém nunca cheguem ao nível da 'música'; mas se ajudar a transformar como nós encontramos o passado, e como outros engajam com o passado, então o esforço terá sido frutífero. Como Trevor Owens pode ter colocado, 'Sonificação é sobre [descoberta, não justificação](http://www.trevorowens.org/2012/11/discovery-and-justification-are-different-notes-on-sciencing-the-humanities/)'. - -## Termos - -+ **MIDI**, interface digital de instrumento musical. É uma descrição do valor e do tempo de uma nota, não de sua dinâmica ou de como alguém pode tocá-la (esta é uma distinção importante). Ele permite que computadores e instrumentos conversem entre si; pode-se aplicar instrumentação diferente a um ficheiro MIDI da mesma forma que se mudaria a fonte em um pedaço de texto (ou executar um ficheiro Markdown por meio do Pandoc). -+ **MP3**, formato de compressão que remove dados como parte de sua rotina de compactação. -+ **Tom**, a nota em si (C médio, etc) -+ **Ataque**, como a nota é tocada ou atingida -+ **Duração**, quanto tempo a nota dura (notas inteiras, semínimas, colcheias etc) -+ **Mapeamento do Tom e Mapeamento da Duração**, dimensionamento de valores de dados em relação a um intervalo de notas ou a duração da nota -+ **Amplitude**, em resumo, o volume da nota - -# Referências -Baio, Andy. 2015. 'If Drake Was Born A Piano'. Waxy. [http://waxy.org/2015/12/if_drake_was_born_a_piano/](https://waxy.org/2015/12/if_drake_was_born_a_piano/) - -Drucker, Johanna. 2011. Humanities Approaches to Graphical Display. DHQ 5.1 [http://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html](http://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html) - -Hedges, Stephen A. 1978. “Dice Music in the Eighteenth Century”. Music & Letters 59 (2). Oxford University Press: 180–87. [http://www.jstor.org/stable/734136](http://www.jstor.org/stable/734136). - -Hermann, T. 2008. "Taxonomy and definitions for sonification and auditory display". In P. Susini and O. Warusfel (eds.) Proceedings of the 14th international conference on auditory display (ICAD 2008). IRCAM, Paris. [http://www.icad.org/Proceedings/2008/Hermann2008.pdf](http://www.icad.org/Proceedings/2008/Hermann2008.pdf) - -Koebler, Jason. 2015. "The Strange Acoustic Phenomenon Behind These Wacked-Out Versions of Pop Songs" Motherboard, Dec 18. 
[http://motherboard.vice.com/read/the-strange-acoustic-phenomenon-behind-these-wacked-out-versions-of-pop-songs](http://motherboard.vice.com/read/the-strange-acoustic-phenomenon-behind-these-wacked-out-versions-of-pop-songs) - -Last and Usyskin, 2015. "Listen to the Sound of Data". In Aaron K. Baughman et al. (eds.) Multimedia Data Mining and Analytics. Springer: Heidelberg. Pp. 419-446 [https://www.researchgate.net/publication/282504359_Listen_to_the_Sound_of_Data](https://www.researchgate.net/publication/282504359_Listen_to_the_Sound_of_Data) +--- +title: Sonificação de dados (uma introdução à sonificação para historiadores) +layout: lesson +slug: som-dados-sonificacao-historiadores +date: 2016-06-07 +translation_date: 2021-03-26 +authors: +- Shawn Graham +reviewers: +- Jeff Veitch +- Tim Compeau +editors: +- Ian Milligan +translator: +- Gabriela Kucuruza +translation-editor: +- Jimmy Medeiros +translation-reviewer: +- Samuel Van Ransbeeck +- Juliana Marques da Silva +difficulty: 2 +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/429 +activity: transforming +topics: [distant-reading] +abstract: "Existem inúmeras lições que o ajudarão a visualizar o passado, mas esta lição o ajudará a ouvir o passado." +original: sonification +avatar_alt: Um violino +doi: 10.46430/phpt0020 +--- + +{% include toc.html %} + +# Introdução + +ποίησις - fabricação, criação, produção + +Eu estou muito cansado de ver o passado. Existem diversos guias que irão ajudar a _visualizar_ o passado que não podemos ver, mas muitas vezes nós esquecemos que a visualização é um ato de criatividade. Nós talvez estejamos muito ligados às nossas telas, muito focados em "ver". Ao invés disso, deixe-me ouvir algo do passado. + +Enquanto existe uma história e uma literatura profundas sobre arqueoacústica e paisagens sonoras que tentam capturar o som de um lugar _como ele era_ ([veja por exemplo a Virtual St. Paul's](https://www.digitalstudies.org/articles/10.16995/dscn.58) ou o trabalho de [Jeff Veitch em Ostia antiga](https://jeffdveitch.wordpress.com/)), eu tenho interesse em 'sonificar' o que eu tenho _agora_, os dados eles mesmos. Eu quero descobrir uma gramática para representar dados em som que seja apropriada para História. [Drucker](#Drucker) [notoriamente nos lembra](https://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html) que ‘dados’ não são coisas dadas, mas ao invés disso, coisas capturadas, coisas transformadas. Na sonificação de dados, eu literalmente realizo o passado no presente, e então as suposições e as transformações que faço estão em primeiro plano. A experiência auditiva resultante é uma "deformação" literal que nos faz ouvir as camadas modernas do passado de uma nova maneira. + +Eu quero ouvir os significados do passado, mas eu sei que não posso. No entanto, quando ouço um instrumento, posso imaginar a materialidade do músico tocando; posso discernir o espaço físico em seus ecos e ressonâncias. Eu posso sentir o som, eu posso me mover no ritmo. A música engaja o meu corpo inteiro, minha imaginação inteira. As suas associações com sons, música e tons que eu ouvi antes criam uma experiência temporal profunda, um sistema de relações incorporadas entre eu e o passado. Visual? Nós temos representações visuais do passado há tanto tempo, que nós quase nos esquecemos dos aspectos artístico e performativo dessas gramáticas de expressão. + +Nesse tutorial, você aprenderá a fazer um pouco de barulho a partir dos seus dados sobre o passado. O _significado_ desse barulho, bem... 
isso depende de você. Parte do objetivo desse tutorial é te fazer estranhar os seus dados. Traduzindo-o, transcodificando-o, [remediando-o](https://blog.taracopplestone.co.uk/making-things-photobashing-as-archaeological-remediation/) (em inglês), nós começaremos a ver elementos dos dados que a nossa familiaridade com modelos visuais nos impediu de enxergar. Essa deformação está de acordo com os argumentos apresentados por, por exemplo, Mark Sample sobre [quebrar coisas](https://www.samplereality.com/2012/05/02/notes-towards-a-deformed-humanities/) (em inglês), ou Bethany Nowviskie sobre a '[resistência nos materiais](https://nowviskie.org/2013/resistance-in-the-materials/)' (em inglês). Sonificação nos move através do continuum de dados para captação, ciências sociais para arte, [falha para estética](https://nooart.org/post/73353953758/temkin-glitchhumancomputerinteraction) (em inglês). Então vamos ver como isso tudo soa. + +## Objetivos + +Nesse tutorial, apresentarei três maneiras diferentes de gerar som ou música a partir de seus dados. + +Na primeira, usaremos um sistema desenvolvido por Jonathan Middleton, disponível gratuitamente para uso, chamado _Musicalgorithms_ (Algorítmos Musicais) a fim de introduzir algumas das questões e termos-chaves envolvidos. Na segunda, usaremos uma pequena biblioteca do Python para 'mapear por parâmetro' os nossos dados contra o teclado de 88 teclas e introduzir um pouco de arte em nosso trabalho. Finalmente, aprenderemos como carregar nossos dados no ambiente de codificação ao vivo de código aberto para som e música, _Sonic Pi_, momento em que te deixarei para que explore os abundantes tutoriais e recursos desse projeto. + +Você verá que "sonificação" nos movimenta através do espectro partindo de simples 'visualização/auralização' para performance real. + +### Ferramentas ++ Musicalgorithms [https://musicalgorithms.org/](https://musicalgorithms.org/) ++ MIDITime [https://github.com/cirlabs/miditime](https://github.com/cirlabs/miditime) (Eu bifurquei uma cópia no GitHub [aqui](https://github.com/shawngraham/miditime)) ++ Sonic Pi [https://sonic-pi.net/](https://sonic-pi.net/) + +### Dados de Exemplo + ++ [Dados sobre artefatos romanos](/assets/sonification/sonification-roman-data.csv) ++ [Excerto do modelo de tópicos do diário de John Adams](/assets/sonification/sonification-diary.csv) ++ [Excerto do modelo de tópicos das relações jesuíticas](/assets/sonification/sonification-jesuittopics.csv) + +# Um pouco de contexto sobre sonificação + +Sonificação é a prática de mapear aspectos dos dados para produzir sinais sonoros. Em geral, uma técnica pode ser chamada de "sonificação" se cumprir certas condições. Elas incluem reprodutibilidade (os mesmos dados podem ser transformados da mesma maneira por outros pesquisadores de forma que produzam os mesmos resultados) e o que pode ser chamado de inteligibilidade - que os elementos "objetivos" dos dados originais sejam sistematicamente refletidos no som resultante (veja [Hermann (2008)](https://www.icad.org/Proceedings/2008/Hermann2008.pdf) (em inglês) para uma taxonomia da sonificação). [Last e Usyskin (2015)](https://www.researchgate.net/publication/282504359_Listen_to_the_Sound_of_Data) (em inglês) realizaram uma série de experimentos para determinar quais tarefas de análise de dados poderiam ser performadas quando os dados eram sonificados. Os seus resultados experimentais mostraram que mesmo um grupo de ouvintes não-treinados (sem treinamento formal em música) podem fazer distinções úteis nos dados. 
Eles encontraram ouvintes que conseguiam distinguir tarefas comuns de exploração de dados nos dados sonificados, como classificação e agrupamento. Os seus resultados sonificados mapearam os dados fundamentais da escala musical ocidental. + +Last e Usyskin focaram em dados de séries temporais. Eles argumentam que dados de séries temporais são particularmente bons para sonificação, pois há paralelos naturais com sons musicais. Música é sequencial, ela tem duração e ela se desenvolve ao longo do tempo, assim como dados de séries temporais. [(Last e Usyskin 2015, p. 424)](https://www.researchgate.net/publication/282504359_Listen_to_the_Sound_of_Data). Torna-se um problema combinar os dados com as saídas sônicas apropriadas. Em muitas aplicações de sonificação, uma técnica chamada "mapeamento de parâmetros" é usada para combinar aspectos dos dados ao longo de várias dimensões da audição, como [tom](#tom), variação, brilho e início. O problema com esta abordagem é que onde não há relação temporal (ou melhor, nenhuma relação não linear) entre os pontos de dados originais, o som resultante pode ser "confuso" (2015, p. 422). + +## Escutando as lacunas +Há também o modo que preenchemos as lacunas do som com as nossas expectativas. Considere esse vídeo em que [mp3](#mp3) foi convertido para [MIDI](#midi) e de volta para mp3; a música foi 'achatada' para que todas as informações sonoras sejam tocadas por apenas um instrumento. (Gerar esse efeito é como salvar uma página da web como .txt, abri-la no Word e, então, salvá-la novamente como .html). Todos os sons (inclusive vocais) foram traduzidos para os seus valores de nota correspondentes e, em seguida, transformados de volta em mp3. + +É barulhento, entretanto percebemos o significado. Considere o vídeo abaixo: + + + +O que está acontecendo aqui? Se já conhecia essa música, provavelmente ouviu as 'palavras'. No entanto, nenhuma palavra está presente na música! Se você não conhecia esse música, deve ter soado como um absurdo inaudível (veja mais exemplos no website de [Andy Baio](https://waxy.org/2015/12/if_drake_was_born_a_piano/)). Esse efeito é, às vezes, chamado de 'alucinação auditiva' (cf. [Koebler, 2015](#Koebler)). Esses exemplos mostram como qualquer representação de dados que podemos ouvir/ver não está lá, estritamente falando. Nós preenchemos as lacunas com as nossas próprias expectativas. + +Considere as implicações para a História. Se sonificarmos nossos dados e começarmos a ouvir padrões no som, ou pontos fora da curva, nossas expectativas culturais sobre como a música funciona (nossas memórias de fragmentos musicais semelhantes, ouvidos em contextos específicos) irão colorir nossa interpretação. Isso, eu argumentaria, é verdadeiro para todas as representações do passado, mas sonificar é apenas estranho o suficiente em relação aos nossos métodos regulares, de forma que essa autoconsciência nos ajudará a identificar ou comunicar os padrões críticos nos dados do passado. + +Iremos progredir por meio de três ferramentas diferentes para sonificação de dados, observando como as escolhas em uma ferramenta afetam o resultado e podem ser atenuadas imaginando novamente os dados por meio de outra ferramenta. No fim das contas, não há nada mais objetivo em 'sonificação' do que há em 'visualização', então quem pesquisa deve estar preparado para justificar as suas escolhas, e fazer escolhas transparentes e reprodutíveis para outros. 
And lest we think that sonification and algorithmically generated music are somehow something "new", I direct the interested reader to [Hedges (1978)](https://www.jstor.org/stable/734136).

In each section, I will give a conceptual introduction, followed by a walkthrough using sample archaeological or historical data.

# Musicalgorithms

There is a wide variety of tools for sonifying data. Some, for instance, are widely used packages for the [R statistical environment](https://cran.r-project.org/), such as '[playitbyR](https://cran.r-project.org/web/packages/playitbyr/index.html)' and '[AudiolyzR](https://cran.r-project.org/web/packages/audiolyzR/index.html)'. The first of these packages, however, has not been maintained or updated for current versions of R (its last update was many years ago), and the second requires a considerable amount of additional software configuration to work properly.

By contrast, the [Musicalgorithms](https://musicalgorithms.org/) site is quite easy to use. It has been online for more than a decade. Although it is not open source, it is a long-running research project in computational music by its creator, Jonathan Middleton. It is currently in its third major iteration (earlier iterations remain available for use online). We will begin with Musicalgorithms because it lets us enter and adjust our data to produce a MIDI representation file. Take care to select '[Version 3](https://musicalgorithms.org/3.0/index.html)'.

{% include figure.html filename="sonification-musicalgorithms-main-site-1.png" caption="The Musicalgorithms site as it appeared on August 2, 2016" %}

> Translator's note: newer versions are available, but in order to follow this tutorial we use version 3 of Musicalgorithms, the version used in 2016, which is still available on the site.

Musicalgorithms performs a series of transformations on the data. In the sample data below (the site's own default), there is only a single row of data, even though it looks like several rows. The sample data consists of comma-separated fields whose values are themselves space-delimited.

```
# Of Voices, Text Area Name, Text Area Data
1,morphBox,
,areaPitch1,2 7 1 8 2 8 1 8 2 8 4 5 9 0 4 5 2 3 5 3 6 0 2 8
,dAreaMap1,2 7 1 8 2 8 1 8 2 8 4 5 9 0 4 5 2 3 5 3 6 0 2 8
,mapArea1,20 69 11 78 20 78 11 78 20 78 40 49 88 1 40 49 20 30 49 30 59 1 20 78
,dMapArea1,1 5 1 5 1 5 1 5 1 5 3 3 6 0 3 3 1 2 3 2 4 0 1 5
,so_text_area1,20 69 11 78 20 78 11 78 20 78 40 49 88 1 40 49 20 30 49 30 59 1 20 78
```

These data represent the source data and its transformations; sharing them would allow another researcher to replicate or extend the sonification using other tools. When starting out, however, only the basic data below are needed (a list of data points):

```
# Of Voices, Text Area Name, Text Area Data
1,morphBox,
,areaPitch1,24 72 12 84 21 81 14 81 24 81 44 51 94 01 44 51 24 31 5 43 61 04 21 81
```

The key field for us is 'areaPitch1', which contains the space-delimited input data. The other fields will be filled in as we move through Musicalgorithms' various settings. In the data above (e.g. 24 72 12 84, and so on), the values are raw counts of inscriptions from a series of locations along a Roman road in Britain. (We will practice with other data shortly, below.)
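Since this one-line input format is fiddly to assemble by hand, a few lines of Python can build it from a list of counts. This is a minimal sketch added for illustration (it is not part of Musicalgorithms itself); the counts are the sample values above, and the output filename is arbitrary.

```python
# Write the one-line Musicalgorithms input CSV from a list of raw counts.
counts = [24, 72, 12, 84, 21, 81, 14, 81, 24, 81, 44, 51,
          94, 1, 44, 51, 24, 31, 5, 43, 61, 4, 21, 81]

lines = [
    "# Of Voices, Text Area Name, Text Area Data",
    "1,morphBox,",
    ",areaPitch1," + " ".join(str(c) for c in counts),  # space-delimited values
]

with open("inscription-counts.csv", "w") as f:  # arbitrary filename
    f.write("\n".join(lines))
```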
+ +{% include figure.html filename="sonification-musicalgorithms-pitch-mapping-2.png" caption="Depois de carregar seus dados, é possível selecionar as diferentes operações na barra de menu superior do site. Na captura de tela, o mouseover de informações está explicando o que acontece com o dimensionamento de seus dados se você selecionar a operação de divisão para dimensionar os seus dados para o intervalo de notas selecionado." %} + +Agora, conforme se percorre as várias guias da interface ‘duration input’ (entrada de duração) , ‘pitch mapping' (mapeamento de tom), ‘duration mapping’ (mapeamento de duração), ‘scale options’ (opções de escala musical) é possível realizar várias transformações. Em ‘pitch mapping’ (mapeamento de tom), há uma série de opções matemáticas para mapear os dados contra as 88 teclas/tons completos de um teclado de piano (em um mapeamento linear, a _média_ dos dados de alguém seria mapeado para dó médio, ou 40). Também é possível escolher o tipo de escala, se é um tom maior ou menor. Nesse ponto, uma vez que se tenha selecionado várias transformações, salve o ficheiro de texto. No menu 'play' é possível realizar o download de um ficheiro MIDI. O seu programa de áudio padrão pode tocar ficheiros MIDI (geralmente padronizando para um tom de piano). Uma instrumentação mais complicada pode ser atribuída abrindo o ficheiro MIDI em programas de mixagem de música, como GarageBand (Mac) ou [LMMS](https://lmms.io/) (Windows, Mac, Linux). (O uso do Garageband ou LMMS está fora do escopo desse tutorial. Um tutorial em vídeo sobre LMMS está disponível [aqui](https://youtu.be/4dYxV3tqTUc), enquanto há muitos tutoriais do Garageband online. Lynda.com tem [um tutorial excelente](https://www.lynda.com/GarageBand-tutorials/Importing-audio-tracks/156620/164050-4.html)). + +Se tivesse várias colunas de dados para os mesmos pontos - digamos, em nosso exemplo da Grã-Bretanha romana, também queríamos sonificar contagens de um tipo de cerâmica para essas mesmas cidades - é possível recarregar sua próxima série de dados, efetuar as transformações e mapeamentos, e gerar outro ficheiro MIDI. Como o Garageband e o LMMS permitem a sobreposição de vozes, você pode começar a criar sequências musicais complicadas. + +{% include figure.html filename="sonification-garageband-john-adams-3.png" caption="Captura de tela do Garageband, onde os ficheiros MIDI são tópicos sonorizados do Diário de John Adams. Na interface do Garageband (o LMMS é semelhante), cada ficheiro MIDI é arrastado e solto no lugar. A instrumentação para cada ficheiro MIDI (ou seja, trilha) pode ser selecionada nos menus do Garageband. Os rótulos de cada faixa foram alterados aqui para refletir as palavras-chave em cada tópico. A área verde à direita representa uma visualização das notas em cada faixa. Você pode ver esta interface em ação e ouvir a música [aqui](https://youtu.be/ikqRXtI3JeA) (em inglês)" %} + +Quais transformações devem ser usadas? Se tiver duas colunas de dados, terá duas vozes. Pode fazer sentido, em nossos dados hipotéticos, tocar a primeira voz bem alto, em uma tonalidade maior: as inscrições 'falam' conosco, afinal de contas. (As inscrições romanas de fato se dirigem ao leitor, o transeunte, literalmente: 'Ó tu que passas ...'). Então, se acaso as cerâmicas de interesse forem mercadorias mais despretensiosas, talvez elas possam ser mapeadas em relação à extremidade inferior da escala ou receberem notas de duração mais longas para representar sua onipresença nas classes nessa região. 
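To make this notion of pitch mapping concrete, here is a rough Python sketch of the kind of linear scaling involved. It is an illustration only: it assumes a simple minimum-to-maximum mapping onto a restricted range of keys, while Musicalgorithms' own 'division' operation and rounding may differ in detail.

```python
# Scale raw values linearly onto piano keys low_key..high_key
# (key 1 is the lowest of the 88 keys, key 88 the highest).
def scale_to_keys(values, low_key=25, high_key=60):
    lo, hi = min(values), max(values)
    span = (hi - lo) or 1  # guard against constant data
    return [round(low_key + (v - lo) * (high_key - low_key) / span) for v in values]

counts = [24, 72, 12, 84, 21, 81, 14, 81, 24, 81, 44, 51,
          94, 1, 44, 51, 24, 31, 5, 43, 61, 4, 21, 81]
print(scale_to_keys(counts))  # the smallest count becomes key 25, the largest key 60
```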
+ +_Não há forma 'certa' de representar os seus dados como som, ao menos não por enquanto_, mas mesmo com essa amostra de exemplo, começamos a ver como sombras de significado e interpretação podem ser atribuídas aos nossos dados e à nossa experiência dos dados. + +Mas e o tempo? Dados históricos usualmente têm um ponto de inflexão, um distinto "tempo quando" algo aconteceu. Então, a quantidade de tempo entre dois pontos de dados precisa ser considerada. É nesse ponto que a nossa próxima ferramenta se torna bem útil, para quando nossos pontos de dados tiverem uma relação com outro espaço temporal. Começamos a nos mover de sonificação (pontos de dados) para música (relações entre pontos). + +### Prática +O [conjunto de dados de amostra](/assets/sonification/sonification-roman-data.csv) apresentado contém a contagem de moedas romanas na sua primeira coluna e a contagem de materiais romanos dos mesmos locais, conforme contido no banco de dados do Portable Antiquities Scheme (Esquema de Antiguidades Portáveis) do British Museum. A sonificação desses dados pode revelar ou acentuar aspectos da situação econômica ao longo da rua Watling, uma grande rota através da Britânia Romana. Esses pontos de dados estão organizados geograficamente do Noroeste ao Sudeste; então, na medida em que o som toca, nós estamos escutando movimento através do espaço. Cada nota representa outro passo no caminho. + +1. Abra o [dados-sonificação-romana.csv](/assets/sonification/sonification-roman-data.csv) em uma tabela. Copie a primeira coluna em um editor de texto. Delete os finais das linhas de forma que os dados fiquem todos em uma linha única. +2. Adicione a seguinte informação de coluna assim: +``` +# Of Voices, Text Area Name, Text Area Data +1,morphBox, +,areaPitch1, +``` +...para que os seus dados sigam imediatamente depois da última vírgula (como [esse exemplo](/assets/sonification/sonification-romancoin-data-music.csv)). Salve o ficheiro com um nome útil como `sonsdasmoedas1.csv`. + +3. Acesse o site do [Musicalgorithms](https://musicalgorithms.org/3.0/index.html) (versão 3) e clique no botão "load" (carregar). No pop-up, clique no botão azul "load" (carregar) e selecione o ficheiro salvo no passo 2. O site carregará os seus materiais e exibirá uma marca de seleção verde se tiver sido carregado com êxito. Caso contrário, certifique-se de que os seus valores estejam separados por espaços e que sigam imediatamente a última vírgula no bloco de código na etapa 2. Também é possível tentar carregar o [ficheiro de demonstração desse tutorial](/assets/sonification/sonification-romancoin-data-music.csv) ao invés. + +{% include figure.html filename="sonification-musicalgorithms-upload-4.png" caption="Clique em 'load' na tela principal para acessar essa caixa de diálogo. Então 'load csv'. (carregue o csv) Selecione o ficheiro; ele aparecerá na caixa. Então clique no botão 'load' (carregar)." %} + +4. Clique em 'Pitch Input'. Os valores dos seus dados serão exibidos. Por enquanto, **não selecione** nenhuma outra opção nesse página (consequentemente, usaremos os valores padrão do site). + +5. Clique em 'Duration Input'. **Não selecione nenhuma opção aqui por enquanto**. As opções aqui irão mapear várias transformações em relação aos dados que alterarão a duração para cada nota. Não se preocupe com as opções por enquanto: siga adiante. +6. Clique em 'Pitch Mapping'. Essa é a escolha mais crucial, pois irá transformar (isso é, escalar) os seus dados brutos para um mapeamento em relação às teclas do teclado. 
Deixe a configuração de `mapping` em 'division'. (As outras opções são módulo e logarítmico). A opção `Range` 1 a 88 usa todas as 88 teclas do teclado, assim, seu valor mais baixo estaria de acordo com a nota mais profunda do piano e seu valor mais alto com a nota mais alta. Em vez disso, você pode restringir sua música em torno de dó médio, então insira 25 a 60 como seu intervalo. O resultado deveria mudar para: `31,34,34,34,25,28,30,60,28,25,26,26,25,25,60,25,25,38,33,26,25,25,25` Essas não são mais suas contagens; são as notas do teclado. + +{% include figure.html filename="sonification-musicalgorithms-settings-for-pitch-mapping-5.png" caption="Clique na caixa 'range' e defina-o para 25. Os valores abaixo serão alterados automaticamente. Clique na caixa 'to' e defina-o para 60. Clique novamente na outra caixa; os valores serão atualizados." %} + +8. Clique em 'Duration Mapping'. Como Pitch Mapping, isso pega o intervalo de tempo especificado e usa várias opções matemáticas para mapear o intervalo de possibilidade contra as suas notas. Se passar o seu cursor por cima de `i` verá como os números correspondem com notas inteiras, semínimas, colcheias e assim por diante. Deixe os valores padrão por enquanto. +9. Clique em 'Scale Options'. Aqui nós podemos começar a selecionar o que pode ser chamado de aspecto 'emocional' do som. Nós geralmente pensamos que escalas maiores são 'alegres' enquanto escalas menores são 'tristes'; para uma discussão acessível acesse esse [post de blog](https://www.ethanhein.com/wp/2010/scales-and-emotions/) (em inglês). Por enquanto, escolha 'scale by: major' (escala maior). Deixe a 'scale' (escala) como `C`. + +Agora sonificamos uma coluna de dados! Clique no botão 'save' (salvar), então 'save csv' (salvar csv). + +{% include figure.html filename="sonification-musicalgorithms-save-6.png" caption="A caixa de diálogo salvar dados." %} +Haverá um ficheiro que se parecerá com isso: + +``` +# Of Voices, Text Area Name, Text Area Data +1,morphBox, +,areaPitch1,80 128 128 128 1 40 77 495 48 2 21 19 1 1 500 1 3 190 115 13 5 1 3 +,dAreaMap1,2 7 1 8 2 8 1 8 2 8 4 5 9 0 4 5 2 3 5 3 6 0 2 +,mapArea1,31 34 34 34 25 28 30 60 28 25 26 26 25 25 60 25 25 38 33 26 25 25 25 +,dMapArea1,1 5 1 5 1 5 1 5 1 5 3 3 6 0 3 3 1 2 3 2 4 0 1 +,so_text_area1,32 35 35 35 25 28 30 59 28 25 27 27 25 25 59 25 25 39 33 27 25 25 25 +``` + +É possível ver os dados originais no campo 'areaPitch1' e os subsequentes mapeamentos. O site permite que sejam geradas até quatro vozes por vez em um ficheiro MIDI; dependendo de como se quer adicionar instrumentação depois, pode-se querer gerar um ficheiro MIDI por vez. Vamos tocar a música - clique em 'Play'. É possível selecionar o tempo aqui, e um instrumento. É possível ouvir os seus dados no navegador, ou salvá-los como um ficheiro MIDI clicando no botão azul 'Save MIDI file'. + +Retorne ao começo e carregue as duas colunas de dados nesse modelo: +``` +# Of Voices, Text Area Name, Text Area Data +2,morphBox, +,areaPitch1, +,areaPitch2, +``` + +{% include figure.html filename="sonification-2voices-7.png" caption="Coloque 2 na caixa de vozes no topo da interface. Quando você for para qualquer uma das páginas de opção - aqui, nós estamos em 'pitch input' - dois monitores abrem para mostrar os dados das duas vozes. Carregue os seus dados do csv como antes, mas formate o seu csv para ter o 'areaPitch1' e o 'areaPitch2' como descrito no texto principal. Os dados para a primeira voz irão aparecer na esquerda, e a segunda voz na direita." 
%}

When you have data in several voices, what stands out? Note that in this approach, the real-world distance between the points is not factored into our sonification. That distance, had it been taken into account, could be crucial. And distance, of course, need not be geographic: it can be temporal. The next tool we will explore lets us address this explicitly in our sonification.

# A few words on setting up Python

The next section of this tutorial requires Python. If you have not used Python before, you will need to spend some time [getting familiar with the command line (PC) or terminal (OS X)](/en/lessons/intro-to-bash) (in English). You may find this quick [guide to installing Python modules](/pt/licoes/instalacao-modulos-python-pip) useful (but come back to it after reading the rest of this section).

Mac users will already have Python installed on their machine. You can test this by holding down the COMMAND key and the space bar; in the search window, type `terminal` and click on the terminal application. At the command prompt, that is, at the blinking cursor next to `$`, type `python --version`, and the computer will reply with the version of Python on your machine. _The next section of this tutorial uses Python 2.7; it has not been tested on Python 3_.

For Windows users, Python is not installed by default on your machine, so [this page](https://docs.python-guide.org/en/latest/starting/install/win/) will help you get started, although things are a little more complicated than that page makes them seem (translator's note: you can also use the _Programming Historian em português_ [lesson on installing Python](/pt/licoes/introducao-instalacao-python), but bear in mind that that lesson installs Python version 3). First, download the `.msi` file that the page recommends (Python 2.7). Double-click the file, and it should install itself into a new directory, e.g. `C:\Python27\`. Then we have to tell Windows where to look for Python whenever a Python program is run; that is, we will put the directory's location on your 'path', the environment variable that Windows always consults when confronted with a new command. There are a few ways to do this, but perhaps the easiest is to search your computer for the `Powershell` program (type 'powershell' in your computer's search window). Open Powershell, and at the `>` prompt, paste this entire line:

`[Environment]::SetEnvironmentVariable("Path", "$env:Path;C:\Python27\;C:\Python27\Scripts\", "User")`

Close Powershell when you are done. You will know it worked if nothing happens when you press Enter. To test that everything is working, open the command prompt (here are [10 ways to do that](https://www.howtogeek.com/235101/10-ways-to-open-the-command-prompt-in-windows-10/)) (in English) and type `python --version` at the `>` prompt. It should return `Python 2.7.10` or something similar.

The last piece of the puzzle that all users will need is a program called `Pip`. Mac users can install it by typing in the terminal: `sudo easy_install pip`. Windows users will have a bit more difficulty (translator's note: you can also use the _Programming Historian em português_ [lesson on installing Python modules with pip](/pt/licoes/instalacao-modulos-python-pip), but bear in mind that that lesson uses Python version 3).
First, right-click and save this link: [https://bootstrap.pypa.io/get-pip.py](https://bootstrap.pypa.io/get-pip.py) (if you just click the link, it will show you the code in your browser). Save it somewhere handy. Open the command prompt in the directory where you saved `get-pip.py`. Then type at the command prompt: `python get-pip.py`. Conventionally, in tutorials you will see `>` or `$` in places where you need to type something at the command prompt or terminal. You never need to type those two characters.

Finally, when you have Python code you want to run, enter it in your text editor and save it with the `.py` extension (translator's note: you can also follow the _Programming Historian em português_ lessons "Configurar um ambiente de desenvolvimento integrado para Python", in their [Windows](/pt/licoes/instalacao-windows) or [Mac](/pt/licoes/instalacao-mac) versions, but bear in mind that those lessons use Python version 3). Your file is a text file, but the file **extension** tells your computer to use Python to interpret it; remember, though, to type `python` at the prompt first, e.g. `$ python meu-script-legal.py`.

# MIDITime

MIDITime is a Python package developed by [Reveal News (formerly the Center for Investigative Reporting)](https://www.revealnews.org/). Its [GitHub repository is here](https://github.com/cirlabs/miditime). MIDITime was built explicitly for time-series data (that is, a sequence of observations collected over time).

While the Musicalgorithms tool has a more or less intuitive interface, the researcher sacrifices the ability to know what, exactly, is going on under the hood. In principle, one could examine the underlying code of the MIDITime package to find out what is happening. More importantly, the previous tool has no way of accounting for data whose points are distant from one another in clock time. MIDITime lets us take into account that our data may be clustered in time.

Suppose you have a historical diary to which you have fitted a [topic model](/en/lessons/topic-modeling-and-mallet). The resulting output might have diary entries as rows, and the percentage composition that each topic contributes to each entry as columns. In that case, _listening_ to those values can help you understand the patterns of thought in the diary in a way that visualizing them as a graph might not. Outliers or recurring musical patterns could stand out to the ear in a way that the grammar of graphs obscures.

### Installing MIDITime
Installing MIDITime is straightforward with [pip](/pt/licoes/instalacao-modulos-python-pip):

`$ pip install miditime` or `$ sudo pip install miditime` on a Mac or Linux machine;
`> pip install miditime` on a Windows machine. (Windows users: if the instructions above did not work well, you might want to try [this helper program](https://sites.google.com/site/pydatalog/python/pip-for-windows) to get pip working properly on your machine, or else follow the instructions in the _Programming Historian em português_ [lesson on pip](/pt/licoes/instalacao-modulos-python-pip).)

### Practice
Let's take a look at the sample script provided.
Open your text editor, and copy and paste the example script into it:

```python
#!/usr/bin/python

from miditime.miditime import MIDITime

# Instantiate the class with a tempo (120bpm is the default) and an output file destination.
mymidi = MIDITime(120, 'meuficheiro.mid')

# Create a list of notes. Each note is a list: [time, pitch, attack, duration]
midinotes = [
    [0, 60, 200, 3],  # At beat 0 (the start), Middle C with attack 200, for 3 beats
    [10, 61, 200, 4]  # At beat 10 (5 seconds from the start at 120bpm), C#5 with attack 200, for 4 beats
]

# Add a track with those notes
mymidi.add_track(midinotes)

# Output the .mid file
mymidi.save_midi()
```

Save the script as `musica1.py`. In your terminal or command prompt, run the script:

`$ python musica1.py`

The new file, `meuficheiro.mid`, will be written to your directory. To hear this file, you can open it with QuickTime or Windows Media Player. (You can add instrumentation by opening the file in GarageBand or [LMMS](https://lmms.io/).)

`Musica1.py` imports miditime (remember, you need to run `pip install miditime` before running the script). Then it creates an output file destination and sets the tempo. All the notes are listed individually, where the first number is the time at which the note should be played, then the note's pitch (that is, the actual note!), how hard or rhythmically the note is struck (the attack), and the note's duration. The notes are then written to the track, and the track is written to `meuficheiro.mid`.

Now, run the script, and then add more notes. The notes for 'A barata diz que tem' are:

```
C7, F, Gm, Am, Bb, C, F, Dm, Gm, C, F
A ... Barata diz que tem sete saias de filó
```

Can you make your computer play this tune? (This [resource](https://www.electronics.dit.ie/staff/tscarff/Music_technology/midi/midi_note_numbers_for_octaves.html) (in English) will help.)

**By the way**, there is a text-file specification for describing music called [ABC Notation](https://pt.wikipedia.org/wiki/ABC_(nota%C3%A7%C3%A3o_musical)). It is beyond our scope for now, but one could write a sonification script in, say, a spreadsheet, mapping values to note names in the ABC specification (if you have ever used an IF - THEN in Excel to convert percentage grades to letter grades, you will have a sense of how this might be done), and then use a site like [this one](https://trillian.mit.edu/~jc/music/abc/ABCcontrib.html) (in English) to convert the ABC notation into a .mid file.

### Putting your own data in
[This file](/assets/sonification/sonification-diary.csv) is a selection from the topic model of John Adams' diaries from [The Macroscope](https://themacroscope.org) (Exploring Big Historical Data: The Historian's Macroscope). Only the strongest signals have been kept, by rounding the values in the columns to two decimal places (remembering that 0.25, for example, indicates that that topic contributes a quarter of the composition of that diary entry). To get these data into your Python script, they have to be formatted in a particular way. The tricky part is getting the date field right.

_For the purposes of this tutorial, we will leave the variable names unchanged from the sample script.
The sample script was built with earthquake data in mind; so wherever it says 'magnitude' we can think of it as '% topic composition.'_

```
meus_dados = [
    {'data_evento': <datetime object>, 'magnitude': 3.4},
    {'data_evento': <datetime object>, 'magnitude': 3.2},
    {'data_evento': <datetime object>, 'magnitude': 3.6},
    {'data_evento': <datetime object>, 'magnitude': 3.0},
    {'data_evento': <datetime object>, 'magnitude': 5.6},
    {'data_evento': <datetime object>, 'magnitude': 4.0}
]
```

One could tackle the problem of getting our data into this format using regular expressions; it may be easier just to open the topic model in a spreadsheet (here `<datetime object>` stands for a Python `datetime`, as built below). Copy the topic data into a new sheet, and leave columns free to the left and right of the data. In the example below, I put it in column D and then filled in the rest of the pieces around it, like so:

| | A | B | C | D | E |
|---|---|---|---|---|---|
|1 | {'data_evento': datetime |(1753,6,8) |, 'magnitude': |0.0024499630 |}, |
|2 | | | | | |
|3 | | | | | |

Then copy and paste the unchanging elements down the whole column. The date element has to be (year, month, day). Once you have filled the sheet, copy and paste it into your text editor so that it becomes part of the `meus_dados` array, like this:

Translator's note: note that the _datetime_ order follows the US English convention.
```
meus_dados = [
{'data_evento': datetime(1753,6,8), 'magnitude':0.0024499630},
{'data_evento': datetime(1753,6,9), 'magnitude':0.0035766320},
{'data_evento': datetime(1753,6,10), 'magnitude':0.0022171550},
{'data_evento': datetime(1753,6,11), 'magnitude':0.0033220150},
{'data_evento': datetime(1753,6,12), 'magnitude':0.0046445900},
{'data_evento': datetime(1753,6,13), 'magnitude':0.0035766320},
{'data_evento': datetime(1753,6,14), 'magnitude':0.0042241550}
]
```

Note that the last line does not have a comma at the end.

Your final script will look similar to this, following the example on the MIDITime page (the code sections below are interrupted by commentary, but should be pasted into your text editor as a single file):

```python
from miditime.miditime import MIDITime
from datetime import datetime
import random

meumidi = MIDITime(108, 'johnadams1.mid', 3, 4, 1)
```

The values after MIDITime, `MIDITime(108, 'johnadams1.mid', 3, 4, 1)`, set
+ the beats per minute (108),
+ the output file ('johnadams1.mid'),
+ the number of seconds used to represent a year in the music (3 seconds per calendar year, so all the notes for this diary's 1753 entries will be scaled against 3 seconds; there are 50 years in the data, so the finished piece will last 50 x 3 seconds, a little over two minutes),
+ the base octave for the music (middle C is conventionally represented as C5, so here 4 stands for one octave below middle C),
+ the number of octaves over which the pitches are mapped.
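Before pasting in your full dataset, it can be worth checking how this time scaling behaves. The snippet below is an added sanity check, not part of the lesson's script (the output name 'teste.mid' is a throwaway): it runs two dates a year apart through the same conversions the script uses. If the scaling works as described, the two printed beat values should differ by roughly 5.4 (3 seconds per year at 108 bpm is 3 x 108/60 beats).

```python
from datetime import datetime
from miditime.miditime import MIDITime

meumidi = MIDITime(108, 'teste.mid', 3, 4, 1)

for d in [datetime(1753, 6, 8), datetime(1754, 6, 8)]:
    dias = meumidi.days_since_epoch(d)   # days relative to the Unix epoch (negative for 1753)
    print(d.date(), meumidi.beat(dias))  # the beat at which a note on this date would sound
```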
+ +Agora passamos os seus dados para o script inserindo-o no arranjo `meus_dados` (isso será colado em seguida): + +```python +meus_dados = [ +{'data_evento': datetime(1753,6,8), 'magnitude':0.0024499630}, +{'data_evento': datetime(1753,6,9), 'magnitude':0.0035766320}, +``` + +...tenha os seus dados aqui, lembrando-se de terminar a linha final data_evento **sem** uma vírgula, e finalizando os dados com um `]` na sua própria linha, por exemplo + +```python +{'data_evento': datetime(1753,6,14), 'magnitude':0.0042241550} +] +``` + +e então copie: + +```python +meus_dados_epoca = [{'dias_desde_epoca': meumidi.days_since_epoch(d['data_evento']), 'magnitude': d['magnitude']} for d in meus_dados] + +meus_dados_tempo = [{'beat': meumidi.beat(d['dias_desde_epoca']), 'magnitude': d['magnitude']} for d in meus_dados_epoca] + +tempo_inicio = meus_dados_tempo[0]['beat'] +``` + +Esta parte calcula o tempo entre as diferentes entradas do diário; diários que estão próximos no tempo terão, portanto, suas notas soando mais próximas. Finalmente, nós definimos como os dados serão mapeados em relação ao tom. Lembre-se que os nossos dados são porcentagens variando de 0.01 (ou seja, 1%) a 0.99 (99%), em `escala_pct` entre 0 e 1. Se não estiver lidando com porcentagens, seria usado o menor valor e o maior valor (se, por exemplo, os seus dados fossem contagens de algum elemento de interesse, como nos dados arqueológicos usados anteriormente). Então, nós colamos: + +```python +def sintonia_mag_para_tom(magnitude): + escala_pct = meumidi.linear_scale_pct(0, 1, magnitude) + # Pick a range of notes. This allows you to play in a key. + c_major = ['C', 'C#', 'D', 'D#', 'E', 'E#', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B', 'B#'] + + #Encontre as notas que correspondem com os pontos dos seus dados + nota = meumidi.scale_to_note(escala_pct, c_major) + + #Traduza essa nota em um tom MIDI + midi_tom = meumidi.note_to_midi_pitch(nota) + + return midi_tom + +lista_notas = [] + +for d in meus_dados_tempo: + lista_notas.append([ + d['beat'] - tempo_inicio, + sintonia_mag_para_tom(d['magnitude']), + random.randint(0,200), # ataque + random.randint(1,4) # duration, in beats + ]) +``` + +e então cole nessa parte final do código para escrever os seus valores de som no ficheiro: + +``` +# Adicione uma faixa com essas notas +meumidi.add_track(lista_notas) + +# Ficheiro .mid resultante +meumidi.save_midi() +``` + +Salve esse ficheiro com um novo nome e a extensão de ficheiro `.py`. + +Para cada coluna de dados nos seus dados originais, **tenha um script único e lembre-se de mudar o nome do ficheiro de saída**, pois, caso contrário, você irá sobrescrever seus dados. Então, você pode carregar os ficheiros individuais midi no Garageband ou LMMS para instrumentação. Aqui está a íntegra do [Diário de John Adams](https://www.youtube.com/watch?v=ikqRXtI3JeA). + +# Sonic Pi + +Harmonizar ficheiros MIDI únicos (no Garageband ou em algum outro programa de composição musical) nos leva de sonificação para composição e arte sonora. Nessa seção final, não será oferecido um tutorial completo sobre como usar o [Sonic Pi](https://sonic-pi.net), mas um direcionamento para um ambiente que permite a performance da codificação dos seus dados ao vivo (veja [esse vídeo](https://www.youtube.com/watch?v=oW-3HVOeUQA) para uma performance ao vivo real de codificação). 
Sonic Pi's own tutorials will show you the potential of using the computer as a musical instrument (where you type Ruby code into the built-in editor while the interpreter plays what you are coding).

Why would one want to do this? As has progressively become evident over this tutorial, when your data are sonified, choices have to be made about how to map the data into sound, and those choices implicitly or explicitly reflect decisions about which data matter. There is a _continuum_ of 'objectivity', if you like. At one end, a sonification that supports an argument about the past; at the other, a performance about the past as gripping and personal as any well-done public lecture. Sonification takes our data off the page and puts it into our listeners' ears: it is a kind of public history. Performing our data... imagine that!

Here, I simply offer a snippet of code that will let you import your data, which here is simply a list of values saved as CSV. I am indebted to George Washington University librarian Laura Wrubel, who posted her experiments in sonifying her library's circulation transactions to [gist.github.com](https://gist.github.com/lwrubel).

In this [sample file](/assets/sonification/sonification-jesuittopics.csv) (a topic model generated from the [Jesuit Relations](https://puffin.creighton.edu/jesuit/relations/)), there are two topics. The first row contains the headers: topic1, topic2.

### Practice

Work through the introductory tutorials that Sonic Pi offers until you feel comfortable with the interface and some of its possibilities. (These tutorials are also gathered [here](https://gist.github.com/jwinder/e59be201082cca694df9); you can also listen to an interview with Sam Aaron, the creator of Sonic Pi, [here](https://devchat.cachefly.net/rubyrogues/RR215SonicPi.mp3?rss=true)). Then, in a new editing window, copy the following (again, the code snippets that follow will eventually be combined into a single script in your Sonic Pi window):

```
require 'csv'
dados = CSV.parse(File.read("/path/to/your/directory/dados.csv"), headers: true, header_converters: :symbol)
use_bpm 100
```

Remember, `/path/to/your/directory/` is the actual location of your data on your machine. Make sure the file is named `dados.csv`, or change the line above so that your file gets loaded!

Now, let's load that data into our music:

```
# this chunk of code will run only once, unless you uncomment the line with
# 'live_loop', and also uncomment the final 'end' at the bottom
# of this block of code
# ('uncommenting' means removing the # sign)
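# An added note on the data access below: the :topic1 and :topic2 symbols
# work because CSV.parse above was given headers: true and
# header_converters: :symbol, which turns the header row 'topic1,topic2'
# into Ruby symbols; .to_f then converts each cell from text to a number.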
# live_loop :jesuit do
dados.each do |line|
  topic1 = line[:topic1].to_f
  topic2 = line[:topic2].to_f

  use_synth :piano
  play topic1*100, attack: rand(0.5), decay: rand(1), amp: rand(0.25)
  use_synth :piano
  play topic2*100, attack: rand(0.5), decay: rand(1), amp: rand(0.25)
  sleep (0.5)

  end
# end
```

The first lines load the columns of data; then we say which synth sound we wish to use (piano), and then tell Sonic Pi to play topic 1 according to the following criteria (a random value under 0.5 for the attack; a decay using a random value under 1; and an [amplitude](#amplitude) with a random value under 0.25). See the x 100 in the line? That takes our data values (which are decimals, remember) and turns them into whole numbers. In this bit of code (the way I have written it), that number equates directly to a note. Since 1 is the lowest note and 88 the highest, you can see that this approach is a bit problematic: we have not done any pitch mapping here! In that case, you can use Musicalgorithms to do your pitch mapping and then feed those values into Sonic Pi. Alternatively, since this code is more or less Ruby, you could look up how to normalize the data and then do a linear mapping of the values onto the range 1 - 88 (see the sketch at the end of this section). A good place to start would be to study [this table by Steve Lloyd](https://github.com/stevelloyd/Learn-sonification-with-Sonic-Pi) on sonifying weather data with Sonic Pi. Finally, another thing to note is that the 'rand' (random) values let us add a bit of 'humanity' to the music in terms of dynamics. Then we do the same thing again for topic2.

You can add beats, loops, samples, and all the other paraphernalia that Sonic Pi allows. Where you place your code snippets affects playback: if loops are placed before the data above, they will play first. For instance, if the following chunk is inserted after the `use_bpm 100` line,

```
# intro bit
sleep 2
sample :ambi_choir, attack: 2, sustain: 4, rate: 0.25, release: 1
sleep 6
```

there will be a bit of an introduction to your piece. There is a 2-second pause, the 'ambi_choir' sample plays, and then there is another 6-second pause before your data plays. If you would like to add a bit of an ominous drum sound throughout your piece, insert this next chunk (again, before your own data):

```
# bit that keeps playing throughout the piece
live_loop :boom do
  with_fx :reverb, room: 0.5 do
    sample :bd_boom, rate: 1, amp: 1
  end
  sleep 2
end
```

The code is quite simple: loop the 'bd_boom' sample with a reverb sound effect, at a particular pace. Pause for 2 seconds between loops.

By the way, 'live coding'? What makes this environment a 'live coding' space is the possibility of making changes to the code _while Sonic Pi is turning it into music_. Don't like what you are hearing? Change the code on the fly!

For more on Sonic Pi, [this workshop site](https://www.miskatonic.org/music/access2015/) (in English) is a good place to start. See also [Laura Wrubel's report on attending that workshop, and her and her colleagues' work in this area](https://library.gwu.edu/scholarly-technology-group/posts/sound-library-work) (in English).
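As suggested above, one way to supply the missing pitch mapping is to precompute the notes outside Sonic Pi. The sketch below is an added illustration, not part of the lesson: it assumes the same `dados.csv` with a `topic1` header, uses a simple min-max normalization (one choice among several), and maps the values linearly onto notes 1 - 88 for pasting into Sonic Pi.

```python
# Scale a topic-model column linearly onto note numbers 1..88
# before handing the values over to Sonic Pi.
import csv

with open("dados.csv") as f:
    topic1 = [float(row["topic1"]) for row in csv.DictReader(f)]

lo, hi = min(topic1), max(topic1)
span = (hi - lo) or 1.0  # guard against a constant column
notes = [round(1 + (v - lo) * 87 / span) for v in topic1]
print(notes)  # e.g. paste into Sonic Pi as: notas = (ring 12, 40, 88, ...)
```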
+ +# Nihil Novi Sub Sole + +Mais uma vez, para que não pensemos que estamos na vanguarda através da nossa geração algorítmica de música, um lembrete foi publicado em 1978 sobre 'jogos de música de dados' no século XVIII, em que o lançamento de dados determinava a recombinação de trechos pré-escritos de música. [Alguns desses jogos foram explorados e recodificados para o Sonic-Pi por Robin Newman](https://rbnrpi.wordpress.com/project-list/mozart-dice-generated-waltz-revisited-with-sonic-pi/). Newman também usa uma ferramenta que poderia ser descrita como um Markdown+Pandoc da notação musical, [Lilypond](https://www.lilypond.org/) para pontuar essas composições. Os antecedentes para tudo que pode ser encontrado no _The Programming Historian_ são mais profundos do que se pode suspeitar! + +# Conclusão + +Sonificar os nossos dados nos faz confrontar os modos como os nossos dados são, muitas vezes, não sobre o passado, mas sobre o que construímos dele. Isso ocorre em parte em virtude de sua novidade, e da arte e do artifício necessários para mapear os dados para o som. Mas isso também acontece pelo contraste com as nossas noções pré-concebidas sobre visualização de dados. Pode ser que os sons gerados por alguém nunca cheguem ao nível da 'música'; mas se ajudar a transformar como nós encontramos o passado, e como outros engajam com o passado, então o esforço terá sido frutífero. Como Trevor Owens pode ter colocado, 'Sonificação é sobre [descoberta, não justificação](https://www.trevorowens.org/2012/11/discovery-and-justification-are-different-notes-on-sciencing-the-humanities/)'. + +## Termos + ++ **MIDI**, interface digital de instrumento musical. É uma descrição do valor e do tempo de uma nota, não de sua dinâmica ou de como alguém pode tocá-la (esta é uma distinção importante). Ele permite que computadores e instrumentos conversem entre si; pode-se aplicar instrumentação diferente a um ficheiro MIDI da mesma forma que se mudaria a fonte em um pedaço de texto (ou executar um ficheiro Markdown por meio do Pandoc). ++ **MP3**, formato de compressão que remove dados como parte de sua rotina de compactação. ++ **Tom**, a nota em si (C médio, etc) ++ **Ataque**, como a nota é tocada ou atingida ++ **Duração**, quanto tempo a nota dura (notas inteiras, semínimas, colcheias etc) ++ **Mapeamento do Tom e Mapeamento da Duração**, dimensionamento de valores de dados em relação a um intervalo de notas ou a duração da nota ++ **Amplitude**, em resumo, o volume da nota + +# Referências +Baio, Andy. 2015. 'If Drake Was Born A Piano'. Waxy. [https://waxy.org/2015/12/if_drake_was_born_a_piano/](https://waxy.org/2015/12/if_drake_was_born_a_piano/) + +Drucker, Johanna. 2011. Humanities Approaches to Graphical Display. DHQ 5.1 [https://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html](https://www.digitalhumanities.org/dhq/vol/5/1/000091/000091.html) + +Hedges, Stephen A. 1978. “Dice Music in the Eighteenth Century”. Music & Letters 59 (2). Oxford University Press: 180–87. [https://www.jstor.org/stable/734136](https://www.jstor.org/stable/734136). + +Hermann, T. 2008. "Taxonomy and definitions for sonification and auditory display". In P. Susini and O. Warusfel (eds.) Proceedings of the 14th international conference on auditory display (ICAD 2008). IRCAM, Paris. [https://www.icad.org/Proceedings/2008/Hermann2008.pdf](https://www.icad.org/Proceedings/2008/Hermann2008.pdf) + +Koebler, Jason. 2015. "The Strange Acoustic Phenomenon Behind These Wacked-Out Versions of Pop Songs" Motherboard, Dec 18. 
[https://motherboard.vice.com/read/the-strange-acoustic-phenomenon-behind-these-wacked-out-versions-of-pop-songs](https://motherboard.vice.com/read/the-strange-acoustic-phenomenon-behind-these-wacked-out-versions-of-pop-songs) + +Last and Usyskin, 2015. "Listen to the Sound of Data". In Aaron K. Baughman et al. (eds.) Multimedia Data Mining and Analytics. Springer: Heidelberg. Pp. 419-446 [https://www.researchgate.net/publication/282504359_Listen_to_the_Sound_of_Data](https://www.researchgate.net/publication/282504359_Listen_to_the_Sound_of_Data) diff --git a/pt/licoes/sumarizacao-narrativas-web-python.md b/pt/licoes/sumarizacao-narrativas-web-python.md index a30543f785..cd7e64aa99 100644 --- a/pt/licoes/sumarizacao-narrativas-web-python.md +++ b/pt/licoes/sumarizacao-narrativas-web-python.md @@ -1,447 +1,447 @@ ---- -title: "Sumarização de narrativas acerca de eventos do passado documentados na web utilizando Python: o caso do Arquivo.pt" -slug: sumarizacao-narrativas-web-python -collection: lessons -layout: lesson -date: 2023-04-29 -authors: -- Ricardo Campos -- Daniel Gomes -reviewers: -- Daniela Major -- Salete Farias -editors: -- Josir Cardoso Gomes -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/420 -difficulty: 2 -activity: transforming -topics: [api, python, data-manipulation, web-archiving] -avatar_alt: Homem sentado ensinando várias crianças -abstract: Nesta lição aprenderá a criar automaticamente resumos de eventos do passado a partir de conteúdos históricos arquivados da web. Em particular, demonstraremos como obter resultados relevantes ao combinar o uso da API do Arquivo.pt com a utilização do *Conta-me Histórias* permitindo, desta forma, processar um elevado volume de dados num curto espaço de tempo. -lesson-partners: [Jisc, The National Archives] -partnership-url: /pt/jisc-tna-parceria -doi: 10.46430/phpt0037 ---- - -{% include toc.html %} - -# Introdução - -Ao longo dos séculos a comunicação evoluiu paralelamente à evolução do homem. Esta, que antes se fazia a partir de meios físicos, é hoje digital e tem presença online. A "culpa" é da web, que desde o final dos anos 90 do século passado, se tornou na principal fonte de informação e comunicação do século XXI. Porém, cerca de [80% da informação disponível na web desaparece ou é alterada no prazo de apenas 1 ano](https://dl.acm.org/doi/10.1145/1145581.1145623) (em inglês). Este facto origina a perda de informação fundamental para documentar os eventos da era digital. - -A mudança para um paradigma de comunicação baseado na internet obrigou a uma alteração profunda na forma como as informações publicadas são preservadas. Os arquivos da web assumem especial relevância, ao preservarem as informações publicadas online desde a década de 1990. - -Apesar dos avanços recentes na preservação de informações arquivadas a partir da web, o problema de explorar de forma eficiente o património histórico preservado por estes arquivos permanece por resolver devido às enormes quantidades de dados históricos arquivados ao longo do tempo e à inexistência de ferramentas que possam processar automaticamente esse volume de dados. Neste contexto, as *timelines* (sistemas automáticos de sumarização temporal) surgem como a solução ideal para a produção automática de resumos de eventos ao longo do tempo e para a análise das informações publicadas online que os documentam, como é o caso das notícias. 
- -Neste tutorial, pretendemos mostrar como explorar o [Arquivo.pt](http://arquivo.pt), o arquivo da web portuguesa, e como criar automaticamente resumos de eventos do passado a partir de conteúdos históricos arquivados da web. Mais concretamente, demonstraremos como obter resultados relevantes ao combinar o uso da [API (Interface de Programação de Aplicações)](https://perma.cc/6ASS-KZFW) do Arquivo.pt com a utilização do [*Conta-me Histórias*](https://contamehistorias.pt), um sistema que permite criar automaticamente narrativas temporais sobre qualquer tema objeto de notícia. Para a concretização desse objetivo disponibilizamos um Jupyter Notebook que os usuários poderão usar para interagir com ambas as ferramentas. - -Na primeira parte do tutorial, iremos apresentar sumariamente as funções de pesquisa e acesso disponibilizadas pelo Arquivo.pt. Demonstraremos como podem ser utilizadas de forma automática através da invocação dos métodos disponibilizados pela API do Arquivo.pt, recorrendo a exemplos simples e práticos. A pesquisa automática de palavras em páginas arquivadas ao longo do tempo é o serviço base para desenvolver rapidamente aplicações informáticas inovadoras, que permitem explorar e tirar maior partido da informação histórica preservada pelo Arquivo.pt, como é caso do projeto *Conta-me Histórias*. - -Na segunda parte, recorremos ao *Conta-me Histórias* para exemplificar o processo de sumarização temporal de um evento. Nesse sentido, demonstraremos a forma como os usuários podem obter informações históricas resumidas sobre um determinado tópico (por exemplo, sobre [Jorge Sampaio](https://perma.cc/AWX8-9CA3), presidente da República Portuguesa entre 1996 e 2006), que envolva notícias do passado preservadas pelo Arquivo.pt. Uma tal infraestrutura permite aos usuários ter acesso a um conjunto de informações históricas a partir de páginas web que, muito provavelmente, já não existirão naquela que convencionalmente designamos como a web atual. - -# Pré-requisitos - -A participação neste tutorial pressupõe conhecimentos básicos de programação (nomeadamente Python) bem como familiarização com a instalação de pacotes python (via [git](https://perma.cc/6BK8-XZKR) (em inglês)), com o [formato JSON](https://www.w3schools.com/js/js_json_intro.asp) (em inglês) e com o consumo de APIs. A execução do código pressupõe o recurso ao Jupyter Notebook. Para a instalação deste *software* recomendamos o tutorial [Introduction to Jupyter Notebooks](/en/lessons/jupyter-notebooks#installing-jupyter-notebooks) (em inglês) ou, em alternativa, o recurso ao [Google Colab](https://colab.research.google.com/). Este tutorial foi testado com a versão 3.6.5 do Python. - -# Objetivos de Aprendizagem - -No final deste tutorial os participantes devem estar aptos a: -- Extrair informação relevante a partir do Arquivo.pt fazendo uso da [Arquivo.pt API (Full-text & URL search)](https://github.com/arquivo/pwa-technologies/wiki/Arquivo.pt-API) (em inglês) -- Saber usar a biblioteca Python do [*Conta-me Histórias*](https://github.com/LIAAD/TemporalSummarizationFramework) (em inglês) no contexto da sumarização temporal automática de eventos a partir de elevados volumes de dados preservados no arquivo da web portuguesa - -# Arquivo.pt - -O [Arquivo.pt](https://www.arquivo.pt) é um serviço público e gratuito disponibilizado pela [Fundação para a Ciência e a Tecnologia I.P.](https://perma.cc/D3XA-5J78), que permite a qualquer pessoa pesquisar e aceder a informação histórica preservada da web desde os anos 90. 
Embora se foque na preservação de informação de interesse para a comunidade portuguesa, contém também páginas escritas em várias línguas de interesse para a comunidade internacional e cerca de metade dos seus usuários são oriundos de fora de Portugal. - -[Este vídeo](https://www.youtube.com/embed/EnSys0HDnCc) introduz brevemente o Arquivo.pt. - -## Contributos - -O Arquivo.pt contém milhares de milhões de ficheiros recolhidos ao longo do tempo a partir de websites em várias línguas que documentam eventos nacionais e internacionais. Os serviços de pesquisa que fornece incluem a pesquisa de texto integral, a pesquisa de imagens, a listagem do histórico de versões, a pesquisa avançada e [APIs](https://arquivo.pt/api), que facilitam o desenvolvimento por terceiros de aplicações de valor acrescentado. - -Ao longo dos anos, o Arquivo.pt tem sido utilizado como recurso para suportar trabalhos de pesquisa em áreas como as Humanidades ou as Ciências Sociais. Desde 2018, o [Prémio Arquivo.pt](https://perma.cc/8F6F-KZFP) distingue anualmente trabalhos inovadores baseados na informação histórica preservada pelo Arquivo.pt. Os pesquisadores e cidadãos têm vindo a ser sensibilizados para a importância da preservação da informação publicada na web através da realização de sessões de formação gratuitas, por exemplo, sobre a [utilização das APIs disponibilizadas pelo Arquivo.pt](https://sobre.arquivo.pt/pt/ajuda/formacao/modulo-c/). - -Todo o *software* desenvolvido está disponível como [projetos de código-aberto gratuitos](https://github.com/arquivo/) (em inglês) e, desde 2008, tem sido documentado através de [artigos técnicos e científicos](https://arquivo.pt/publica). No decorrer das suas atividades, o Arquivo.pt gera dados que podem ser úteis para suportar novos trabalhos de pesquisa, como por exemplo a lista de Páginas do Governo de Portugal nas redes sociais ou de websites de partidos políticos. Estes [dados estão disponíveis em acesso aberto](https://arquivo.pt/dadosabertos). - -[Este vídeo](https://www.youtube.com/embed/CZ6R4Zydg0Q) detalha os serviços públicos disponibilizados pelo Arquivo.pt. Pode também aceder diretamente aos [slides da apresentação](https://perma.cc/854E-9XEV). Para saber mais detalhes acerca dos serviços disponibilizados pelo Arquivo.pt consulte: -* [Módulo A: Arquivo.pt: uma nova ferramenta para pesquisar o passado (módulo A)](https://sobre.arquivo.pt/pt/ajuda/formacao/modulo-a/) do programa de "Formação acerca de preservação da Web" do Arquivo.pt. - -## Onde posso encontrar o Arquivo.pt? - -O serviço Arquivo.pt encontra-se disponível a partir dos seguintes apontadores: -* [Interfaces de usuário em português e inglês para aceder aos serviços de pesquisa de páginas, imagens e histórico de versões](https://www.arquivo.pt) -* [Website informativo acerca do Arquivo.pt](https://sobre.arquivo.pt) -* [Documentação acerca das APIs do Arquivo.pt](https://perma.cc/FV3U-ZEL9) (em inglês) - -## Como funciona a pesquisa automática via API? - -Periodicamente, o Arquivo.pt recolhe e armazena automaticamente a informação publicada na web. A infraestrutura de *hardware* do Arquivo.pt está alojada no seu próprio centro de dados e é gerida por pessoal a ela dedicado a tempo inteiro. - -O fluxo de trabalho de preservação é realizado através de um [sistema de informação distribuído de grande escala](https://perma.cc/A3Z7-E358). 
A informação web armazenada é processada automaticamente para realizar atividades de pesquisa sobre [grandes volumes de dados](https://perma.cc/9FMH-DUY8) (em inglês, *big data*), através de uma plataforma de processamento distribuído para dados não estruturados ([Hadoop](https://perma.cc/B5PH-9B4V)). Tal permite, por exemplo, a deteção automática de *spam* na web ou avaliar a acessibilidade web para pessoas com deficiências. - -Os serviços de pesquisa e acesso via APIs permitem que os pesquisadores tirem partido desta infraestrutura de processamento e dos dados históricos preservados sem terem de endereçar a complexidade do sistema que suporta o Arquivo.pt. [Este vídeo](https://www.youtube.com/embed/PPuauEwIwPE) apresenta a [Arquivo.pt API (Full-text & URL search)](https://perma.cc/6ADS-LPLC) (em inglês). Pode também aceder diretamente aos [slides da apresentação](https://perma.cc/RMS4-UD76). - -Neste tutorial iremos abordar apenas a utilização da API Full-text & URL Search do Arquivo.pt. Porém, este disponibiliza também outras APIs: -* [Image Search API v1.1 (beta version)](https://perma.cc/U682-VNKD) (em inglês) -* [CDX-server API (URL search): international standard](https://perma.cc/9M6Y-A4BW) (em inglês) -* [Memento API (URL search): international standard](https://perma.cc/BF5E-32LR) (em inglês) - -Para saber detalhes acerca de [todas as APIs disponibilizadas pelo Arquivo.pt](https://perma.cc/FV3U-ZEL9) (em inglês) consulte os conteúdos de formação disponíveis em: -* [Módulo C: Acesso e processamento automático de informação preservada da Web através de APIs](https://sobre.arquivo.pt/pt/ajuda/formacao/modulo-c/) do programa de "Formação acerca de preservação da Web" do Arquivo.pt. - -## Utilização - -Em seguida, apresentaremos exemplos de como utilizar a [Arquivo.pt API (Full-text & URL search)](https://github.com/arquivo/pwa-technologies/wiki/Arquivo.pt-API) (em inglês) para pesquisar, de forma automática, páginas da web arquivadas entre determinados intervalos de tempo. Como exemplo, executaremos pesquisas acerca de "[Jorge Sampaio](https://pt.wikipedia.org/wiki/Jorge_Sampaio)"(1939-2021), antigo Presidente da Câmara Municipal de Lisboa (1990-1995) e antigo Presidente da República Portuguesa (1996-2006). - -### Definição dos parâmetros de pesquisa - -O parâmetro *query* define a(s) palavra(s) a pesquisar: `Jorge Sampaio`. - -Para facilitar a leitura dos resultados de pesquisa obtidos iremos limitá-los a um máximo de 5 através do parâmetro `maxItems`. - -A totalidade dos parâmetros de pesquisa disponíveis estão definidos na secção [*Request Parameters* da documentação da API do Arquivo.pt](https://perma.cc/2DMP-3XQC) (link em inglês. Em português, parâmetros requeridos). 
- -```python -import requests -query = "jorge sampaio" -maxItems = 5 -payload = {'q': query,'maxItems': maxItems} -r = requests.get('http://arquivo.pt/textsearch', params=payload) -print("GET",r.url) -``` - -### Percorrer os resultados obtidos no Arquivo.pt - -O seguinte código mostra os resultados de pesquisa obtidos no seu formato original (JSON): - -```python -import pprint -contentsJSon = r.json() -pprint.pprint(contentsJSon) -``` - -### Sumário dos resultados obtidos - -É possível extrair, para cada resultado, a seguinte informação: -* Título (campo `title`) -* Endereço para o conteúdo arquivado (campo `linkToArchive`) -* Data de arquivo (campo `tstamp`) -* Texto extraído da página (campo `linkToExtractedText`) - -Todos os campos obtidos como resposta a pesquisas disponíveis estão definidos na secção [*Response fields* da documentação da API do Arquivo.pt](https://perma.cc/VK9Z-EC83) (link em inglês. Em português, campos de resposta). - -```python -for item in contentsJSon["response_items"]: - title = item["title"] - url = item["linkToArchive"] - time = item["tstamp"] - - print(title) - print(url) - print(time) - - page = requests.get(item["linkToExtractedText"]) - - # Note a existencia de decode, para garantirmos que o conteudo devolvido pelo Arquivo.pt (no formato ISO-8859-1) e impresso no formato (UTF-8) - content = page.content.decode('utf-8') - print(content) - print("\n") -``` - -### Definir o intervalo temporal da pesquisa - -Uma das mais-valias do Arquivo.pt é fornecer o acesso a informação histórica publicada na web ao longo do tempo. - -No processo de acesso à informação os usuários podem definir o intervalo temporal das datas de arquivo das páginas a serem pesquisadas, através da especificação das datas pretendidas nos parâmetros de pesquisa da API `from` e `to`. Estas devem seguir o formato: ano, mês, dia, hora, minuto e segundo (aaaammddhhmmss). Por exemplo, a data 9 de março de 1996 seria representada por: -* 19960309000000 - -O seguinte código executa uma pesquisa por "Jorge Sampaio" de páginas arquivadas entre março de 1996 e março de 2006, período durante o qual este foi Presidente da República Portuguesa. - -```python -query = "jorge sampaio" -maxItems = 5 -fromDate = 19960309000000 -toDate = 20060309000000 -payload = {'q': query,'maxItems': maxItems, 'from': fromDate, 'to': toDate} -r = requests.get('http://arquivo.pt/textsearch', params=payload) -print("GET",r.url) -print("\n") - -contentsJSon = r.json() -for item in contentsJSon["response_items"]: - title = item["title"] - url = item["linkToArchive"] - time = item["tstamp"] - - print(title) - print(url) - print(time) - - page = requests.get(item["linkToExtractedText"]) - - # Note a existencia de decode, para garantirmos que o conteudo devolvido pelo Arquivo.pt (no formato ISO-8859-1) e impresso no formato (UTF-8) - content = page.content.decode('utf-8') - print(content) - print("\n") -``` - -### Restringir a pesquisa a um determinado website - -Se os usuários apenas tiverem interesse na informação histórica publicada por um determinado website, podem restringir a pesquisa através da especificação no parâmetro de pesquisa da API `siteSearch`. O seguinte código executa uma pesquisa por "Jorge Sampaio" de páginas arquivadas apenas a partir do website com o domínio "www.presidenciarepublica.pt", compreendidas entre março de 1996 e março de 2006, e apresenta os resultados obtidos. 
- - -```python -query = "jorge sampaio" -maxItems = 5 -fromDate = 19960309000000 -toDate = 20060309000000 -siteSearch = "www.presidenciarepublica.pt" -payload = {'q': query,'maxItems': maxItems, 'from': fromDate, 'to': toDate, 'siteSearch': siteSearch} -r = requests.get('http://arquivo.pt/textsearch', params=payload) -print("GET",r.url) -print("\n") - -contentsJSon = r.json() -for item in contentsJSon["response_items"]: - title = item["title"] - url = item["linkToArchive"] - time = item["tstamp"] - - print(title) - print(url) - print(time) - - page = requests.get(item["linkToExtractedText"]) - - # Note a existencia de decode, para garantirmos que o conteudo devolvido pelo Arquivo.pt (no formato ISO-8859-1) e impresso no formato (UTF-8) - content = page.content.decode('utf-8') - print(content) - print("\n") -``` - -### Restringir a pesquisa a um determinado tipo de ficheiro - -Além de páginas da web, o Arquivo.pt também preserva outros formatos de ficheiro vulgarmente publicados online, como por exemplo documentos do tipo PDF. Os usuários podem definir o tipo de ficheiro sobre o qual a pesquisa deverá incidir através da especificação no parâmetro de pesquisa `type` da API. - -O seguinte código executa uma pesquisa por "Jorge Sampaio": -* Sobre ficheiros do tipo PDF -* Arquivados apenas a partir do website com o domínio "www.presidenciarepublica.pt" -* Entre março de 1996 e março de 2006 - -E apresenta os resultados obtidos. Quando o usuário abrir o endereço do conteúdo arquivado fornecido pelo campo de resposta `linkToArchive` terá acesso ao ficheiro PDF. - -```python -query = "jorge sampaio" -maxItems = 5 -fromDate = 19960309000000 -toDate = 20060309000000 -siteSearch = "www.presidenciarepublica.pt" -fileType = "PDF" -payload = {'q': query,'maxItems': maxItems, 'from': fromDate, 'to': toDate, 'siteSearch': siteSearch, 'type': fileType} -r = requests.get('http://arquivo.pt/textsearch', params=payload) -print("GET",r.url) -print("\n") - -contentsJSon = r.json() -for item in contentsJSon["response_items"]: - title = item["title"] - url = item["linkToArchive"] - time = item["tstamp"] - - print(title) - print(url) - print(time) -``` - -# *Conta-me Histórias* - -O projeto *Conta-me Histórias* é desenvolvido por pesquisadores do Laboratório de Inteligência Artificial e Apoio a Decisão ([LIAAD](https://perma.cc/B5U2-R74J)) — [INESCTEC](https://perma.cc/4XN7-A6TR)) e afiliados às instituições [Instituto Politécnico de Tomar](https://perma.cc/7PDB-NRAL) — [Centro de Investigação em Cidades Inteligentes (CI2)](https://perma.cc/M3CE-HQ6U), [Universidade do Porto](https://perma.cc/MGZ3-S9AQ) e [Universidade de Innsbruck](https://perma.cc/THE2-KA3L) (em inglês). O projeto visa oferecer aos usuários a possibilidade de revisitarem tópicos do passado através de uma interface semelhante ao Google que, dada uma pesquisa, devolve uma sumarização temporal das notícias mais relevantes preservadas pelo Arquivo.pt acerca desse tópico. Um vídeo promocional do projeto pode ser visualizado [aqui](https://www.youtube.com/watch?v=fcPOsBCwyu8). - -## Contributos - -Nos últimos anos, o crescente aumento na disponibilização de conteúdos online tem colocado novos desafios àqueles que pretendem entender a estória de um dado evento. Mais recentemente, fenómenos como o [media bias](https://perma.cc/MH2W-5WL4) (em português, viés mediático), as [fake news](https://perma.cc/945E-WVDK) (em português, notícias falsas) e as [filter bubbles](https://perma.cc/7M7E-S5CD) (link em inglês. 
Em português, filtro de bolha), vieram adensar ainda mais as dificuldades já existentes no acesso transparente à informação. O *Conta-me Histórias* surge, neste contexto, como um importante contributo para todos aqueles que pretendem ter acesso rápido a uma visão histórica de um dado evento, criando automaticamente narrativas resumidas a partir de um elevado volume de dados coletados no passado. A sua disponibilização em 2018, é um importante contributo para que estudantes, jornalistas, políticos, pesquisadores, etc, possam gerar conhecimento e verificar factos de uma forma rápida, a partir da consulta de *timelines* automaticamente geradas, mas também pelo recurso à consulta de páginas web tipicamente inexistentes na web mais convencional, a web do presente. - -## Onde posso encontrar o *Conta-me Histórias*? - -O projeto *Conta-me Histórias* encontra-se disponível, desde 2018, a partir dos seguintes endereços: -- Página web (versão PT): [https://contamehistorias.pt](https://contamehistorias.pt) -- Biblioteca Python: [https://github.com/LIAAD/TemporalSummarizationFramework](https://perma.cc/J7BB-28YX) (em inglês) - -Outros endereços de relevância: -- *Conta-me Histórias front-end*: [https://github.com/LIAAD/contamehistorias-ui](https://perma.cc/J7BB-28YX) (em inglês) -- *Conta-me Histórias back-end*: [https://github.com/LIAAD/contamehistorias-api](https://perma.cc/Q3MH-3T4J) (em inglês) - -Mais recentemente, em setembro de 2021, o Arquivo.pt passou a disponibilizar a funcionalidade "Narrativa", através de um botão adicional na sua interface que redireciona os usuários para o website do *Conta-me Histórias*, para que a partir deste possam criar automaticamente narrativas temporais sobre qualquer tema. A funcionalidade "Narrativa" resulta da colaboração entre a equipa do *Conta-me Histórias*, vencedora do [Prémio Arquivo.pt 2018](https://perma.cc/8F6F-KZFP), e a equipa do Arquivo.pt. - -## Como Funciona? - -Quando um usuário insere um conjunto de palavras acerca de um tema na caixa de pesquisa do Arquivo.pt e clica no botão "Narrativa", é direcionado para o serviço *Conta-me Histórias* que, por sua vez, analisa automaticamente as notícias de 26 websites arquivados pelo Arquivo.pt ao longo do tempo e apresenta-lhe uma cronologia de notícias relacionadas com o tema pesquisado. - -Por exemplo, se pesquisarmos por "Jorge Sampaio" e carregarmos no botão "Narrativa", - -{% include figure.html filename="sumarizacao-narrativas-web-python-1.jpeg" alt="Pesquisa por Jorge Sampaio através do componente narrativa do Arquivo.pt" caption="Figura 1: Pesquisa por 'Jorge Sampaio' através da componente narrativa do Arquivo.pt." %} - -seremos direcionados para o *Conta-me Histórias*, onde obteremos, automaticamente, uma narrativa de notícias arquivadas. Na figura seguinte é possível observar a linha de tempo e o conjunto de notícias relevantes no período compreendido entre 2016-04-07 e 2016-11-17. O último período temporal é referente ao ano de 2019. - -{% include figure.html filename="sumarizacao-narrativas-web-python-2.jpeg" alt="Resultados da pesquisa por Jorge Sampaio no Conta-me Histórias para o periodo compreendido entre 07/04/2016 e 17/11/2016" caption="Figura 2: Resultados da pesquisa por 'Jorge Sampaio' no *Conta-me Histórias* para o periodo compreendido entre 2016-04-07 e 2016-11-17." 
%} - -Para a seleção das notícias mais relevantes recorremos ao [YAKE!](http://yake.inesctec.pt) (em inglês), um extrator de palavras relevantes (desenvolvido pela nossa equipa de pesquisa) e que, neste contexto, é utilizado para selecionar os excertos mais importantes de uma notícia (mais concretamente os seus títulos) ao longo do tempo. - -Um aspeto interessante da aplicação é o facto desta facilitar o acesso à página web arquivada que dá nome ao título selecionado como relevante. Por exemplo, ao clicar em cima do título "Jorge Sampaio formaliza apoio a Sampaio da Nóvoa" o usuário poderá visualizar a seguinte página web: - -{% include figure.html filename="sumarizacao-narrativas-web-python-3.jpeg" alt="Jorge Sampaio formaliza apoio a Sampaio da Nóvoa" caption="Figura 3: Jorge Sampaio formaliza apoio a Sampaio da Nóvoa." %} - -Paralelamente, poderá ter acesso a um conjunto de "termos relacionados" com o tópico de pesquisa. Na figura abaixo é possível observar, entre outros, a referência aos antigos presidentes da República Mário Soares e Cavaco Silva, bem como aos ex-primeiro-ministros Santana Lopes e Durão Barroso. - -{% include figure.html filename="sumarizacao-narrativas-web-python-4.jpeg" alt="Nuvem de palavras com os termos relacionados com a pesquisa Jorge Sampaio ao longo de 10 anos" caption="Figura 4: Nuvem de palavras com os termos relacionados com a pesquisa por 'Jorge Sampaio' ao longo de 10 anos." %} - -O *Conta-me Histórias* pesquisa, analisa e agrega milhares de resultados para gerar cada narrativa acerca de um tema. Recomenda-se a escolha de palavras descritivas sobre temas bem definidos, personalidades ou eventos para obter boas narrativas. No seção seguinte descrevemos a forma como, através da biblioteca Python, os usuários podem interagir e fazer uso dos dados do *Conta-me Histórias*. - -## Instalação - -Para a instalação da [biblioteca Conta-me Histórias](https://perma.cc/4ZXT-9FB5) (em inglês) necessita de ter o [git](https://perma.cc/6BK8-XZKR) (em inglês) instalado. Após a sua instalação proceda à execução do seguinte código: - -```python -!pip install -U git+https://github.com/LIAAD/TemporalSummarizationFramework -``` - -## Utilização - -### Definição dos parâmetros de pesquisa - -No próximo código o usuário é convidado a definir o conjunto de parâmetros de pesquisa. A variável `domains` lista o conjunto de 24 websites objeto de pesquisa. Um aspeto interessante desta variável é a possibilidade do usuário definir a sua própria lista de fontes noticiosas. Um exercício interessante passa por definir um conjunto de meios de comunicação de âmbito mais regional, por oposição aos meios de comunicação nacionais ali listados. - -Os parâmetros `from` e `to` permitem estabelecer o espectro temporal da pesquisa. Finalmente, na variável `query` o usuário é convidado a definir o tema da pesquisa (e.g., "Jorge Sampaio") para o qual pretende construir uma narrativa temporal. Uma vez executado o código o sistema inicia o processo de pesquisa junto do Arquivo.pt. Para tal, recorre à utilização da [Arquivo.pt API (Full-text & URL search)](https://perma.cc/6ADS-LPLC) (em inglês). 
- -```python -from contamehistorias.datasources.webarchive import ArquivoPT -from datetime import datetime - -# Especifica o website e o ambito temporal para restringir a pesquisa -domains = [ 'http://publico.pt/', 'http://www.dn.pt/', 'http://dnoticias.pt/', 'http://www.rtp.pt/', 'http://www.cmjornal.pt/', 'http://www.iol.pt/', 'http://www.tvi24.iol.pt/', 'http://noticias.sapo.pt/', 'http://www.sapo.pt/', 'http://expresso.sapo.pt/', 'http://sol.sapo.pt/', 'http://www.jornaldenegocios.pt/', 'http://abola.pt/', 'http://www.jn.pt/', 'http://sicnoticias.sapo.pt/', 'http://www.lux.iol.pt/', 'http://www.ionline.pt/', 'http://news.google.pt/', 'http://www.dinheirovivo.pt/', 'http://www.aeiou.pt/', 'http://www.tsf.pt/', 'http://meiosepublicidade.pt/', 'http://www.sabado.pt/', 'http://economico.sapo.pt/'] - -params = { 'domains':domains, 'from':datetime(year=2011, month=1, day=1), 'to':datetime(year=2021, month=12, day=31) } - -query = 'Jorge Sampaio' - -apt = ArquivoPT() -search_result = apt.getResult(query=query, **params) -``` - -### Percorrer os resultados obtidos no Arquivo.pt - -O objeto `search_result` devolve o número total de resultados obtidos a partir da chamada à API do Arquivo.pt. O número total de resultados excede facilmente as 10.000 entradas, um volume de dados praticamente impossível de processar por qualquer usuário que, a partir dele, queira retirar conhecimento em tempo útil. - -```python -len(search_result) -``` -Para lá do número total de resultados o objeto `search_result` reúne informação extremamente útil para o passo seguinte do algoritmo, i.e., a seleção das notícias mais relevantes ao longo do tempo. Em concreto, este objeto permite ter acesso a: -* `datatime`: data de coleta do recurso -* `domain`: fonte noticiosa -* `headline`: título da notícia -* `url`: url original da notícia - -bastando para tal executar o seguinte código: - -```python -for x in search_result: - print(x.datetime) - print(x.domain) - print(x.headline) - print(x.url) - print() -``` - -### Determinação de datas importantes e seleção das *keywords*/títulos relevantes - -No próximo passo o sistema recorre ao algoritmo do *Conta-me Histórias* para criar um resumo das notícias mais importantes a partir do conjunto de documentos obtidos no Arquivo.pt. Cada bloco temporal determinado como relevante pelo sistema reúne um total de 20 notícias. Os vários blocos temporais determinados automaticamente pelo sistema oferecem ao usuário uma narrativa ao longo do tempo. - -```python -from contamehistorias import engine -language = "pt" - -cont = engine.TemporalSummarizationEngine() -summ_result = cont.build_intervals(search_result, language, query) - -cont.pprint(summ_result) -``` - -#### Estatísticas da pesquisa - -O código seguinte permite ter acesso a um conjunto de estatísticas globais, nomeadamente, ao número total de documentos, de domínios, bem como ao tempo total de execução do algoritmo. 
- -```python -print(f"Número total de documentos: {summ_result['stats']['n_docs']}") -print(f"Número total de domínios: {summ_result['stats']['n_domains']}") -print(f"Tempo total de execução: {summ_result['stats']['time']}") -``` - -### Obter a lista dos domínios dos resultados da pesquisa - -Para listar todos os domínios execute o seguinte código: - -```python -for domain in summ_result["domains"]: - print(domain) -``` - -### Resultados da pesquisa para a "Narrativa" - -Finalmente, o código seguinte recorre à variável `summ_result ["results"]` para apresentar os resultados gerados com a informação necessária à produção de uma *timeline*, nomeadamente, o período temporal de cada bloco de notícias, as notícias propriamente ditas (um conjunto de 20 notícias relevantes por bloco temporal), a data de coleta, a fonte noticiosa, o url (ligação à página web original) e o título completo da notícia. - -```python -for period in summ_result["results"]: - - print("--------------------------------") - print(period["from"],"until",period["to"]) - - # Cabecalhos selecionados - keyphrases = period["keyphrases"] - - for keyphrase in keyphrases: - print("headline = " + keyphrase.kw) - - # Fontes - for headline in keyphrase.headlines: - print("Date", headline.info.datetime) - print("Source", headline.info.domain) - print("Url", headline.info.url) - print("Headline completa = ", headline.info.headline) - - print() -``` - -# Conclusões - -A web é hoje considerada uma ferramenta essencial de comunicação. Neste contexto, os arquivos web surgem como um importante recurso de preservação dos conteúdos aí publicados. Embora o seu uso seja dominado por pesquisadores, historiadores ou jornalistas, o elevado volume de dados aí disponíveis sobre o nosso passado faz deste tipo de infraestrutura uma fonte de recursos de elevado valor e extrema utilidade para os usuários mais comuns. O acesso generalizado a este tipo de infraestrutura obriga, no entanto, à existência de outro tipo de ferramentas capazes de satisfazer as necessidades de informação do usuário, diminuindo, ao mesmo tempo, os constrangimentos associados à exploração de elevados volumes de dados por parte de usuários não especialistas. - -Neste tutorial, procurámos mostrar como criar automaticamente sumários temporais a partir de eventos coletados no passado, fazendo uso dos dados obtidos no Arquivo.pt e da aplicação da biblioteca de sumarização temporal *Conta-me Histórias*. O tutorial aqui apresentado é um primeiro passo na tentativa de mostrarmos aos interessados na temática uma forma simples de como qualquer usuário pode, utilizando conceitos minímos de programação, fazer uso de APIs e bibliotecas existentes para extrair conhecimento a partir de um elevado volume de dados num curto espaço de tempo. - -# Prémios - -O projeto *Conta-me Histórias* foi o vencedor do [Prémio Arquivo.pt 2018](https://sobre.arquivo.pt/pt/vencedores-premios-arquivo-pt/) e o vencedor da [Best Demo Presentation](https://ecir2019.org/workshops/) na [41st European Conference on Information Retrieval (ECIR-19)](http://ecir2019.org/) (em inglês). - -# Financiamento - -Ricardo Campos foi financiado por fundos nacionais através do Fundação para a Ciência e Tecnologia (FCT) e pela Fundação Portuguesa para Ciência e Tecnologia (I.P.) com o projeto StorySense (2022.09312.PTDC). - -# Bibliografia - -* Campos, R., Pasquali, A., Jatowt, A., Mangaravite, V., and Jorge, A.. "Automatic Generation of Timelines for Past-Web Events" In *The Past Web: Exploring Web Archives*, edited by D. Gomes, E. 
Demidova, J. Winters, and T. Risse, 225-242. Springer: 2021. [https://link.springer.com/chapter/10.1007/978-3-030-63291-5_18](https://perma.cc/F3SZ-5MVL) - -* Campos, R., Mangaravite, V., Pasquali, A., Jorge, A., Nunes, C., and Jatowt, A.. "YAKE! Keyword Extraction from Single Documents using Multiple Local Features". *Information Sciences Journal*, vol. 509 (2020): 257-289. [https://doi.org/10.1016/j.ins.2019.09.013](https://doi.org/10.1016/j.ins.2019.09.013) - -* Campos, R., Mangaravite, V., Pasquali, A., Jorge, A., Nunes, C., and Jatowt, A.. "A Text Feature Based Automatic Keyword Extraction Method for Single Documents" In *Advances in Information Retrieval. ECIR 2018 (Grenoble, France. March 26 ? 29). Lecture Notes in Computer Science*, edited by G. Pasi, B. Piwowarski, L. Azzopardi, and A. Hanbury, vol. 10772, 684-691. Springer: 2018. [https://link.springer.com/chapter/10.1007/978-3-319-76941-7_63](https://perma.cc/3V3W-X6MZ) - -* Pasquali, A., Mangaravite, V., Campos, R., Jorge, A., and Jatowt, A.."Interactive System for Automatically Generating Temporal Narratives" In -*Advances in Information Retrieval. ECIR'19 (Cologne, Germany. April 14-18). Lecture Notes in Computer Science*, edited by L. Azzopardi, B. Stein, N. Fuhr, P. Mayr, C. Hauff, and D. Hiemstra, vol. 11438, 251 - 255. Springer: 2019. [https://link.springer.com/chapter/10.1007/978-3-030-15719-7_34](https://perma.cc/MH6W-QQFD) - -* Gomes, D., Demidova, E., Winters, J., and Risse, T. (eds.), *The Past Web: Exploring Web Archives*. Springer, 2021. [https://arquivo.pt/book](https://arquivo.pt/book) [Pre-print](https://perma.cc/Q693-DLPA) - -* Gomes, D., and Costa M.. "The Importance of Web Archives for Humanities". *International Journal of Humanities and Arts Computing*, (April 2014). [http://sobre.arquivo.pt/wp-content/uploads/the-importance-of-web-archives-for-humanities.pdf](https://perma.cc/4WHP-Q534). - -* Alam,Sawood, Weigle, Michele C., Nelson, Michael L., Melo, Fernando, Bicho, Daniel, Gomes, Daniel. "MementoMap Framework for Flexible and Adaptive Web Archive Profiling" In *Proceedings of Joint Conference on Digital Libraries 2019*. Urbana-Champaign, Illinois, US: June 2019. [https://www.cs.odu.edu/~salam/drafts/mementomap-jcdl19-cameraready.pdf](https://perma.cc/7ES7-A7H7). - -* Costa, M.. "Information Search in Web Archives" PhD thesis, Universidade de Lisboa, December 2014. [http://sobre.arquivo.pt/wp-content/uploads/phd-thesis-information-search-in-web-archives.pdf](https://perma.cc/HU5S-M2XE) - -* Mourão, A., Gomes, D.. *The Anatomy of a Web Archive Image Search Engine. Technical Report*. Lisboa, Portugal: Arquivo.pt, dezembro 2021. [https://sobre.arquivo.pt/wp-content/uploads/The_Anatomy_of_a_Web_Archive_Image_Search_Engine_tech_report.pdf](https://perma.cc/2JF4-EF4T) +--- +title: "Sumarização de narrativas acerca de eventos do passado documentados na web utilizando Python: o caso do Arquivo.pt" +slug: sumarizacao-narrativas-web-python +collection: lessons +layout: lesson +date: 2023-04-29 +authors: +- Ricardo Campos +- Daniel Gomes +reviewers: +- Daniela Major +- Salete Farias +editors: +- Josir Cardoso Gomes +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/420 +difficulty: 2 +activity: transforming +topics: [api, python, data-manipulation, web-archiving] +avatar_alt: Homem sentado ensinando várias crianças +abstract: Nesta lição aprenderá a criar automaticamente resumos de eventos do passado a partir de conteúdos históricos arquivados da web. 
Em particular, demonstraremos como obter resultados relevantes ao combinar o uso da API do Arquivo.pt com a utilização do *Conta-me Histórias* permitindo, desta forma, processar um elevado volume de dados num curto espaço de tempo.
+lesson-partners: [Jisc, The National Archives]
+partnership-url: /pt/jisc-tna-parceria
+doi: 10.46430/phpt0037
+---
+
+{% include toc.html %}
+
+# Introdução
+
+Ao longo dos séculos a comunicação evoluiu paralelamente à evolução do homem. Esta, que antes se fazia a partir de meios físicos, é hoje digital e tem presença online. A "culpa" é da web que, desde o final dos anos 90 do século passado, se tornou na principal fonte de informação e comunicação do século XXI. Porém, cerca de [80% da informação disponível na web desaparece ou é alterada no prazo de apenas 1 ano](https://dl.acm.org/doi/10.1145/1145581.1145623) (em inglês). Este facto origina a perda de informação fundamental para documentar os eventos da era digital.
+
+A mudança para um paradigma de comunicação baseado na internet obrigou a uma alteração profunda na forma como as informações publicadas são preservadas. Os arquivos da web assumem especial relevância, ao preservarem as informações publicadas online desde a década de 1990.
+
+Apesar dos avanços recentes na preservação de informações arquivadas a partir da web, o problema de explorar de forma eficiente o património histórico preservado por estes arquivos permanece por resolver devido às enormes quantidades de dados históricos arquivados ao longo do tempo e à inexistência de ferramentas que possam processar automaticamente esse volume de dados. Neste contexto, as *timelines* (sistemas automáticos de sumarização temporal) surgem como a solução ideal para a produção automática de resumos de eventos ao longo do tempo e para a análise das informações publicadas online que os documentam, como é o caso das notícias.
+
+Neste tutorial, pretendemos mostrar como explorar o [Arquivo.pt](https://arquivo.pt), o arquivo da web portuguesa, e como criar automaticamente resumos de eventos do passado a partir de conteúdos históricos arquivados da web. Mais concretamente, demonstraremos como obter resultados relevantes ao combinar o uso da [API (Interface de Programação de Aplicações)](https://perma.cc/6ASS-KZFW) do Arquivo.pt com a utilização do [*Conta-me Histórias*](https://contamehistorias.pt), um sistema que permite criar automaticamente narrativas temporais sobre qualquer tema objeto de notícia. Para a concretização desse objetivo disponibilizamos um Jupyter Notebook que os usuários poderão usar para interagir com ambas as ferramentas.
+
+Na primeira parte do tutorial, iremos apresentar sumariamente as funções de pesquisa e acesso disponibilizadas pelo Arquivo.pt. Demonstraremos como podem ser utilizadas de forma automática através da invocação dos métodos disponibilizados pela API do Arquivo.pt, recorrendo a exemplos simples e práticos. A pesquisa automática de palavras em páginas arquivadas ao longo do tempo é o serviço base para desenvolver rapidamente aplicações informáticas inovadoras, que permitem explorar e tirar maior partido da informação histórica preservada pelo Arquivo.pt, como é o caso do projeto *Conta-me Histórias*.
+
+Na segunda parte, recorremos ao *Conta-me Histórias* para exemplificar o processo de sumarização temporal de um evento.
Nesse sentido, demonstraremos a forma como os usuários podem obter informações históricas resumidas sobre um determinado tópico (por exemplo, sobre [Jorge Sampaio](https://perma.cc/AWX8-9CA3), presidente da República Portuguesa entre 1996 e 2006), que envolva notícias do passado preservadas pelo Arquivo.pt. Uma tal infraestrutura permite aos usuários ter acesso a um conjunto de informações históricas a partir de páginas web que, muito provavelmente, já não existirão naquela que convencionalmente designamos como a web atual. + +# Pré-requisitos + +A participação neste tutorial pressupõe conhecimentos básicos de programação (nomeadamente Python) bem como familiarização com a instalação de pacotes python (via [git](https://perma.cc/6BK8-XZKR) (em inglês)), com o [formato JSON](https://www.w3schools.com/js/js_json_intro.asp) (em inglês) e com o consumo de APIs. A execução do código pressupõe o recurso ao Jupyter Notebook. Para a instalação deste *software* recomendamos o tutorial [Introduction to Jupyter Notebooks](/en/lessons/jupyter-notebooks#installing-jupyter-notebooks) (em inglês) ou, em alternativa, o recurso ao [Google Colab](https://colab.research.google.com/). Este tutorial foi testado com a versão 3.6.5 do Python. + +# Objetivos de Aprendizagem + +No final deste tutorial os participantes devem estar aptos a: +- Extrair informação relevante a partir do Arquivo.pt fazendo uso da [Arquivo.pt API (Full-text & URL search)](https://github.com/arquivo/pwa-technologies/wiki/Arquivo.pt-API) (em inglês) +- Saber usar a biblioteca Python do [*Conta-me Histórias*](https://github.com/LIAAD/TemporalSummarizationFramework) (em inglês) no contexto da sumarização temporal automática de eventos a partir de elevados volumes de dados preservados no arquivo da web portuguesa + +# Arquivo.pt + +O [Arquivo.pt](https://www.arquivo.pt) é um serviço público e gratuito disponibilizado pela [Fundação para a Ciência e a Tecnologia I.P.](https://perma.cc/D3XA-5J78), que permite a qualquer pessoa pesquisar e aceder a informação histórica preservada da web desde os anos 90. Embora se foque na preservação de informação de interesse para a comunidade portuguesa, contém também páginas escritas em várias línguas de interesse para a comunidade internacional e cerca de metade dos seus usuários são oriundos de fora de Portugal. + +[Este vídeo](https://www.youtube.com/embed/EnSys0HDnCc) introduz brevemente o Arquivo.pt. + +## Contributos + +O Arquivo.pt contém milhares de milhões de ficheiros recolhidos ao longo do tempo a partir de websites em várias línguas que documentam eventos nacionais e internacionais. Os serviços de pesquisa que fornece incluem a pesquisa de texto integral, a pesquisa de imagens, a listagem do histórico de versões, a pesquisa avançada e [APIs](https://arquivo.pt/api), que facilitam o desenvolvimento por terceiros de aplicações de valor acrescentado. + +Ao longo dos anos, o Arquivo.pt tem sido utilizado como recurso para suportar trabalhos de pesquisa em áreas como as Humanidades ou as Ciências Sociais. Desde 2018, o [Prémio Arquivo.pt](https://perma.cc/8F6F-KZFP) distingue anualmente trabalhos inovadores baseados na informação histórica preservada pelo Arquivo.pt. Os pesquisadores e cidadãos têm vindo a ser sensibilizados para a importância da preservação da informação publicada na web através da realização de sessões de formação gratuitas, por exemplo, sobre a [utilização das APIs disponibilizadas pelo Arquivo.pt](https://sobre.arquivo.pt/pt/ajuda/formacao/modulo-c/). 
+ +Todo o *software* desenvolvido está disponível como [projetos de código-aberto gratuitos](https://github.com/arquivo/) (em inglês) e, desde 2008, tem sido documentado através de [artigos técnicos e científicos](https://arquivo.pt/publica). No decorrer das suas atividades, o Arquivo.pt gera dados que podem ser úteis para suportar novos trabalhos de pesquisa, como por exemplo a lista de Páginas do Governo de Portugal nas redes sociais ou de websites de partidos políticos. Estes [dados estão disponíveis em acesso aberto](https://arquivo.pt/dadosabertos). + +[Este vídeo](https://www.youtube.com/embed/CZ6R4Zydg0Q) detalha os serviços públicos disponibilizados pelo Arquivo.pt. Pode também aceder diretamente aos [slides da apresentação](https://perma.cc/854E-9XEV). Para saber mais detalhes acerca dos serviços disponibilizados pelo Arquivo.pt consulte: +* [Módulo A: Arquivo.pt: uma nova ferramenta para pesquisar o passado (módulo A)](https://sobre.arquivo.pt/pt/ajuda/formacao/modulo-a/) do programa de "Formação acerca de preservação da Web" do Arquivo.pt. + +## Onde posso encontrar o Arquivo.pt? + +O serviço Arquivo.pt encontra-se disponível a partir dos seguintes apontadores: +* [Interfaces de usuário em português e inglês para aceder aos serviços de pesquisa de páginas, imagens e histórico de versões](https://www.arquivo.pt) +* [Website informativo acerca do Arquivo.pt](https://sobre.arquivo.pt) +* [Documentação acerca das APIs do Arquivo.pt](https://perma.cc/FV3U-ZEL9) (em inglês) + +## Como funciona a pesquisa automática via API? + +Periodicamente, o Arquivo.pt recolhe e armazena automaticamente a informação publicada na web. A infraestrutura de *hardware* do Arquivo.pt está alojada no seu próprio centro de dados e é gerida por pessoal a ela dedicado a tempo inteiro. + +O fluxo de trabalho de preservação é realizado através de um [sistema de informação distribuído de grande escala](https://perma.cc/A3Z7-E358). A informação web armazenada é processada automaticamente para realizar atividades de pesquisa sobre [grandes volumes de dados](https://perma.cc/9FMH-DUY8) (em inglês, *big data*), através de uma plataforma de processamento distribuído para dados não estruturados ([Hadoop](https://perma.cc/B5PH-9B4V)). Tal permite, por exemplo, a deteção automática de *spam* na web ou avaliar a acessibilidade web para pessoas com deficiências. + +Os serviços de pesquisa e acesso via APIs permitem que os pesquisadores tirem partido desta infraestrutura de processamento e dos dados históricos preservados sem terem de endereçar a complexidade do sistema que suporta o Arquivo.pt. [Este vídeo](https://www.youtube.com/embed/PPuauEwIwPE) apresenta a [Arquivo.pt API (Full-text & URL search)](https://perma.cc/6ADS-LPLC) (em inglês). Pode também aceder diretamente aos [slides da apresentação](https://perma.cc/RMS4-UD76). + +Neste tutorial iremos abordar apenas a utilização da API Full-text & URL Search do Arquivo.pt. 
Porém, este disponibiliza também outras APIs:
+* [Image Search API v1.1 (beta version)](https://perma.cc/U682-VNKD) (em inglês)
+* [CDX-server API (URL search): international standard](https://perma.cc/9M6Y-A4BW) (em inglês)
+* [Memento API (URL search): international standard](https://perma.cc/BF5E-32LR) (em inglês)
+
+Para saber detalhes acerca de [todas as APIs disponibilizadas pelo Arquivo.pt](https://perma.cc/FV3U-ZEL9) (em inglês) consulte os conteúdos de formação disponíveis em:
+* [Módulo C: Acesso e processamento automático de informação preservada da Web através de APIs](https://sobre.arquivo.pt/pt/ajuda/formacao/modulo-c/) do programa de "Formação acerca de preservação da Web" do Arquivo.pt.
+
+## Utilização
+
+Em seguida, apresentaremos exemplos de como utilizar a [Arquivo.pt API (Full-text & URL search)](https://github.com/arquivo/pwa-technologies/wiki/Arquivo.pt-API) (em inglês) para pesquisar, de forma automática, páginas da web arquivadas entre determinados intervalos de tempo. Como exemplo, executaremos pesquisas acerca de "[Jorge Sampaio](https://pt.wikipedia.org/wiki/Jorge_Sampaio)" (1939-2021), antigo Presidente da Câmara Municipal de Lisboa (1990-1995) e antigo Presidente da República Portuguesa (1996-2006).
+
+### Definição dos parâmetros de pesquisa
+
+O parâmetro *query* define a(s) palavra(s) a pesquisar: `Jorge Sampaio`.
+
+Para facilitar a leitura dos resultados de pesquisa obtidos iremos limitá-los a um máximo de 5 através do parâmetro `maxItems`.
+
+A totalidade dos parâmetros de pesquisa disponíveis está definida na secção [*Request Parameters* da documentação da API do Arquivo.pt](https://perma.cc/2DMP-3XQC) (link em inglês. Em português, parâmetros do pedido).
+
+```python
+import requests
+query = "jorge sampaio"
+maxItems = 5
+payload = {'q': query,'maxItems': maxItems}
+r = requests.get('http://arquivo.pt/textsearch', params=payload)
+print("GET",r.url)
+```
+
+### Percorrer os resultados obtidos no Arquivo.pt
+
+O seguinte código mostra os resultados de pesquisa obtidos no seu formato original (JSON):
+
+```python
+import pprint
+contentsJSon = r.json()
+pprint.pprint(contentsJSon)
+```
+
+### Sumário dos resultados obtidos
+
+É possível extrair, para cada resultado, a seguinte informação:
+* Título (campo `title`)
+* Endereço para o conteúdo arquivado (campo `linkToArchive`)
+* Data de arquivo (campo `tstamp`)
+* Texto extraído da página (campo `linkToExtractedText`)
+
+Todos os campos obtidos como resposta a pesquisas disponíveis estão definidos na secção [*Response fields* da documentação da API do Arquivo.pt](https://perma.cc/VK9Z-EC83) (link em inglês. Em português, campos de resposta).
+
+```python
+for item in contentsJSon["response_items"]:
+    title = item["title"]
+    url = item["linkToArchive"]
+    time = item["tstamp"]
+
+    print(title)
+    print(url)
+    print(time)
+
+    page = requests.get(item["linkToExtractedText"])
+
+    # Note a existencia de decode, para garantirmos que o conteudo devolvido pelo Arquivo.pt (no formato ISO-8859-1) e impresso no formato (UTF-8)
+    content = page.content.decode('utf-8')
+    print(content)
+    print("\n")
+```
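+
+O campo `tstamp` de cada resultado é devolvido como uma sequência de dígitos. Assumindo que segue o mesmo formato aaaammddhhmmss utilizado pelos parâmetros `from` e `to` (descritos na secção seguinte, e que convém confirmar na documentação da API), um esboço mínimo de conversão para um objeto `datetime`, reutilizando o último `item` do ciclo anterior, seria:
+
+```python
+from datetime import datetime
+
+# Esboco: converte o campo 'tstamp' (ex.: 19961013180344) num objeto datetime,
+# assumindo o formato aaaammddhhmmss
+data_arquivo = datetime.strptime(str(item["tstamp"]), "%Y%m%d%H%M%S")
+print(data_arquivo.strftime("%d/%m/%Y %H:%M:%S"))
+```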
+
+### Definir o intervalo temporal da pesquisa
+
+Uma das mais-valias do Arquivo.pt é fornecer o acesso a informação histórica publicada na web ao longo do tempo.
+
+No processo de acesso à informação os usuários podem definir o intervalo temporal das datas de arquivo das páginas a serem pesquisadas, através da especificação das datas pretendidas nos parâmetros de pesquisa da API `from` e `to`. Estas devem seguir o formato: ano, mês, dia, hora, minuto e segundo (aaaammddhhmmss). Por exemplo, a data 9 de março de 1996 seria representada por:
+* 19960309000000
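+
+Para evitar erros na escrita manual destas datas, pode gerá-las a partir de objetos `datetime` (um esboço mínimo, assumindo apenas o formato aaaammddhhmmss descrito acima):
+
+```python
+from datetime import datetime
+
+# Esboco: constroi valores para os parametros 'from' e 'to' no formato aaaammddhhmmss
+fromDate = datetime(1996, 3, 9).strftime("%Y%m%d%H%M%S")
+toDate = datetime(2006, 3, 9).strftime("%Y%m%d%H%M%S")
+print(fromDate, toDate)  # 19960309000000 20060309000000
+```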
+
+O seguinte código executa uma pesquisa por "Jorge Sampaio" em páginas arquivadas entre março de 1996 e março de 2006, período durante o qual este foi Presidente da República Portuguesa.
+
+```python
+query = "jorge sampaio"
+maxItems = 5
+fromDate = 19960309000000
+toDate = 20060309000000
+payload = {'q': query,'maxItems': maxItems, 'from': fromDate, 'to': toDate}
+r = requests.get('http://arquivo.pt/textsearch', params=payload)
+print("GET",r.url)
+print("\n")
+
+contentsJSon = r.json()
+for item in contentsJSon["response_items"]:
+    title = item["title"]
+    url = item["linkToArchive"]
+    time = item["tstamp"]
+
+    print(title)
+    print(url)
+    print(time)
+
+    page = requests.get(item["linkToExtractedText"])
+
+    # Note a existencia de decode, para garantirmos que o conteudo devolvido pelo Arquivo.pt (no formato ISO-8859-1) e impresso no formato (UTF-8)
+    content = page.content.decode('utf-8')
+    print(content)
+    print("\n")
+```
+
+### Restringir a pesquisa a um determinado website
+
+Se os usuários apenas tiverem interesse na informação histórica publicada por um determinado website, podem restringir a pesquisa através da especificação do website pretendido no parâmetro de pesquisa da API `siteSearch`. O seguinte código executa uma pesquisa por "Jorge Sampaio" em páginas arquivadas apenas a partir do website com o domínio "www.presidenciarepublica.pt", compreendidas entre março de 1996 e março de 2006, e apresenta os resultados obtidos.
+
+```python
+query = "jorge sampaio"
+maxItems = 5
+fromDate = 19960309000000
+toDate = 20060309000000
+siteSearch = "www.presidenciarepublica.pt"
+payload = {'q': query,'maxItems': maxItems, 'from': fromDate, 'to': toDate, 'siteSearch': siteSearch}
+r = requests.get('http://arquivo.pt/textsearch', params=payload)
+print("GET",r.url)
+print("\n")
+
+contentsJSon = r.json()
+for item in contentsJSon["response_items"]:
+    title = item["title"]
+    url = item["linkToArchive"]
+    time = item["tstamp"]
+
+    print(title)
+    print(url)
+    print(time)
+
+    page = requests.get(item["linkToExtractedText"])
+
+    # Note a existencia de decode, para garantirmos que o conteudo devolvido pelo Arquivo.pt (no formato ISO-8859-1) e impresso no formato (UTF-8)
+    content = page.content.decode('utf-8')
+    print(content)
+    print("\n")
+```
+
+### Restringir a pesquisa a um determinado tipo de ficheiro
+
+Além de páginas da web, o Arquivo.pt também preserva outros formatos de ficheiro vulgarmente publicados online, como por exemplo documentos do tipo PDF. Os usuários podem definir o tipo de ficheiro sobre o qual a pesquisa deverá incidir através da especificação no parâmetro de pesquisa `type` da API.
+
+O seguinte código executa uma pesquisa por "Jorge Sampaio":
+* Sobre ficheiros do tipo PDF
+* Arquivados apenas a partir do website com o domínio "www.presidenciarepublica.pt"
+* Entre março de 1996 e março de 2006
+
+E apresenta os resultados obtidos. Quando o usuário abrir o endereço do conteúdo arquivado fornecido pelo campo de resposta `linkToArchive` terá acesso ao ficheiro PDF.
+
+```python
+query = "jorge sampaio"
+maxItems = 5
+fromDate = 19960309000000
+toDate = 20060309000000
+siteSearch = "www.presidenciarepublica.pt"
+fileType = "PDF"
+payload = {'q': query,'maxItems': maxItems, 'from': fromDate, 'to': toDate, 'siteSearch': siteSearch, 'type': fileType}
+r = requests.get('http://arquivo.pt/textsearch', params=payload)
+print("GET",r.url)
+print("\n")
+
+contentsJSon = r.json()
+for item in contentsJSon["response_items"]:
+    title = item["title"]
+    url = item["linkToArchive"]
+    time = item["tstamp"]
+
+    print(title)
+    print(url)
+    print(time)
+```
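+
+Caso pretenda guardar localmente um dos ficheiros devolvidos, pode descarregar o conteúdo apontado por `linkToArchive` com a própria biblioteca `requests`. Segue um esboço mínimo, que reutiliza o último `item` do ciclo anterior e assume que o endereço devolve diretamente o ficheiro PDF (em alguns casos o arquivo pode devolver antes uma página de navegação):
+
+```python
+import requests
+
+# Esboco: descarrega o conteudo arquivado e guarda-o em disco
+resposta = requests.get(item["linkToArchive"])
+with open("documento_arquivado.pdf", "wb") as ficheiro:
+    ficheiro.write(resposta.content)
+```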
+
+# *Conta-me Histórias*
+
+O projeto *Conta-me Histórias* é desenvolvido por pesquisadores do Laboratório de Inteligência Artificial e Apoio à Decisão ([LIAAD](https://perma.cc/B5U2-R74J) — [INESCTEC](https://perma.cc/4XN7-A6TR)) e afiliados às instituições [Instituto Politécnico de Tomar](https://perma.cc/7PDB-NRAL) — [Centro de Investigação em Cidades Inteligentes (CI2)](https://perma.cc/M3CE-HQ6U), [Universidade do Porto](https://perma.cc/MGZ3-S9AQ) e [Universidade de Innsbruck](https://perma.cc/THE2-KA3L) (em inglês). O projeto visa oferecer aos usuários a possibilidade de revisitarem tópicos do passado através de uma interface semelhante à do Google que, dada uma pesquisa, devolve uma sumarização temporal das notícias mais relevantes preservadas pelo Arquivo.pt acerca desse tópico. Um vídeo promocional do projeto pode ser visualizado [aqui](https://www.youtube.com/watch?v=fcPOsBCwyu8).
+
+## Contributos
+
+Nos últimos anos, o crescente aumento na disponibilização de conteúdos online tem colocado novos desafios àqueles que pretendem entender a estória de um dado evento. Mais recentemente, fenómenos como o [media bias](https://perma.cc/MH2W-5WL4) (em português, viés mediático), as [fake news](https://perma.cc/945E-WVDK) (em português, notícias falsas) e as [filter bubbles](https://perma.cc/7M7E-S5CD) (link em inglês. Em português, filtro de bolha) vieram adensar ainda mais as dificuldades já existentes no acesso transparente à informação. O *Conta-me Histórias* surge, neste contexto, como um importante contributo para todos aqueles que pretendem ter acesso rápido a uma visão histórica de um dado evento, criando automaticamente narrativas resumidas a partir de um elevado volume de dados coletados no passado. A sua disponibilização em 2018 é um importante contributo para que estudantes, jornalistas, políticos, pesquisadores, etc., possam gerar conhecimento e verificar factos de uma forma rápida, a partir da consulta de *timelines* automaticamente geradas, mas também pelo recurso à consulta de páginas web tipicamente inexistentes na web mais convencional, a web do presente.
+
+## Onde posso encontrar o *Conta-me Histórias*?
+
+O projeto *Conta-me Histórias* encontra-se disponível, desde 2018, a partir dos seguintes endereços:
+- Página web (versão PT): [https://contamehistorias.pt](https://contamehistorias.pt)
+- Biblioteca Python: [https://github.com/LIAAD/TemporalSummarizationFramework](https://perma.cc/J7BB-28YX) (em inglês)
+
+Outros endereços de relevância:
+- *Conta-me Histórias front-end*: [https://github.com/LIAAD/contamehistorias-ui](https://perma.cc/J7BB-28YX) (em inglês)
+- *Conta-me Histórias back-end*: [https://github.com/LIAAD/contamehistorias-api](https://perma.cc/Q3MH-3T4J) (em inglês)
+
+Mais recentemente, em setembro de 2021, o Arquivo.pt passou a disponibilizar a funcionalidade "Narrativa", através de um botão adicional na sua interface que redireciona os usuários para o website do *Conta-me Histórias*, para que a partir deste possam criar automaticamente narrativas temporais sobre qualquer tema. A funcionalidade "Narrativa" resulta da colaboração entre a equipa do *Conta-me Histórias*, vencedora do [Prémio Arquivo.pt 2018](https://perma.cc/8F6F-KZFP), e a equipa do Arquivo.pt.
+
+## Como Funciona?
+
+Quando um usuário insere um conjunto de palavras acerca de um tema na caixa de pesquisa do Arquivo.pt e clica no botão "Narrativa", é direcionado para o serviço *Conta-me Histórias* que, por sua vez, analisa automaticamente as notícias de 26 websites arquivados pelo Arquivo.pt ao longo do tempo e apresenta-lhe uma cronologia de notícias relacionadas com o tema pesquisado.
+
+Por exemplo, se pesquisarmos por "Jorge Sampaio" e carregarmos no botão "Narrativa",
+
+{% include figure.html filename="sumarizacao-narrativas-web-python-1.jpeg" alt="Pesquisa por Jorge Sampaio através da componente narrativa do Arquivo.pt" caption="Figura 1: Pesquisa por 'Jorge Sampaio' através da componente narrativa do Arquivo.pt." %}
+
+seremos direcionados para o *Conta-me Histórias*, onde obteremos, automaticamente, uma narrativa de notícias arquivadas. Na figura seguinte é possível observar a linha de tempo e o conjunto de notícias relevantes no período compreendido entre 2016-04-07 e 2016-11-17. O último período temporal é referente ao ano de 2019.
+
+{% include figure.html filename="sumarizacao-narrativas-web-python-2.jpeg" alt="Resultados da pesquisa por Jorge Sampaio no Conta-me Histórias para o período compreendido entre 07/04/2016 e 17/11/2016" caption="Figura 2: Resultados da pesquisa por 'Jorge Sampaio' no *Conta-me Histórias* para o período compreendido entre 2016-04-07 e 2016-11-17." %}
+
+Para a seleção das notícias mais relevantes recorremos ao [YAKE!](https://yake.inesctec.pt) (em inglês), um extrator de palavras relevantes (desenvolvido pela nossa equipa de pesquisa) e que, neste contexto, é utilizado para selecionar os excertos mais importantes de uma notícia (mais concretamente os seus títulos) ao longo do tempo.
+
+Um aspeto interessante da aplicação é o facto de esta facilitar o acesso à página web arquivada que dá nome ao título selecionado como relevante. Por exemplo, ao clicar em cima do título "Jorge Sampaio formaliza apoio a Sampaio da Nóvoa" o usuário poderá visualizar a seguinte página web:
+
+{% include figure.html filename="sumarizacao-narrativas-web-python-3.jpeg" alt="Jorge Sampaio formaliza apoio a Sampaio da Nóvoa" caption="Figura 3: Jorge Sampaio formaliza apoio a Sampaio da Nóvoa." %}
+
+Paralelamente, poderá ter acesso a um conjunto de "termos relacionados" com o tópico de pesquisa. Na figura abaixo é possível observar, entre outros, a referência aos antigos presidentes da República Mário Soares e Cavaco Silva, bem como aos ex-primeiros-ministros Santana Lopes e Durão Barroso.
+
+{% include figure.html filename="sumarizacao-narrativas-web-python-4.jpeg" alt="Nuvem de palavras com os termos relacionados com a pesquisa Jorge Sampaio ao longo de 10 anos" caption="Figura 4: Nuvem de palavras com os termos relacionados com a pesquisa por 'Jorge Sampaio' ao longo de 10 anos." %}
+
+O *Conta-me Histórias* pesquisa, analisa e agrega milhares de resultados para gerar cada narrativa acerca de um tema. Recomenda-se a escolha de palavras descritivas sobre temas bem definidos, personalidades ou eventos para obter boas narrativas. Na seção seguinte descrevemos a forma como, através da biblioteca Python, os usuários podem interagir e fazer uso dos dados do *Conta-me Histórias*.
+
+## Instalação
+
+Para a instalação da [biblioteca Conta-me Histórias](https://perma.cc/4ZXT-9FB5) (em inglês) necessita de ter o [git](https://perma.cc/6BK8-XZKR) (em inglês) instalado. Após a sua instalação proceda à execução do seguinte código:
+
+```python
+!pip install -U git+https://github.com/LIAAD/TemporalSummarizationFramework
+```
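+
+Depois da instalação, pode confirmar que a biblioteca ficou acessível com um teste mínimo de importação (o nome do pacote, `contamehistorias`, é o mesmo usado nos exemplos seguintes):
+
+```python
+# Teste minimo: se este import falhar, repita o passo de instalacao acima
+import contamehistorias
+print("Biblioteca contamehistorias importada com sucesso")
+```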
+
+## Utilização
+
+### Definição dos parâmetros de pesquisa
+
+No próximo código o usuário é convidado a definir o conjunto de parâmetros de pesquisa. A variável `domains` lista o conjunto de 24 websites objeto de pesquisa. Um aspeto interessante desta variável é a possibilidade de o usuário definir a sua própria lista de fontes noticiosas. Um exercício interessante passa por definir um conjunto de meios de comunicação de âmbito mais regional, por oposição aos meios de comunicação nacionais ali listados.
+
+Os parâmetros `from` e `to` permitem estabelecer o espectro temporal da pesquisa. Finalmente, na variável `query` o usuário é convidado a definir o tema da pesquisa (e.g., "Jorge Sampaio") para o qual pretende construir uma narrativa temporal. Uma vez executado o código, o sistema inicia o processo de pesquisa junto do Arquivo.pt. Para tal, recorre à utilização da [Arquivo.pt API (Full-text & URL search)](https://perma.cc/6ADS-LPLC) (em inglês).
+
+```python
+from contamehistorias.datasources.webarchive import ArquivoPT
+from datetime import datetime
+
+# Especifica os websites e o ambito temporal para restringir a pesquisa
+domains = [ 'http://publico.pt/', 'http://www.dn.pt/', 'http://dnoticias.pt/', 'http://www.rtp.pt/', 'http://www.cmjornal.pt/', 'http://www.iol.pt/', 'http://www.tvi24.iol.pt/', 'http://noticias.sapo.pt/', 'http://www.sapo.pt/', 'http://expresso.sapo.pt/', 'http://sol.sapo.pt/', 'http://www.jornaldenegocios.pt/', 'http://abola.pt/', 'http://www.jn.pt/', 'http://sicnoticias.sapo.pt/', 'http://www.lux.iol.pt/', 'http://www.ionline.pt/', 'http://news.google.pt/', 'http://www.dinheirovivo.pt/', 'http://www.aeiou.pt/', 'http://www.tsf.pt/', 'http://meiosepublicidade.pt/', 'http://www.sabado.pt/', 'http://economico.sapo.pt/']
+
+params = { 'domains':domains, 'from':datetime(year=2011, month=1, day=1), 'to':datetime(year=2021, month=12, day=31) }
+
+query = 'Jorge Sampaio'
+
+apt = ArquivoPT()
+search_result = apt.getResult(query=query, **params)
+```
+
+### Percorrer os resultados obtidos no Arquivo.pt
+
+O objeto `search_result` devolve o número total de resultados obtidos a partir da chamada à API do Arquivo.pt. O número total de resultados excede facilmente as 10.000 entradas, um volume de dados praticamente impossível de processar por qualquer usuário que, a partir dele, queira retirar conhecimento em tempo útil.
+
+```python
+len(search_result)
+```
+Para lá do número total de resultados, o objeto `search_result` reúne informação extremamente útil para o passo seguinte do algoritmo, i.e., a seleção das notícias mais relevantes ao longo do tempo. Em concreto, este objeto permite ter acesso a:
+* `datetime`: data de coleta do recurso
+* `domain`: fonte noticiosa
+* `headline`: título da notícia
+* `url`: url original da notícia
+
+bastando para tal executar o seguinte código:
+
+```python
+for x in search_result:
+    print(x.datetime)
+    print(x.domain)
+    print(x.headline)
+    print(x.url)
+    print()
+```
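+
+Antes de avançar para a sumarização, pode ser útil perceber como estes resultados se distribuem pelas fontes noticiosas. Segue um esboço mínimo com o módulo `collections` da biblioteca-padrão, assumindo apenas o atributo `domain` ilustrado acima:
+
+```python
+from collections import Counter
+
+# Esboco: conta o numero de resultados obtidos por fonte noticiosa
+contagem = Counter(x.domain for x in search_result)
+for dominio, total in contagem.most_common(10):
+    print(dominio, total)
+```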
+
+### Determinação de datas importantes e seleção das *keywords*/títulos relevantes
+
+No próximo passo o sistema recorre ao algoritmo do *Conta-me Histórias* para criar um resumo das notícias mais importantes a partir do conjunto de documentos obtidos no Arquivo.pt. Cada bloco temporal determinado como relevante pelo sistema reúne um total de 20 notícias. Os vários blocos temporais determinados automaticamente pelo sistema oferecem ao usuário uma narrativa ao longo do tempo.
+
+```python
+from contamehistorias import engine
+language = "pt"
+
+cont = engine.TemporalSummarizationEngine()
+summ_result = cont.build_intervals(search_result, language, query)
+
+cont.pprint(summ_result)
+```
+
+#### Estatísticas da pesquisa
+
+O código seguinte permite ter acesso a um conjunto de estatísticas globais, nomeadamente, ao número total de documentos, de domínios, bem como ao tempo total de execução do algoritmo.
+
+```python
+print(f"Número total de documentos: {summ_result['stats']['n_docs']}")
+print(f"Número total de domínios: {summ_result['stats']['n_domains']}")
+print(f"Tempo total de execução: {summ_result['stats']['time']}")
+```
+
+### Obter a lista dos domínios dos resultados da pesquisa
+
+Para listar todos os domínios execute o seguinte código:
+
+```python
+for domain in summ_result["domains"]:
+    print(domain)
+```
+
+### Resultados da pesquisa para a "Narrativa"
+
+Finalmente, o código seguinte recorre à variável `summ_result["results"]` para apresentar os resultados gerados com a informação necessária à produção de uma *timeline*, nomeadamente, o período temporal de cada bloco de notícias, as notícias propriamente ditas (um conjunto de 20 notícias relevantes por bloco temporal), a data de coleta, a fonte noticiosa, o url (ligação à página web original) e o título completo da notícia.
+
+```python
+for period in summ_result["results"]:
+
+    print("--------------------------------")
+    print(period["from"],"until",period["to"])
+
+    # Cabecalhos selecionados
+    keyphrases = period["keyphrases"]
+
+    for keyphrase in keyphrases:
+        print("headline = " + keyphrase.kw)
+
+        # Fontes
+        for headline in keyphrase.headlines:
+            print("Date", headline.info.datetime)
+            print("Source", headline.info.domain)
+            print("Url", headline.info.url)
+            print("Headline completa = ", headline.info.headline)
+
+        print()
+```
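+
+Para reutilizar esta *timeline* noutras ferramentas (por exemplo, numa folha de cálculo), os mesmos campos podem ser exportados para um ficheiro CSV. Segue um esboço mínimo com o módulo `csv` da biblioteca-padrão, assumindo apenas a estrutura de `summ_result["results"]` ilustrada acima:
+
+```python
+import csv
+
+# Esboco: exporta a narrativa para CSV (um titulo relevante por linha)
+with open("narrativa.csv", "w", newline="", encoding="utf-8") as f:
+    escritor = csv.writer(f)
+    escritor.writerow(["inicio", "fim", "keyword", "data", "fonte", "url", "titulo"])
+    for period in summ_result["results"]:
+        for keyphrase in period["keyphrases"]:
+            for headline in keyphrase.headlines:
+                escritor.writerow([period["from"], period["to"], keyphrase.kw,
+                                   headline.info.datetime, headline.info.domain,
+                                   headline.info.url, headline.info.headline])
+```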
+
+# Conclusões
+
+A web é hoje considerada uma ferramenta essencial de comunicação. Neste contexto, os arquivos web surgem como um importante recurso de preservação dos conteúdos aí publicados. Embora o seu uso seja dominado por pesquisadores, historiadores ou jornalistas, o elevado volume de dados aí disponíveis sobre o nosso passado faz deste tipo de infraestrutura uma fonte de recursos de elevado valor e extrema utilidade para os usuários mais comuns. O acesso generalizado a este tipo de infraestrutura obriga, no entanto, à existência de outro tipo de ferramentas capazes de satisfazer as necessidades de informação do usuário, diminuindo, ao mesmo tempo, os constrangimentos associados à exploração de elevados volumes de dados por parte de usuários não especialistas.
+
+Neste tutorial, procurámos mostrar como criar automaticamente sumários temporais a partir de eventos coletados no passado, fazendo uso dos dados obtidos no Arquivo.pt e da aplicação da biblioteca de sumarização temporal *Conta-me Histórias*. O tutorial aqui apresentado é um primeiro passo na tentativa de mostrarmos aos interessados na temática uma forma simples de como qualquer usuário pode, utilizando conceitos mínimos de programação, fazer uso de APIs e bibliotecas existentes para extrair conhecimento a partir de um elevado volume de dados num curto espaço de tempo.
+
+# Prémios
+
+O projeto *Conta-me Histórias* foi o vencedor do [Prémio Arquivo.pt 2018](https://sobre.arquivo.pt/pt/vencedores-premios-arquivo-pt/) e o vencedor da [Best Demo Presentation](https://ecir2019.org/workshops/) na [41st European Conference on Information Retrieval (ECIR-19)](https://ecir2019.org/) (em inglês).
+
+# Financiamento
+
+Ricardo Campos foi financiado por fundos nacionais através da Fundação para a Ciência e a Tecnologia (FCT), I.P., no âmbito do projeto StorySense (2022.09312.PTDC).
+
+# Bibliografia
+
+* Campos, R., Pasquali, A., Jatowt, A., Mangaravite, V., and Jorge, A. "Automatic Generation of Timelines for Past-Web Events" In *The Past Web: Exploring Web Archives*, edited by D. Gomes, E. Demidova, J. Winters, and T. Risse, 225-242. Springer: 2021. [https://link.springer.com/chapter/10.1007/978-3-030-63291-5_18](https://perma.cc/F3SZ-5MVL)
+
+* Campos, R., Mangaravite, V., Pasquali, A., Jorge, A., Nunes, C., and Jatowt, A. "YAKE! Keyword Extraction from Single Documents using Multiple Local Features". *Information Sciences Journal*, vol. 509 (2020): 257-289. [https://doi.org/10.1016/j.ins.2019.09.013](https://doi.org/10.1016/j.ins.2019.09.013)
+
+* Campos, R., Mangaravite, V., Pasquali, A., Jorge, A., Nunes, C., and Jatowt, A. "A Text Feature Based Automatic Keyword Extraction Method for Single Documents" In *Advances in Information Retrieval. ECIR 2018 (Grenoble, France. March 26-29). Lecture Notes in Computer Science*, edited by G. Pasi, B. Piwowarski, L. Azzopardi, and A. Hanbury, vol. 10772, 684-691. Springer: 2018. [https://link.springer.com/chapter/10.1007/978-3-319-76941-7_63](https://perma.cc/3V3W-X6MZ)
+
+* Pasquali, A., Mangaravite, V., Campos, R., Jorge, A., and Jatowt, A. "Interactive System for Automatically Generating Temporal Narratives" In *Advances in Information Retrieval. ECIR'19 (Cologne, Germany. April 14-18). Lecture Notes in Computer Science*, edited by L. Azzopardi, B. Stein, N. Fuhr, P. Mayr, C. Hauff, and D. Hiemstra, vol. 11438, 251-255. Springer: 2019. [https://link.springer.com/chapter/10.1007/978-3-030-15719-7_34](https://perma.cc/MH6W-QQFD)
+
+* Gomes, D., Demidova, E., Winters, J., and Risse, T. (eds.), *The Past Web: Exploring Web Archives*. Springer, 2021.
 [https://arquivo.pt/book](https://arquivo.pt/book) [Pre-print](https://perma.cc/Q693-DLPA) + +* Gomes, D., and Costa, M.. "The Importance of Web Archives for Humanities". *International Journal of Humanities and Arts Computing*, (April 2014). [https://sobre.arquivo.pt/wp-content/uploads/the-importance-of-web-archives-for-humanities.pdf](https://perma.cc/4WHP-Q534). + +* Alam, Sawood, Weigle, Michele C., Nelson, Michael L., Melo, Fernando, Bicho, Daniel, Gomes, Daniel. "MementoMap Framework for Flexible and Adaptive Web Archive Profiling" In *Proceedings of Joint Conference on Digital Libraries 2019*. Urbana-Champaign, Illinois, US: June 2019. [https://www.cs.odu.edu/~salam/drafts/mementomap-jcdl19-cameraready.pdf](https://perma.cc/7ES7-A7H7). + +* Costa, M.. "Information Search in Web Archives" PhD thesis, Universidade de Lisboa, December 2014. [https://sobre.arquivo.pt/wp-content/uploads/phd-thesis-information-search-in-web-archives.pdf](https://perma.cc/HU5S-M2XE) + +* Mourão, A., Gomes, D.. *The Anatomy of a Web Archive Image Search Engine. Technical Report*. Lisboa, Portugal: Arquivo.pt, dezembro 2021. [https://sobre.arquivo.pt/wp-content/uploads/The_Anatomy_of_a_Web_Archive_Image_Search_Engine_tech_report.pdf](https://perma.cc/2JF4-EF4T) diff --git a/pt/licoes/trabalhando-ficheiros-texto-python.md b/pt/licoes/trabalhando-ficheiros-texto-python.md index 8cb76db22d..15d6fb2beb 100644 --- a/pt/licoes/trabalhando-ficheiros-texto-python.md +++ b/pt/licoes/trabalhando-ficheiros-texto-python.md @@ -1,191 +1,191 @@ ---- -title: Trabalhando com ficheiros de texto em Python -slug: trabalhando-ficheiros-texto-python -layout: lesson -date: 2012-07-17 -translation_date: 2021-05-13 -authors: -- William J. Turkel -- Adam Crymble -reviewers: -- Jim Clifford -editors: -- Miriam Posner -translator: -- Aracele Torres -translation-editor: -- Danielle Sanches -translation-reviewer: -- Bruno Martins -- Renato Rocha Souza -difficulty: 2 -review-ticket: https://github.com/programminghistorian/ph-submissions/issues/317 -activity: transforming -topics: [python] -abstract: "Nesta lição, você aprenderá a manipular ficheiros de texto usando Python." -next: code-reuse-and-modularity -previous: nocoes-basicas-paginas-web-html -python_warning: false -original: working-with-text-files -avatar_alt: Homem de óculos lendo um livro de alfabeto -doi: 10.46430/phpt0003 ---- - -{% include toc.html %} - - - - - -## Objetivos da lição - -Nesta lição, você aprenderá a manipular ficheiros de texto usando Python. -Isto inclui abrir, fechar, ler e gravar ficheiros no formato `.txt` usando instruções nesta linguagem de programação. - -As próximas lições desta série envolverão o download de uma página da web e a reorganização do seu conteúdo em blocos de informação úteis. Você fará a maior parte do trabalho usando código Python escrito e executado no ambiente Komodo Edit. - -## Trabalhando com ficheiros de texto - -A linguagem Python facilita o trabalho com ficheiros e texto. Vamos começar com ficheiros. - -## Criando e gravando um ficheiro de texto - -Vamos começar com uma breve discussão da terminologia. Numa lição anterior (dependendo do seu sistema operativo: [Instalação em Mac][], [Instalação em Windows][], ou [Instalação em Linux][]), você viu como enviar informação para a janela de "Saída de Comando" do seu editor de texto, usando o comando [print][] do Python. - -``` python -print('olá mundo') -``` - -A linguagem de programação Python é *orientada a objetos*. 
Isso quer dizer que a mesma é construída em torno de um tipo especial de entidade, um *objeto*, que -contém *dados* e vários *métodos* para aceder e alterar esses dados. Depois de um objeto ser criado, ele pode interagir com outros objetos. - -No exemplo acima, vemos um tipo de objeto, a *string* "olá mundo". A *string* é a sequência de caracteres entre aspas. Você pode escrever uma *string* de três maneiras: - -``` -message1 = 'olá mundo' -message2 = "olá mundo" -message3 = """olá -olá -olá mundo""" -``` - -O importante a notar é que nos primeiros dois exemplos você pode usar aspas simples ou duplas / vírgulas invertidas, mas não pode misturar as duas dentro de uma *string*. -No terceiro exemplo, as aspas triplas significam uma *string* que abrange mais de uma linha. - -Por exemplo, as seguintes declarações estão todas erradas: - -``` -message1 = "olá mundo' -message2 = 'olá mundo" -message3 = 'O meu nome é John O'Brian' -``` - -Conte o número de aspas simples na *message3*. Para funcionar você -teria que *libertar* o apóstrofo: - -``` python -message3 = 'O meu nome é John O\'Brian' -``` - -Alternativamente, poderia reescrever a declaração como: - -``` python -message3 = "O meu nome é John O'Brian" -``` - -`Print` é um comando que imprime objetos na forma textual. O comando *print*, quando combinado com a *string*, produz uma *instrução*. - -Você usará `print` como indicado anteriormente nos casos em que deseja apresentar a informação imediatamente. Às vezes, no entanto, você criará informação que deseja guardar, enviar a outra pessoa, ou usar como entrada para processamento posterior por um outro programa ou conjunto de programas. Nestes casos, você desejará enviar a informação para ficheiros no seu disco rígido, em vez de para a janela de "saída de comando". Insira o seguinte programa no seu editor de texto e salve-o como `ficheiro-saida.py`. - -``` python -# ficheiro-saida.py -f = open('olamundo.txt','w') -f.write('olá mundo') -f.close() -``` - -Em Python, qualquer linha que comece com uma marca de hash (\#) é conhecida como um *comentário* e é ignorada pelo interpretador Python. Os comentários têm como objetivo permitir que os programadores comuniquem uns com os outros (ou para se lembrarem do que seu código faz quando o voltam a analisar alguns meses depois). Num sentido mais amplo, os próprios programas são tipicamente escritos e formatados de modo que seja mais fácil para os programadores comunicarem uns com os outros. Quando o código é mais próximo dos requisitos da máquina é conhecido como *baixo nível*, enquanto o que está mais próximo da linguagem natural é de *alto nível*. Um dos benefícios de usar uma linguagem como Python é que ela é de nível muito alto, tornando mais fácil a comunicação (com algum custo em termos de eficiência computacional). - -No programa anterior, *f* é um *objeto ficheiro* (*file object*), e `open` (abrir), `write` (gravar) e `close` (fechar) são *métodos de ficheiro* (*file -methods*). Em outras palavras, abrir, gravar, e fechar fazem algo com o objeto *f* que, neste caso, é definido como um ficheiro `.txt`. Este é provavelmente um uso diferente do termo "método" do que aquele que você poderia esperar e, de vez em quando, você descobrirá que as palavras usadas no contexto de programação têm significados ligeiramente (ou completamente) diferentes do que na fala do dia a dia. Neste caso, lembre-se de que os métodos são código que executa ações. Eles fazem algo a outra coisa e retornam um resultado. 
Você pode tentar pensar nisto usando um exemplo do mundo real, como dar comandos ao cão da família. O cão (o objeto) entende comandos (ou seja, tem "métodos") como "latir", "sentar", "fingir de morto" e assim por diante. Discutiremos e aprenderemos como usar muitos outros métodos à medida que avançarmos. - -*f* é um nome de variável escolhido por nós; você poderia chamá-lo de qualquer coisa que quisesse. No Python, os nomes das variáveis podem ser constituídos por letras maiúsculas e minúsculas, números, e o símbolo *underline*... mas você não pode usar os nomes dos comandos Python como variáveis. Se você tentasse nomear a sua variável de ficheiro como, por exemplo, "print", o seu programa não funcionaria porque esta é uma [palavra reservada][] que faz parte da linguagem de programação. - -Os nomes das variáveis Python também são *case-sensitive*, ou seja, diferenciam letras maiúsculas de minúsculas, o que significa que *foobar*, *Foobar* e *FOOBAR* seriam todas variáveis diferentes. - -Quando você executa o programa, o método `open` (abrir) vai dizer ao seu computador para criar um novo ficheiro de texto `olamundo.txt` na mesma pasta que você salvou o programa `ficheiro-saida.py`. O parâmetro *w* diz que você pretende gravar conteúdo neste novo ficheiro usando Python. - -Observe que, como o nome do ficheiro e o parâmetro estão entre aspas simples, você sabe que ambos estão armazenados como *strings*; esquecer de incluir as aspas fará com que o seu programa falhe. - -Na próxima linha, o seu programa grava a mensagem "olá mundo" (outra string) no ficheiro e o fecha. (Para obter mais informações sobre estas instruções, consulte a seção [File Objects][] na Referência da biblioteca Python.) - -Clique duas vezes no botão "Executar Python" no Komodo Edit para executar o programa (ou o equivalente em qualquer outro editor de texto que você tenha decidido usar: por exemplo, clique em "\#!" E "Executar" no TextWrangler). Embora nada seja impresso no painel "Saída de Comando", você verá uma mensagem de status que diz algo como - -``` python -`/usr/bin/python ficheiro-saida.py` returned 0. -``` - -em Mac ou Linux, ou - -``` python -'C:\Python27\Python.exe ficheiro-saida.py' returned 0. -``` - -no Windows. - -Isso significa que o seu programa foi executado com sucesso. Se você usar *Arquivo -> Abrir -> Arquivo* no Komodo Edit, você pode abrir o ficheiro `olamundo.txt`. Ele deve conter a sua mensagem numa linha: - -``` python -olá mundo -``` - -Como os ficheiros de texto incluem uma quantidade mínima de informação de formatação, eles tendem a ser pequenos, fáceis de trocar entre plataformas diferentes -(ou seja, do Windows para Linux ou Mac, ou vice-versa) e fáceis de enviar de um programa de computador para outro. Eles geralmente também podem ser lidos por pessoas que usam um editor de texto como o Komodo Edit. - -### Lendo de um ficheiro de texto - -A linguagem Python também possui métodos que permitem obter informação desde ficheiros. Digite o seguinte programa no seu editor de texto e salve-o como -`ficheiro-entrada.py`. Ao clicar em "Executar" para executá-lo, será aberto o ficheiro de texto que você acabou de criar, lida a mensagem numa linha do ficheiro, e -impressa a mensagem no painel "Saída de Comando". - -``` python -# ficheiro-entrada.py -f = open('olamundo.txt','r') -message = f.read() -print(message) -f.close() -``` - -Nesse caso, o parâmetro *r* é usado para indicar que você está abrindo um ficheiro para ler (`read`) a partir dele. 
Os parâmetros permitem que você escolha entre as diferentes opções que um método específico permite. Voltando ao exemplo do cão da família, o cão pode ser treinado a latir uma vez quando faz um lanche com sabor de carne e duas vezes quando recebe um com sabor de frango. O sabor do lanche é um parâmetro. Cada método é diferente em termos de quais parâmetros aceitará. Você não pode, por exemplo, pedir a um cão que cante uma ópera italiana - a menos que o seu cão seja particularmente talentoso. Você pode pesquisar os parâmetros possíveis para um método específico no site do Python ou, frequentemente, pode encontrá-los digitando o nome do método num motor de busca, junto com o termo "Python". - -`Read` é um outro método de ficheiro. Os conteúdos do ficheiro (a mensagem de uma linha) são copiados para a variável *message*, que é como decidimos chamar esta *string*, e então o comando `print` é usado para enviar os conteúdos de *message* para o painel "Saída do Comando". - -### Anexando conteúdo a um ficheiro de texto pré-existente - -Uma terceira opção é abrir um ficheiro pré-existente e adicionar mais conteúdo a ele. Note que se você abrir (`open`) um ficheiro e usar o método `write` (gravar), *o programa sobrescreverá tudo o que possa estar contido no ficheiro*. Isso não é um problema quando você está criando um novo ficheiro, ou quando deseja sobrescrever os conteúdos de um ficheiro existente, mas pode ser indesejável quando você está criando um registro de eventos ou compilando um grande conjunto de dados em um ficheiro. Neste caso, ao invés de `write`, você vai querer usar o método acrescentar (`append`), designado por `a`. - -Digite o seguinte programa no seu editor de texto e salve-o como`ficheiro-acrescentar.py`. Quando você executar este programa, ele abrirá o mesmo ficheiro `olamundo.txt` criado anteriormente e anexará uma segunda mensagem “olá mundo” ao ficheiro. A sequência '\\n' significa o início de uma nova linha. - -``` python -# ficheiro-acrescentar.py -f = open('olamundo.txt','a') -f.write('\n' + 'olá mundo') -f.close() -``` - -Depois de executar o programa, abra o ficheiro `olamundo.txt` e veja o que aconteceu. Feche o ficheiro de texto e execute mais algumas vezes o programa `ficheiro-acrescentar.py`. Quando você abrir `olamundo.txt` novamente, notará algumas mensagens 'olá mundo' extra esperando por você. - -Na próxima seção, discutiremos a modularidade e a reutilização de código. - -Leituras sugeridas ------------------- - -- [Non-Programmer's Tutorial for Python 3/Hello, World][] - - [Instalação em Mac]: https://programminghistorian.org/lessons/mac-installation - [Instalação em Windows]: https://programminghistorian.org/lessons/windows-installation - [Instalação em Linux]: https://programminghistorian.org/lessons/linux-installation - [print]: https://docs.python.org/2/reference/simple_stmts.html#the-print-statement - [palavra reservada]: http://docs.python.org/release/2.5.4/ref/keywords.html - [File Objects]: https://docs.python.org/2/library/stdtypes.html#bltin-file-objects - [Non-Programmer's Tutorial for Python 3/Hello, World]: https://en.wikibooks.org/wiki/Non-Programmer%27s_Tutorial_for_Python_3/Hello,_World +--- +title: Trabalhando com ficheiros de texto em Python +slug: trabalhando-ficheiros-texto-python +layout: lesson +date: 2012-07-17 +translation_date: 2021-05-13 +authors: +- William J. 
 Turkel +- Adam Crymble +reviewers: +- Jim Clifford +editors: +- Miriam Posner +translator: +- Aracele Torres +translation-editor: +- Danielle Sanches +translation-reviewer: +- Bruno Martins +- Renato Rocha Souza +difficulty: 2 +review-ticket: https://github.com/programminghistorian/ph-submissions/issues/317 +activity: transforming +topics: [python] +abstract: "Nesta lição, você aprenderá a manipular ficheiros de texto usando Python." +next: code-reuse-and-modularity +previous: nocoes-basicas-paginas-web-html +python_warning: false +original: working-with-text-files +avatar_alt: Homem de óculos lendo um livro de alfabeto +doi: 10.46430/phpt0003 +--- + +{% include toc.html %} + + + + + +## Objetivos da lição + +Nesta lição, você aprenderá a manipular ficheiros de texto usando Python. +Isto inclui abrir, fechar, ler e gravar ficheiros no formato `.txt` usando instruções nesta linguagem de programação. + +As próximas lições desta série envolverão o download de uma página da web e a reorganização do seu conteúdo em blocos de informação úteis. Você fará a maior parte do trabalho usando código Python escrito e executado no ambiente Komodo Edit. + +## Trabalhando com ficheiros de texto + +A linguagem Python facilita o trabalho com ficheiros e texto. Vamos começar com ficheiros. + +## Criando e gravando um ficheiro de texto + +Vamos começar com uma breve discussão da terminologia. Numa lição anterior (dependendo do seu sistema operativo: [Instalação em Mac][], [Instalação em Windows][], ou [Instalação em Linux][]), você viu como enviar informação para a janela de "Saída de Comando" do seu editor de texto, usando o comando [print][] do Python. + +``` python +print('olá mundo') +``` + +A linguagem de programação Python é *orientada a objetos*. Isso quer dizer que a mesma é construída em torno de um tipo especial de entidade, um *objeto*, que +contém *dados* e vários *métodos* para aceder e alterar esses dados. Depois de um objeto ser criado, ele pode interagir com outros objetos. + +No exemplo acima, vemos um tipo de objeto, a *string* "olá mundo". A *string* é a sequência de caracteres entre aspas. Você pode escrever uma *string* de três maneiras: + +``` +message1 = 'olá mundo' +message2 = "olá mundo" +message3 = """olá +olá +olá mundo""" +``` + +O importante a notar é que nos primeiros dois exemplos você pode usar aspas simples ou duplas / vírgulas invertidas, mas não pode misturar as duas dentro de uma *string*. +No terceiro exemplo, as aspas triplas significam uma *string* que abrange mais de uma linha. + +Por exemplo, as seguintes declarações estão todas erradas: + +``` +message1 = "olá mundo' +message2 = 'olá mundo" +message3 = 'O meu nome é John O'Brian' +``` + +Conte o número de aspas simples na *message3*. Para funcionar você +teria que *libertar* o apóstrofo: + +``` python +message3 = 'O meu nome é John O\'Brian' +``` + +Alternativamente, poderia reescrever a declaração como: + +``` python +message3 = "O meu nome é John O'Brian" +``` + +`Print` é um comando que imprime objetos na forma textual. O comando *print*, quando combinado com a *string*, produz uma *instrução*. + +Você usará `print` como indicado anteriormente nos casos em que deseja apresentar a informação imediatamente. Às vezes, no entanto, você criará informação que deseja guardar, enviar a outra pessoa, ou usar como entrada para processamento posterior por um outro programa ou conjunto de programas. 
Nestes casos, você desejará enviar a informação para ficheiros no seu disco rígido, em vez de para a janela de "saída de comando". Insira o seguinte programa no seu editor de texto e salve-o como `ficheiro-saida.py`. + +``` python +# ficheiro-saida.py +f = open('olamundo.txt','w') +f.write('olá mundo') +f.close() +``` + +Em Python, qualquer linha que comece com uma marca de hash (\#) é conhecida como um *comentário* e é ignorada pelo interpretador Python. Os comentários têm como objetivo permitir que os programadores comuniquem uns com os outros (ou para se lembrarem do que seu código faz quando o voltam a analisar alguns meses depois). Num sentido mais amplo, os próprios programas são tipicamente escritos e formatados de modo que seja mais fácil para os programadores comunicarem uns com os outros. Quando o código é mais próximo dos requisitos da máquina é conhecido como *baixo nível*, enquanto o que está mais próximo da linguagem natural é de *alto nível*. Um dos benefícios de usar uma linguagem como Python é que ela é de nível muito alto, tornando mais fácil a comunicação (com algum custo em termos de eficiência computacional). + +No programa anterior, *f* é um *objeto ficheiro* (*file object*), e `open` (abrir), `write` (gravar) e `close` (fechar) são *métodos de ficheiro* (*file +methods*). Em outras palavras, abrir, gravar, e fechar fazem algo com o objeto *f* que, neste caso, é definido como um ficheiro `.txt`. Este é provavelmente um uso diferente do termo "método" do que aquele que você poderia esperar e, de vez em quando, você descobrirá que as palavras usadas no contexto de programação têm significados ligeiramente (ou completamente) diferentes do que na fala do dia a dia. Neste caso, lembre-se de que os métodos são código que executa ações. Eles fazem algo a outra coisa e retornam um resultado. Você pode tentar pensar nisto usando um exemplo do mundo real, como dar comandos ao cão da família. O cão (o objeto) entende comandos (ou seja, tem "métodos") como "latir", "sentar", "fingir de morto" e assim por diante. Discutiremos e aprenderemos como usar muitos outros métodos à medida que avançarmos. + +*f* é um nome de variável escolhido por nós; você poderia chamá-lo de qualquer coisa que quisesse. No Python, os nomes das variáveis podem ser constituídos por letras maiúsculas e minúsculas, números, e o símbolo *underline*... mas você não pode usar os nomes dos comandos Python como variáveis. Se você tentasse nomear a sua variável de ficheiro como, por exemplo, "print", o seu programa não funcionaria porque esta é uma [palavra reservada][] que faz parte da linguagem de programação. + +Os nomes das variáveis Python também são *case-sensitive*, ou seja, diferenciam letras maiúsculas de minúsculas, o que significa que *foobar*, *Foobar* e *FOOBAR* seriam todas variáveis diferentes. + +Quando você executa o programa, o método `open` (abrir) vai dizer ao seu computador para criar um novo ficheiro de texto `olamundo.txt` na mesma pasta que você salvou o programa `ficheiro-saida.py`. O parâmetro *w* diz que você pretende gravar conteúdo neste novo ficheiro usando Python. + +Observe que, como o nome do ficheiro e o parâmetro estão entre aspas simples, você sabe que ambos estão armazenados como *strings*; esquecer de incluir as aspas fará com que o seu programa falhe. + +Na próxima linha, o seu programa grava a mensagem "olá mundo" (outra string) no ficheiro e o fecha. (Para obter mais informações sobre estas instruções, consulte a seção [File Objects][] na Referência da biblioteca Python.) 
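 + +Como alternativa, fica um esboço mínimo: o Python também oferece o gestor de contexto `with`, que fecha o ficheiro automaticamente no final do bloco, dispensando a chamada explícita a `close`: + +``` python +# esboço alternativo: o bloco 'with' fecha o ficheiro automaticamente +with open('olamundo.txt', 'w') as f: + f.write('olá mundo') +``` + +Esta forma é hoje a mais idiomática; para acompanhar a lição, porém, continuamos a usar as chamadas explícitas a `open` e `close`. 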
 + +Clique duas vezes no botão "Executar Python" no Komodo Edit para executar o programa (ou o equivalente em qualquer outro editor de texto que você tenha decidido usar: por exemplo, clique em "\#!" e "Executar" no TextWrangler). Embora nada seja impresso no painel "Saída de Comando", você verá uma mensagem de status que diz algo como + +``` python +`/usr/bin/python ficheiro-saida.py` returned 0. +``` + +em Mac ou Linux, ou + +``` python +'C:\Python27\Python.exe ficheiro-saida.py' returned 0. +``` + +no Windows. + +Isso significa que o seu programa foi executado com sucesso. Se você usar *Arquivo -> Abrir -> Arquivo* no Komodo Edit, você pode abrir o ficheiro `olamundo.txt`. Ele deve conter a sua mensagem numa linha: + +``` python +olá mundo +``` + +Como os ficheiros de texto incluem uma quantidade mínima de informação de formatação, eles tendem a ser pequenos, fáceis de trocar entre plataformas diferentes +(ou seja, do Windows para Linux ou Mac, ou vice-versa) e fáceis de enviar de um programa de computador para outro. Eles geralmente também podem ser lidos por pessoas que usam um editor de texto como o Komodo Edit. + +### Lendo de um ficheiro de texto + +A linguagem Python também possui métodos que permitem obter informação desde ficheiros. Digite o seguinte programa no seu editor de texto e salve-o como +`ficheiro-entrada.py`. Ao clicar em "Executar" para executá-lo, será aberto o ficheiro de texto que você acabou de criar, lida a mensagem numa linha do ficheiro, e +impressa a mensagem no painel "Saída de Comando". + +``` python +# ficheiro-entrada.py +f = open('olamundo.txt','r') +message = f.read() +print(message) +f.close() +``` + +Nesse caso, o parâmetro *r* é usado para indicar que você está abrindo um ficheiro para ler (`read`) a partir dele. Os parâmetros permitem que você escolha entre as diferentes opções que um método específico permite. Voltando ao exemplo do cão da família, o cão pode ser treinado a latir uma vez quando faz um lanche com sabor de carne e duas vezes quando recebe um com sabor de frango. O sabor do lanche é um parâmetro. Cada método é diferente em termos de quais parâmetros aceitará. Você não pode, por exemplo, pedir a um cão que cante uma ópera italiana - a menos que o seu cão seja particularmente talentoso. Você pode pesquisar os parâmetros possíveis para um método específico no site do Python ou, frequentemente, pode encontrá-los digitando o nome do método num motor de busca, junto com o termo "Python". + +`Read` é um outro método de ficheiro. Os conteúdos do ficheiro (a mensagem de uma linha) são copiados para a variável *message*, que é como decidimos chamar esta *string*, e então o comando `print` é usado para enviar os conteúdos de *message* para o painel "Saída do Comando". + +### Anexando conteúdo a um ficheiro de texto pré-existente + +Uma terceira opção é abrir um ficheiro pré-existente e adicionar mais conteúdo a ele. Note que se você abrir (`open`) um ficheiro e usar o método `write` (gravar), *o programa sobrescreverá tudo o que possa estar contido no ficheiro*. Isso não é um problema quando você está criando um novo ficheiro, ou quando deseja sobrescrever os conteúdos de um ficheiro existente, mas pode ser indesejável quando você está criando um registro de eventos ou compilando um grande conjunto de dados em um ficheiro. Neste caso, ao invés de `write`, você vai querer usar o método acrescentar (`append`), designado por `a`. + +Digite o seguinte programa no seu editor de texto e salve-o como `ficheiro-acrescentar.py`. 
 Quando você executar este programa, ele abrirá o mesmo ficheiro `olamundo.txt` criado anteriormente e anexará uma segunda mensagem “olá mundo” ao ficheiro. A sequência '\\n' significa o início de uma nova linha. + +``` python +# ficheiro-acrescentar.py +f = open('olamundo.txt','a') +f.write('\n' + 'olá mundo') +f.close() +``` + +Depois de executar o programa, abra o ficheiro `olamundo.txt` e veja o que aconteceu. Feche o ficheiro de texto e execute mais algumas vezes o programa `ficheiro-acrescentar.py`. Quando você abrir `olamundo.txt` novamente, notará algumas mensagens 'olá mundo' extra esperando por você. + +Na próxima seção, discutiremos a modularidade e a reutilização de código. + +Leituras sugeridas +------------------ + +- [Non-Programmer's Tutorial for Python 3/Hello, World][] + + [Instalação em Mac]: https://programminghistorian.org/lessons/mac-installation + [Instalação em Windows]: https://programminghistorian.org/lessons/windows-installation + [Instalação em Linux]: https://programminghistorian.org/lessons/linux-installation + [print]: https://docs.python.org/2/reference/simple_stmts.html#the-print-statement + [palavra reservada]: https://docs.python.org/release/2.5.4/ref/keywords.html + [File Objects]: https://docs.python.org/2/library/stdtypes.html#bltin-file-objects + [Non-Programmer's Tutorial for Python 3/Hello, World]: https://en.wikibooks.org/wiki/Non-Programmer%27s_Tutorial_for_Python_3/Hello,_World diff --git a/pt/licoes/transcricao-automatica-grafias-nao-latinas.md b/pt/licoes/transcricao-automatica-grafias-nao-latinas.md index bc8c75f177..12fc190679 100644 --- a/pt/licoes/transcricao-automatica-grafias-nao-latinas.md +++ b/pt/licoes/transcricao-automatica-grafias-nao-latinas.md @@ -443,7 +443,7 @@ Justifica-se uma abordagem por "baselines" (a encarnado na figura 10 encontra-se ```xml - + Calfa 2022-08-23T14:48:18+00:00 ``` @@ -763,7 +763,7 @@ Os dados gerados neste artigo e no âmbito do projeto CGPG estão disponíveis n ## Notas de fim -[^1]: Os volumes da PG estão disponíveis em formato PDF, por exemplo, nos links [http://patristica.net/graeca](http://patristica.net/graeca) e [https://www.roger-pearse.com/weblog/patrologia-graeca-pg-pdfs](https://www.roger-pearse.com/weblog/patrologia-graeca-pg-pdfs) (em inglês). Mas apenas parte da PG está codificada em formato de "texto", por exemplo, no corpus do [Thesaurus Linguae Graecae](http://stephanus.tlg.uci.edu) (em inglês). +[^1]: Os volumes da PG estão disponíveis em formato PDF, por exemplo, nos links [https://patristica.net/graeca](https://patristica.net/graeca) e [https://www.roger-pearse.com/weblog/patrologia-graeca-pg-pdfs](https://www.roger-pearse.com/weblog/patrologia-graeca-pg-pdfs) (em inglês). Mas apenas parte da PG está codificada em formato de "texto", por exemplo, no corpus do [Thesaurus Linguae Graecae](https://stephanus.tlg.uci.edu) (em inglês). [^2]: A associação Calfa (Paris, França) e o projeto GRE*g*ORI (Université Catholique de Louvain, Louvain-la-Neuve, Bélgica) desenvolvem conjuntamente sistemas de reconhecimento de caracteres e sistemas de análise automática de textos (lematização, rotulagem morfossintática, POS_tagging). Esses desenvolvimentos já foram adaptados, testados e utilizados para processar textos em arménio, em georgiano e em sírio. O projeto CGPG continua esses desenvolvimentos no domínio do grego, propondo um processamento completo (OCR e análise) de textos editados da PG. 
 Para os exemplos de processamento morfossintático do grego antigo realizado em conjunto: Kindt, Bastien, Chahan Vidal-Gorène, Saulo Delle Donne. "Analyse automatique du grec ancien par réseau de neurones. Évaluation sur le corpus De Thessalonica Capta". *BABELAO*, 10-11 (2022), 525-550. [https://doi.org/10.14428/babelao.vol1011.2022.65073](https://doi.org/10.14428/babelao.vol1011.2022.65073) (em francês). @@ -841,7 +841,7 @@ Os dados gerados neste artigo e no âmbito do projeto CGPG estão disponíveis n [^38]: *Ibid.* -[^39]: Bastien Kindt e Vidal-Gorène Chahan, "From Manuscript to Tagged Corpora. An Automated Process for Ancient Armenian or Other Under-Resourced Languages of the Christian East". *Armeniaca. International Journal of Armenian Studies* 1, 73-96, 2022. [http://doi.org/10.30687/arm/9372-8175/2022/01/005]( http://doi.org/10.30687/arm/9372-8175/2022/01/005) (em inglês). +[^39]: Bastien Kindt e Chahan Vidal-Gorène, "From Manuscript to Tagged Corpora. An Automated Process for Ancient Armenian or Other Under-Resourced Languages of the Christian East". *Armeniaca. International Journal of Armenian Studies* 1, 73-96, 2022. [https://doi.org/10.30687/arm/9372-8175/2022/01/005](https://doi.org/10.30687/arm/9372-8175/2022/01/005) (em inglês). [^40]: Vidal-Gorène, Lucas, Salah, Decours-Perez, e Dupin. "RASAM–A Dataset for the Recognition and Analysis of Scripts in Arabic Maghrebi", 265-281. diff --git a/pt/pesquisa.md b/pt/pesquisa.md index 1db2dd7ddd..ffd4d0f913 100755 --- a/pt/pesquisa.md +++ b/pt/pesquisa.md @@ -8,16 +8,16 @@ original: research A equipe do projeto e membros da comunidade em geral estão envolvidos em várias iniciativas académicas relacionadas com o nosso trabalho aqui no *Programming Historian em português*. Tal inclui eventos, artigos em periódicos, resenhas (da comunidade) e pósteres. Se estiver a desenvolver pesquisa académica usando este projeto, por favor contate a nossa assistente de publicação Anisa Hawes. ## *Programming Historian* original -* William J. Turkel e Alan MacEachern, [_The Programming Historian_](http://niche-canada.org/wp-content/uploads/2013/09/programming-historian-1.pdf) 1a Edição (Network in Canadian History & Environment: 2007-2008). +* William J. Turkel e Alan MacEachern, [_The Programming Historian_](https://niche-canada.org/wp-content/uploads/2013/09/programming-historian-1.pdf) 1a Edição (Network in Canadian History & Environment: 2007-2008). * Tradução ao japonês de William J. Turkel e Alan MacEachern, [_The Programming Historian_](https://www.dh.ku-orcas.kansai-u.ac.jp/?cat=2) 1a Edição (Network in Canadian History & Environment: 2007-2008). ## Resenhas -* Björn Ekström, Elisa Tattersall Wallin e Hana Marčetić, '[_Programming Historian_: Novice-friendly tutorials on digital methods](http://www.diva-portal.org/smash/record.jsf?pid=diva2%3A1508542&dswid=7551)', _Tidskrift för ABM_, Vol. 5, no 1 (2020), pp. 71-75. +* Björn Ekström, Elisa Tattersall Wallin e Hana Marčetić, '[_Programming Historian_: Novice-friendly tutorials on digital methods](https://www.diva-portal.org/smash/record.jsf?pid=diva2%3A1508542&dswid=7551)', _Tidskrift för ABM_, Vol. 5, no 1 (2020), pp. 71-75. * Dries Daems, '[A Review and Roadmap of Online Learning Platforms and Tutorials in Digital Archaeology](https://doi.org/10.1017/aap.2019.47)', _Advances in Archaeological Practice_, vol. 8, no 1 (2020), pp. 87-92. * Martin Dröge, '[Review of: The Programming Historian](https://www.hsozkult.de/webreview/id/rezwww-184)', _H-Soz-Kult_ (2019). 
* Priscila Pilatowsky Goñi, '[Reseña a The programming historian](https://revistas.uned.es/index.php/RHD/article/view/22420)', _Revista de Humanidades Digitales_, vol. 2 (2018). -* Lincoln Mullen, '[Review of the Programming Historian](http://jah.oxfordjournals.org/content/103/1/299.2.full)', _The Journal of American History_, vol. 103, no. 1 (2016), pp. 299-301. -* Cameron Blevins, '[Review of the Programming Historian](http://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/)', _The Journal of Interactive Technology & Pedagogy_, vol. 8 (2015). +* Lincoln Mullen, '[Review of the Programming Historian](https://jah.oxfordjournals.org/content/103/1/299.2.full)', _The Journal of American History_, vol. 103, no. 1 (2016), pp. 299-301. +* Cameron Blevins, '[Review of the Programming Historian](https://jitp.commons.gc.cuny.edu/review-of-the-programming-historian/)', _The Journal of Interactive Technology & Pedagogy_, vol. 8 (2015). ## Pesquisa publicada @@ -27,16 +27,16 @@ A equipe do projeto e membros da comunidade em geral estão envolvidos em vária * Jennifer Isasi, Riva Quiroga, Nabeel Sidiqqui, Joana Vieira Paulino, Alex Wermer-Colan, [“A Model for Multilingual and Multicultural Digital Scholarship Methods Publishing"](https://www.taylorfrancis.com/chapters/edit/10.4324/9781003393696-3/model-multilingual-multicultural-digital-scholarship-methods-publishing-jennifer-isasi-riva-quiroga-nabeel-siddiqui-joana-vieira-paulino-alex-wermer-colan), em _Multilingual Digital Humanities_, editado por Viola, L., & Spence, P., Routledge, 2023. * Adam Crymble & Charlotte M. H. Im, ['Measuring digital humanities learning requirements in Spanish & English-speaking practitioner communities'](https://doi.org/10.1007/s42803-023-00066-x), International Journal of Digital Humanities, (2023). * Eric Brasil, '[_pyHDB - Ferramenta Heurística para a Hemeroteca Digital Brasileira: utilizando técnicas de web scraping para a pesquisa em História_'](https://doi.org/10.15848/hh.v15i40.1904), _História Da Historiografia: International Journal of Theory and History of Historiography_, 15(40) (2022), 186–217. -* Matthew Lincoln, Sarah Melton, Jennifer Isasi, François Dominic Laramée, '[Relocating Complexity: The Programming Historian and Multilingual Static Site Generation](http://www.digitalhumanities.org/dhq/vol/16/2/000585/000585.html)', _Digital Humanities Quarterly_ 16, 2 (2022). +* Matthew Lincoln, Sarah Melton, Jennifer Isasi, François Dominic Laramée, '[Relocating Complexity: The Programming Historian and Multilingual Static Site Generation](https://www.digitalhumanities.org/dhq/vol/16/2/000585/000585.html)', _Digital Humanities Quarterly_ 16, 2 (2022). * Jennifer Isasi e Antonio Rojas Castro, ‘[¿Sin equivalencia? Una reflexión sobre la traducción al español de recursos educativos abiertos](https://muse.jhu.edu/article/842253)’, _Hispania_, 104, no. 4 (2021), 613-624. * Adam Crymble e Maria José Afanador Llach, ‘The Globally Unequal Promise of Digital Tools for History: UK and Colombia Case Study’ em _Teaching History for the Contemporary World_, editado por Adele Nye, 85-98, Springer, 2021. * Daniel Alves, ['Ensinar Humanidades Digitais sem as Humanidades Digitais: um olhar a partir das licenciaturas em História'](https://novaresearch.unl.pt/files/32228034/Ensinar_Humanidades_Digitais.pdf), _Revista EducaOnline_, v. 15, n. 2 (2021). 
* Adam Crymble, [_Technology & the Historian: Transformations in the Digital Age_](https://www.press.uillinois.edu/books/catalog/57hxp7wr9780252043710.html), (University of Illinois Press, 2021). * Anna-Maria Sichani, James Baker, Maria José Afanador Llach, e Brandon Walsh, [‘Diversity and Inclusion in Digital Scholarship and Pedagogy: The Case of The Programming Historian’](https://doi.org/10.1629/uksg.465), _Insights_, (2019). -* Katrina Navickas e Adam Crymble, ['From Chartist Newspaper to Digital Map of Grass-roots Meetings, 1841-44: Documenting Workflows'](http://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179), _Journal of Victorian Culture_, (2017). +* Katrina Navickas e Adam Crymble, ['From Chartist Newspaper to Digital Map of Grass-roots Meetings, 1841-44: Documenting Workflows'](https://www.tandfonline.com/doi/full/10.1080/13555502.2017.1301179), _Journal of Victorian Culture_, (2017). * Adam Crymble, ['Identifying and Removing Gender Barriers in Open Learning Communities: The Programming Historian'](https://www.herts.ac.uk/__data/assets/pdf_file/0016/138013/Blip-2016-Autumn-2016-Final-Autumn-2016.pdf), _Blended Learning in Practice_, (2016), 49-60. [[pre-print pdf](/researchpapers/openLearningCommunities2016.pdf)] -* Fred Gibbs, ['Editorial Sustainability and Open Peer Review at Programming Historian',](http://web.archive.org/web/20180713014622/http://dhcommons.org/journal/issue-1/editorial-sustainability-and-open-peer-review-programming-historian) _DH Commons_, Vol. 1 (2015). -* Shawn Graham, Ian Milligan, and Scott Weingart, [_Exploring Big Historical Data: The Historian's Macroscope_](http://www.themacroscope.org/2.0/), (Imperial College Press, 2015). +* Fred Gibbs, ['Editorial Sustainability and Open Peer Review at Programming Historian',](https://web.archive.org/web/20180713014622/https://dhcommons.org/journal/issue-1/editorial-sustainability-and-open-peer-review-programming-historian) _DH Commons_, Vol. 1 (2015). +* Shawn Graham, Ian Milligan, and Scott Weingart, [_Exploring Big Historical Data: The Historian's Macroscope_](https://www.themacroscope.org/2.0/), (Imperial College Press, 2015). ## Relatórios @@ -53,7 +53,7 @@ A equipe do projeto e membros da comunidade em geral estão envolvidos em vária * Alex Wermer-Colan, ['Learning Digital Methods with the _Programming Historian_'](https://charlesstudy.temple.edu/event/11953011), Temple University [Em linha], (22 de fevereiro de 2024). * Carlo Blum, Adam Crymble, Vicky Garnett, Timothée Giraud, Alíz Horváth, Stefan Krebs, Ralph Marschall, Sofia Papastamkou, & Lorella Viola, 'Invisible College of Digital History: Workshop on Multilingual Educational Resources', C²DH [Em linha], (8 de novembro de 2023). * Nabeel Siddiqui, 'Convolutional Neural Networks for Image Classification', University of Edinburgh [Em linha], (7 de novembro de 2023). -* Eric Brasil, '[História Digital e História Digital da Educação: Caminhos Cruzados](http://www.iea.usp.br/eventos/historia-digital-educacao-caminhos-cruzados)', Instituto de Estudos Avançados, USP, São Paulo, Brasil, (17 de outubro de 2023). +* Eric Brasil, '[História Digital e História Digital da Educação: Caminhos Cruzados](https://www.iea.usp.br/eventos/historia-digital-educacao-caminhos-cruzados)', Instituto de Estudos Avançados, USP, São Paulo, Brasil, (17 de outubro de 2023). 
* Scott Kleinman, Alex Wermer-Colan, Joana Vieira Paulino, Nabeel Siddiqui, Zoe LeBlanc, 'Developing a Digital Humanities Tutorial', [DH 2023](https://dh2023.adho.org/), Graz, Áustria, (10 de julho de 2023). * Daphné Mathelier, 'Atelier Markdown', [11e journées du réseau Medici](https://medici2023.sciencesconf.org/resource/page/id/2), Université de Liège, Bélgica, (29 de junho de 2023). * María José Afanador Llach, Jennifer Isasi, Riva Quiroga, 'Sobre _Programming Historian en español_ y cómo contribuir a la publicación', Semana de Humanidades Digitales 2023 [Em linha], (10 de Maio de 2023). @@ -135,10 +135,10 @@ A equipe do projeto e membros da comunidade em geral estão envolvidos em vária * Adam Crymble, 'Facilitating Making in Digital Humanities', The Archaeology of Making, University of London, Reino Unido, 5 de Maio de 2021. * Daniel Alves, Jennifer Isasi, Sarah Melton, Sofia Papastamkou, Jessica Parr, Riva Quiroga, Nabeel Siddiqui, Brandon Walsh, '[The Programming Historian: A Global Case Study in Multilingual Open Access and DH Tutelage/Instruction](https://msuglobaldh.org/abstracts/#programming-historian)' (panel), _Global Digital Humanities Symposium_, Michigan State University, East Lansing, USA, 12 de Abril de 2021. * Jessica Parr, '[Cambridge Cultural Heritage Data School: Final plenary](https://www.cdh.cam.ac.uk/events/cambridge-cultural-heritage-data-school-final-plenary)', University of Cambridge, Reino Unido, 30 de Março de 2021. -* Jennifer Isasi & Riva Quiroga, ['_Programming Historian_: Un proyecto colaborativo para poner la programación al alcance de los humanistas'](http://ixa2.si.ehu.eus/intele/?q=webinars), _INTELE : INfraestructura de TEcnologías del LEnguaje_, España, 25 de Março de 2021. +* Jennifer Isasi & Riva Quiroga, ['_Programming Historian_: Un proyecto colaborativo para poner la programación al alcance de los humanistas'](https://ixa2.si.ehu.eus/intele/?q=webinars), _INTELE : INfraestructura de TEcnologías del LEnguaje_, España, 25 de Março de 2021. * Sofia Papastamkou, Jessica Parr & Riva Quiroga, 'Challenges for Digital Literacy in the Humanities: The Open, Community-Based and Multilinguistic Approach of _The Programming Historian_', NewsEye’s International Conference, Europe, 17 de Março de 2021. * Riva Quiroga, ['Multilingual Digital Humanites'](https://mediacentral.ucl.ac.uk/Play/59506), Digital Humanities Long View Seminar, UCLDH, UK & CESTA, USA, 10 de Março de 2021. -* Brandon Walsh, '[The Programming Historian and Editorial Process in Digital Publishing](http://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/)', Modern Languages Association Conference 2021, 7-10 de Janeiro de 2021. +* Brandon Walsh, '[The Programming Historian and Editorial Process in Digital Publishing](https://walshbr.com/blog/the-programming-historian-and-editorial-process-in-digital-publishing/)', Modern Languages Association Conference 2021, 7-10 de Janeiro de 2021. * Sofia Papastamkou, François Dominic Laramée, Martin Grandjean, '[Le Programming Historian en français: quelles ressources éducatives libres pour les méthodes numériques ?](https://zenodo.org/record/3819954)', *Humanistica 2020 Conference*, Bordeaux, France, 12-14 de Maio de 2020. * Sofia Papastamkou, 'A Beating Heart of Digital History: The Programming Historian', [Teaching Digital History Workshop](https://cas.au.dk/en/cedhar/events/show/artikel/teaching-digital-history-workshop), Center for Digital History Aarhus, University of Aarhus, Denmark, 23 de Outubro de 2019. 
 * Jennifer Isasi, Maria José Afanador y Antonio Rojas Castro, 'Retos en la producción de tutoriales de HD en contexto hispanohablantes', Conferencia ACH 2019, The Association for Computers and the Humanities, Pittsburgh, 23-26 de Julho de 2019. @@ -151,7 +151,7 @@ A equipe do projeto e membros da comunidade em geral estão envolvidos em vária * Victor Gayol, 'La investigación del pasado y la historia digital: análisis de datos y cómo aprender (The Programming Historian en español)', _Humanidades Digitales_, IV Feria Internacional de Ciencias Sociales y Humanidades, Centro Universitario de Los Lagos - Universidad de Guadalajara, Lagos de Moreno, Jalisco (9 de Março de 2017). * Victor Gayol, 'The Programming Historian: un modelo colaborativo para la investigación y la enseñanza en ciencias sociales y humanidades digitales', _Mesa de Trabajo sobre Ciencias Sociales y Humanidades Digitales_, El Colegio De Michoacán, Mexico (21 de Fevereiro de 2017). * Adam Crymble, 'Bringing Digital Humanities into the University for Free', University of Cape Town, South Africa (27-28 de Junho de 2016). -* Fred Gibbs, 'The Programming Historian' (Poster), _American Historical Association_, New York (Janeiro de 2015). +* Fred Gibbs, 'The Programming Historian' (Poster), _American Historical Association_, New York (Janeiro de 2015). * Adam Crymble, 'The Programming Historian 2', _Digital History Seminar_, Institute of Historical Research, London (13 de Outubro de 2013). * Adam Crymble, 'The Programming Historian 2', _Digital Humanities 2012_, Hamburg (Julho de 2012). 
    - + diff --git a/pt/sobre.md b/pt/sobre.md index ca0553ca9a..634e2192e0 100755 --- a/pt/sobre.md +++ b/pt/sobre.md @@ -14,7 +14,7 @@ O processo de revisão é um componente essencial de um esforço colaborativo, p ## Código Aberto -A equipe do _Programming Historian em Português_ está comprometida com os princípios do código aberto. Sempre que possível, todas as lições apresentadas usam linguagens de programação e software de código aberto. Esta política visa minimizar custos para todas as partes envolvidas e permitir o mais amplo nível de participação. Acreditamos que todos devem se beneficiar destes tutoriais, não apenas aqueles que têm acesso a orçamentos de pesquisa elevados para software proprietário. Desde 2016, tem sido depositada no [Zenodo](https://zenodo.org/) uma versão citável do projeto _The Programming Historian_. A cópia de 2022 está disponível em [doi.org/10.5281/zenodo.7313045](https://doi.org/10.5281/zenodo.7313045). Desde 2018, o [UK Web Archive](https://www.webarchive.org.uk/) faz rastreamentos regulares ao Programming Historian. Estes são arquivados e disponibilizados ao público no seu [website](https://www.webarchive.org.uk/wayback/en/archive/*/http://programminghistorian.org/). +A equipe do _Programming Historian em Português_ está comprometida com os princípios do código aberto. Sempre que possível, todas as lições apresentadas usam linguagens de programação e software de código aberto. Esta política visa minimizar custos para todas as partes envolvidas e permitir o mais amplo nível de participação. Acreditamos que todos devem se beneficiar destes tutoriais, não apenas aqueles que têm acesso a orçamentos de pesquisa elevados para software proprietário. Desde 2016, tem sido depositada no [Zenodo](https://zenodo.org/) uma versão citável do projeto _The Programming Historian_. A cópia de 2022 está disponível em [doi.org/10.5281/zenodo.7313045](https://doi.org/10.5281/zenodo.7313045). Desde 2018, o [UK Web Archive](https://www.webarchive.org.uk/) faz rastreamentos regulares ao Programming Historian. Estes são arquivados e disponibilizados ao público no seu [website](https://www.webarchive.org.uk/wayback/en/archive/*/https://programminghistorian.org/). ## Acesso Aberto _Diamante_ @@ -25,7 +25,7 @@ Não cobramos taxas de processamento de artigos (APCs) ou assinaturas para bibli O _Programming Historian em Português_ (ISSN {{ site.data.snippets.issn[page.lang] }}) está indexado no [Directory of Open Access Journals](https://doaj.org/toc/2397-2068). ## Prémios -O _Programming Historian_ ganhou vários prémios que reconhecem as suas conquistas nas esferas das publicações em acesso aberto e da pesquisa digital. Em 2016 a nossa versão em inglês ganhou o [Digital Humanities Awards](http://dhawards.org/dhawards2016/results/) na categoria de _Best Series of Posts_. No ano seguinte, em 2017, o _Programming Historian en español_ ganhou o mesmo louvor e, no ano seguinte, venceu a 'Mejor iniciativa formativa desarrollada durante el año 2018', [Humanidades Digitales Hispánicas Association](http://humanidadesdigitaleshispanicas.es/). Ganhámos o Canadian Social Knowledge Institute's Open Scholarship Award em 2020 e, em 2021, foi-nos atribuído o Coko Foundation's Open Publishing Award na categoria _Open Content_. Em 2022, ganhámos a categoria de Melhor Material de Formação de DH do [Digital Humanities Awards](http://dhawards.org/dhawards2022/results/). 
+O _Programming Historian_ ganhou vários prémios que reconhecem as suas conquistas nas esferas das publicações em acesso aberto e da pesquisa digital. Em 2016 a nossa versão em inglês ganhou o [Digital Humanities Awards](https://dhawards.org/dhawards2016/results/) na categoria de _Best Series of Posts_. No ano seguinte, em 2017, o _Programming Historian en español_ ganhou o mesmo louvor e, no ano seguinte, venceu a 'Mejor iniciativa formativa desarrollada durante el año 2018', [Humanidades Digitales Hispánicas Association](https://humanidadesdigitaleshispanicas.es/). Ganhámos o Canadian Social Knowledge Institute's Open Scholarship Award em 2020 e, em 2021, foi-nos atribuído o Coko Foundation's Open Publishing Award na categoria _Open Content_. Em 2022, ganhámos a categoria de Melhor Material de Formação de DH do [Digital Humanities Awards](https://dhawards.org/dhawards2022/results/). ## Política de Diversidade @@ -40,4 +40,4 @@ Veja a página ['Apoie o projeto']({{site.baseurl}}/pt/apoie-nos) para uma lista ## História do Projeto -O _Programming Historian_ foi fundado em 2008 por William J. Turkel e Alan MacEachern. Na altura, Turkel publicou uma entrada no [blog](http://digitalhistoryhacks.blogspot.com/2008/01/programming-historian.html), definindo as suas intenções para o projeto. Inicialmente concentrou-se na linguagem de programação Python e foi publicado em acesso aberto como um projeto de 'infraestrutura digital' da Network in Canadian History & Environment (NiCHE). Em 2012, o Programming Historian expandiu a sua equipe editorial e tornou-se numa revista académica de acesso aberto com revisão por pares sobre metodologia para historiadores digitais. Em 2016, adicionámos uma versão em espanhol à publicação inicial em inglês e, em 2017, começámos a publicar lições traduzidas sob o título _[Programming Historian en español]({{site.baseurl}}/es)_. Em 2018, [organizámos o nosso primeiro workshop de escrita em espanhol](/posts/bogota-workshop-report), publicámos uma chamada para [a contribuição de novas lições em espanhol](/posts/convocatoria-de-tutoriales) e iniciámos um plano para traduzir lições de espanhol para inglês. No mesmo ano adicionámos uma versão em francês e em 2019 lançámos o _[Programming Historian en français]({{site.baseurl}}/fr)_. Em 2021, adicionámos uma [versão em português]({{site.baseurl}}/pt). +O _Programming Historian_ foi fundado em 2008 por William J. Turkel e Alan MacEachern. Na altura, Turkel publicou uma entrada no [blog](https://digitalhistoryhacks.blogspot.com/2008/01/programming-historian.html), definindo as suas intenções para o projeto. Inicialmente concentrou-se na linguagem de programação Python e foi publicado em acesso aberto como um projeto de 'infraestrutura digital' da Network in Canadian History & Environment (NiCHE). Em 2012, o Programming Historian expandiu a sua equipe editorial e tornou-se numa revista académica de acesso aberto com revisão por pares sobre metodologia para historiadores digitais. Em 2016, adicionámos uma versão em espanhol à publicação inicial em inglês e, em 2017, começámos a publicar lições traduzidas sob o título _[Programming Historian en español]({{site.baseurl}}/es)_. Em 2018, [organizámos o nosso primeiro workshop de escrita em espanhol](/posts/bogota-workshop-report), publicámos uma chamada para [a contribuição de novas lições em espanhol](/posts/convocatoria-de-tutoriales) e iniciámos um plano para traduzir lições de espanhol para inglês. 
No mesmo ano adicionámos uma versão em francês e em 2019 lançámos o _[Programming Historian en français]({{site.baseurl}}/fr)_. Em 2021, adicionámos uma [versão em português]({{site.baseurl}}/pt). diff --git a/translation-concordance.md b/translation-concordance.md index d2a9fcb51b..dc3b78bc97 100644 --- a/translation-concordance.md +++ b/translation-concordance.md @@ -20,7 +20,11 @@ An automatically-generated list of page translation relationships across our pub
    {% for l in site.data.snippets.language-list %} {% assign sp = page_versions | where: "lang", l | first %} - {{ sp.title }} + + {% if sp %} + {{ sp.title }} + {% endif %} + {% endfor %}
    {% endfor %} @@ -41,7 +45,11 @@ An automatically-generated list of page translation relationships across our pub
    {% for l in site.data.snippets.language-list %} {% assign sp = page_versions | where: "lang", l | first %} - {{ sp.title }} + + {% if sp %} + {{ sp.title }} + {% endif %} + {% endfor %}
    {% endfor %} diff --git a/troubleshooting.md b/troubleshooting.md index c5c27c63ad..66d95d7c51 100644 --- a/troubleshooting.md +++ b/troubleshooting.md @@ -136,30 +136,30 @@ HTML learning. Other programming languages have equally valuable sets of introductory texts and websites which you can find online. - [Python for - Non-programmers](http://wiki.python.org/moin/BeginnersGuide/NonProgrammers) -- [LearnPython.org](http://learnpython.org/) This tutorial offers + Non-programmers](https://wiki.python.org/moin/BeginnersGuide/NonProgrammers) +- [LearnPython.org](https://learnpython.org/) This tutorial offers in-browser coding windows. - [Non-Programmer's Tutorial for Python 2.6](https://en.wikibooks.org/wiki/Non-Programmer's_Tutorial_for_Python_2.6) - [W3 Schools HTML - Tutorial](http://www.w3schools.com/html/default.asp) + Tutorial](https://www.w3schools.com/html/default.asp) As you proceed (or if you already have some programming experience) you'll probably prefer more general references like: - [Python for - Programmers](http://wiki.python.org/moin/BeginnersGuide/Programmers) -- [Python documentation page](http://docs.python.org/) + Programmers](https://wiki.python.org/moin/BeginnersGuide/Programmers) +- [Python documentation page](https://docs.python.org/) - [Python tutorial](https://docs.python.org/3/tutorial/index.html) - [Python library reference](https://docs.python.org/3/library/index.html) -- Pilgrim, [Dive into Python](http://www.diveintopython.net) +- Pilgrim, [Dive into Python](https://www.diveintopython.net) We also like to have a few printed books ready-to-hand, especially -- Lutz, *[Learning Python](http://www.worldcat.org/oclc/156890981)* -- Lutz, *[Programming Python](http://www.worldcat.org/oclc/65765375)* +- Lutz, *[Learning Python](https://www.worldcat.org/oclc/156890981)* +- Lutz, *[Programming Python](https://www.worldcat.org/oclc/65765375)* - Martelli, Ravenscroft and Ascher, *[Python - Cookbook](http://www.worldcat.org/oclc/59007845)* + Cookbook](https://www.worldcat.org/oclc/59007845)* Bring On The Code! ------------------ @@ -167,6 +167,6 @@ Bring On The Code! Now that you have Step One (Troubleshooting) mastered, you are ready to dive in. Let's get programming. 
 - [Stack Overflow]: http://stackoverflow.com/ - [Tutor]: http://mail.python.org/mailman/listinfo/tutor - [FAQ page]: http://web.archive.org/web/20130101093828/http://stackoverflow.com/faq + [Stack Overflow]: https://stackoverflow.com/ + [Tutor]: https://mail.python.org/mailman/listinfo/tutor + [FAQ page]: https://web.archive.org/web/20130101093828/https://stackoverflow.com/faq From 49ac5054a109223a9c2a1368af4a16b2e74180e8 Mon Sep 17 00:00:00 2001 From: ZoeLeBlanc Date: Thu, 15 May 2025 12:51:22 -0500 Subject: [PATCH 7/7] adding notebook for fixing broken links --- FixingLinksNotebook.ipynb | 568 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 568 insertions(+) create mode 100644 FixingLinksNotebook.ipynb diff --git a/FixingLinksNotebook.ipynb b/FixingLinksNotebook.ipynb new file mode 100644 index 0000000000..458df8c9ff --- /dev/null +++ b/FixingLinksNotebook.ipynb @@ -0,0 +1,568 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "61f40e40", + "metadata": {}, + "source": [ + "# Fixing Links Notebook" + ] + }, + { + "cell_type": "markdown", + "id": "fac6fe6c", + "metadata": {}, + "source": [ + "Before running this notebook, make sure to run the following commands in the terminal to install the required packages and generate the link reports:\n", + "\n", + "```bash\n", + "bundle install\n", + "bundle exec jekyll build\n", + "bundle exec htmlproofer _site > htmlproofer-output.txt 2>&1\n", + "ruby parse_htmlproofer_log.rb \n", + "```\n", + "\n", + "Each command should be run separately; the final two commands create the files listing all the htmlproofer errors and warnings. This notebook loads the final CSV file to help you see which links exist. You will also need to install the `pandas` library if you haven't already. You can do this by running:\n", + "\n", + "```bash\n", + "pip install pandas\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "ae22f5c9", + "metadata": {}, + "source": [ + "## Load Libraries and Data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7106f439", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "6f0bb84a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of errors: 4405\n" + ] + } + ], + "source": [ + "df = pd.read_csv(\"htmlproofer-report.csv\")\n", + "# Lower case the column names\n", + "df.columns = df.columns.str.lower()\n", + "print(f\"Number of errors: {len(df)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f7e9ea41", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "message\n", + "'a' tag is missing a reference 2190\n", + "External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) 491\n", + "internal image i/genericThumb.jpg does not exist 10\n", + "External link https://twitter.com/jenniferisve failed (status code 400) 9\n", + "External link https://twitter.com/rivaquiroga failed (status code 400) 9\n", + " ... 
\n", + "External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/project-team.md failed (status code 429) 1\n", + "External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/research.md failed (status code 429) 1\n", + "External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/reviewer-guidelines.md failed (status code 429) 1\n", + "External link https://github.com/programminghistorian/jekyll/commits/gh-pages/en/supporters.md failed (status code 429) 1\n", + "script is empty and has no src attribute 1\n", + "Name: count, Length: 1268, dtype: int64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.message.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "7fbda16b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "
    \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    filecountcount_index
    0_site/es/lecciones/sitios-estaticos-con-jekyll...850
    1_site/en/lessons/building-static-sites-with-je...851
    2_site/assets/from-html-to-list-of-words-1/obo-...522
    3_site/assets/normaliser-donnees-textuelles-pyt...523
    4_site/fr/equipe-projet.html334
    ............
    498_site/assets/mapping-with-python-leaflet/exerc...2498
    499_site/assets/mapping-with-python-leaflet/exerc...2499
    500_site/assets/mapping-with-python-leaflet/exerc...2500
    501_site/assets/sustainable-authorship-in-plain-t...1501
    502_site/assets/interactive-text-games-using-twin...1502
    \n", + "

    503 rows × 3 columns

    \n", + "
    " + ], + "text/plain": [ + " file count count_index\n", + "0 _site/es/lecciones/sitios-estaticos-con-jekyll... 85 0\n", + "1 _site/en/lessons/building-static-sites-with-je... 85 1\n", + "2 _site/assets/from-html-to-list-of-words-1/obo-... 52 2\n", + "3 _site/assets/normaliser-donnees-textuelles-pyt... 52 3\n", + "4 _site/fr/equipe-projet.html 33 4\n", + ".. ... ... ...\n", + "498 _site/assets/mapping-with-python-leaflet/exerc... 2 498\n", + "499 _site/assets/mapping-with-python-leaflet/exerc... 2 499\n", + "500 _site/assets/mapping-with-python-leaflet/exerc... 2 500\n", + "501 _site/assets/sustainable-authorship-in-plain-t... 1 501\n", + "502 _site/assets/interactive-text-games-using-twin... 1 502\n", + "\n", + "[503 rows x 3 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "file_counts_df = df.file.value_counts().reset_index()\n", + "file_counts_df['count_index'] = file_counts_df.index\n", + "\n", + "file_counts_df" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6352df82", + "metadata": {}, + "outputs": [], + "source": [ + "merged_df = df.merge(file_counts_df, on='file', how='outer').sort_values(by=\"count_index\", ascending=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "05882ea5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "message\n", + "'a' tag is missing a reference 78\n", + "internally linking to #sectionwindows; the file exists, but the hash 'sectionwindows' does not 1\n", + "internally linking to #section1-9; the file exists, but the hash 'section1-9' does not 1\n", + "External link https://github.com/programminghistorian/jekyll/commits/gh-pages/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.md failed (status code 429) 1\n", + "External link https://github.com/programminghistorian/jekyll/commits/gh-pages failed (status code 429) 1\n", + "External link https://jekyll-windows.juthilo.com/ failed with something very wrong. 1\n", + "External link https://jekyllthemes.org/ failed with something very wrong. 
1\n", + "External link https://github.com/programminghistorian/ph-submissions/issues/303 failed (status code 429) 1\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_df[merged_df.file.str.contains(\"_site/es/lecciones/sitios-estaticos-con-jekyll\", na=False)].message.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "0857ae8b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "file\n", + "_site/en/lessons/building-static-sites-with-jekyll-github-pages.html 80\n", + "_site/es/lecciones/sitios-estaticos-con-jekyll-y-github-pages.html 78\n", + "_site/pt/licoes/som-dados-sonificacao-historiadores.html 17\n", + "_site/en/lessons/sonification.html 17\n", + "_site/assets/from-html-to-list-of-words-1/obo-t17800628-33.html 12\n", + " ..\n", + "_site/es/lecciones/introduccion-a-bash.html 4\n", + "_site/en/lessons/clustering-visualizing-word-embeddings.html 4\n", + "_site/es/lecciones/introduccion-a-imageplot-y-la-visualizacion-de-metadatos.html 4\n", + "_site/es/lecciones/introduccion-a-markdown.html 4\n", + "_site/en/vacancies.html 4\n", + "Name: count, Length: 493, dtype: int64" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_df[merged_df.message == \"'a' tag is missing a reference\"].file.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "03ad4a55", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    filelinemessagecountcount_index
    413_site/en/lessons/building-static-sites-with-je...533'a' tag is missing a reference851
    412_site/en/lessons/building-static-sites-with-je...532'a' tag is missing a reference851
    411_site/en/lessons/building-static-sites-with-je...531'a' tag is missing a reference851
    410_site/en/lessons/building-static-sites-with-je...530'a' tag is missing a reference851
    404_site/en/lessons/building-static-sites-with-je...520'a' tag is missing a reference851
    ..................
    435_site/en/lessons/building-static-sites-with-je...706'a' tag is missing a reference851
    436_site/en/lessons/building-static-sites-with-je...716'a' tag is missing a reference851
    437_site/en/lessons/building-static-sites-with-je...778'a' tag is missing a reference851
    439_site/en/lessons/building-static-sites-with-je...823'a' tag is missing a reference851
    438_site/en/lessons/building-static-sites-with-je...803'a' tag is missing a reference851
    \n", + "

    85 rows × 5 columns

    \n", + "
    " + ], + "text/plain": [ + " file line \\\n", + "413 _site/en/lessons/building-static-sites-with-je... 533 \n", + "412 _site/en/lessons/building-static-sites-with-je... 532 \n", + "411 _site/en/lessons/building-static-sites-with-je... 531 \n", + "410 _site/en/lessons/building-static-sites-with-je... 530 \n", + "404 _site/en/lessons/building-static-sites-with-je... 520 \n", + ".. ... ... \n", + "435 _site/en/lessons/building-static-sites-with-je... 706 \n", + "436 _site/en/lessons/building-static-sites-with-je... 716 \n", + "437 _site/en/lessons/building-static-sites-with-je... 778 \n", + "439 _site/en/lessons/building-static-sites-with-je... 823 \n", + "438 _site/en/lessons/building-static-sites-with-je... 803 \n", + "\n", + " message count count_index \n", + "413 'a' tag is missing a reference 85 1 \n", + "412 'a' tag is missing a reference 85 1 \n", + "411 'a' tag is missing a reference 85 1 \n", + "410 'a' tag is missing a reference 85 1 \n", + "404 'a' tag is missing a reference 85 1 \n", + ".. ... ... ... \n", + "435 'a' tag is missing a reference 85 1 \n", + "436 'a' tag is missing a reference 85 1 \n", + "437 'a' tag is missing a reference 85 1 \n", + "439 'a' tag is missing a reference 85 1 \n", + "438 'a' tag is missing a reference 85 1 \n", + "\n", + "[85 rows x 5 columns]" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_df[merged_df.file.str.contains(\"_site/en/lessons/building-static-sites-with-jekyll-github-pages\", na=False)]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "95f87012", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Updated: ./_data/ph_authors.yml\n" + ] + } + ], + "source": [ + "import os\n", + "import re\n", + "\n", + "EXTENSIONS = (\".yml\")\n", + "\n", + "def replace_links_preserving_code_blocks(file_path):\n", + " with open(file_path, \"r\", encoding=\"utf-8\") as f:\n", + " content = f.read()\n", + "\n", + " # Match code blocks (triple backticks) and inline code (`...`)\n", + " code_blocks = list(re.finditer(r\"(```.*?```|`[^`]*`)\", content, re.DOTALL))\n", + " modified = content\n", + " offset = 0\n", + "\n", + " for match in code_blocks:\n", + " start, end = match.span()\n", + " segment = content[start:end]\n", + "\n", + " # Temporarily mark this section to skip\n", + " placeholder = f\"%%CODEBLOCK{start}%%\"\n", + " modified = modified[:start + offset] + placeholder + modified[end + offset:]\n", + " offset += len(placeholder) - (end - start)\n", + "\n", + " # Replace all http:// with https://\n", + " modified = re.sub(r\"http://\", \"https://\", modified)\n", + "\n", + " # Restore code blocks untouched\n", + " for match in code_blocks:\n", + " start = match.start()\n", + " placeholder = f\"%%CODEBLOCK{start}%%\"\n", + " modified = modified.replace(placeholder, match.group(0))\n", + "\n", + " if content != modified:\n", + " print(f\"✅ Updated: {file_path}\")\n", + " with open(file_path, \"w\", encoding=\"utf-8\") as f:\n", + " f.write(modified)\n", + "\n", + "def process_all_files(root=\".\"):\n", + " for dirpath, _, filenames in os.walk(root):\n", + " for fname in filenames:\n", + " if fname.endswith(EXTENSIONS) and \"ph_authors\" in fname:\n", + " replace_links_preserving_code_blocks(os.path.join(dirpath, fname))\n", + "\n", + "process_all_files()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66f11201", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": 
+  "kernelspec": {
+   "display_name": "wizened-old-wizard-lab-env",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
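A possible sanity check for the final code cell in FixingLinksNotebook.ipynb: the placeholder-and-offset bookkeeping in `replace_links_preserving_code_blocks` is easy to get subtly wrong, so it is worth testing in isolation. The sketch below is illustrative only and not part of the patch (`upgrade_links` and `sample` are hypothetical names); it implements the same protect-code-spans idea with `re.split` on a capturing group, which interleaves prose and code spans and therefore needs no placeholders or offset arithmetic.

```python
import re

# Same code-span pattern the notebook uses: fenced blocks first, then inline code.
CODE_SPAN = re.compile(r"(```.*?```|`[^`]*`)", re.DOTALL)

def upgrade_links(text: str) -> str:
    """Rewrite http:// to https:// everywhere except inside code spans."""
    parts = CODE_SPAN.split(text)
    # re.split with a capturing group alternates prose and code:
    # even indices are prose (rewrite), odd indices are code spans (keep).
    return "".join(
        part if i % 2 else part.replace("http://", "https://")
        for i, part in enumerate(parts)
    )

sample = (
    "See http://example.org, run `curl http://example.org`, and\n"
    "```\nwget http://example.org\n```\n"
)
result = upgrade_links(sample)
assert "See https://example.org" in result    # prose link upgraded
assert "`curl http://example.org`" in result  # inline code untouched
assert "wget http://example.org" in result    # fenced block untouched
```

Because code spans never reach the substitution at all, this version preserves them by construction rather than by index tracking, and the asserts document the invariant the notebook's cell relies on.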