Skip to content

Commit

Permalink
extractors/instagram: Add support
Browse files Browse the repository at this point in the history
  • Loading branch information
iawia002 committed Apr 19, 2018
1 parent bffc789 commit dcd9a65
Show file tree
Hide file tree
Showing 5 changed files with 174 additions and 2 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,7 @@ Facebook | <https://facebook.com> | ✓ | | | |
斗鱼视频 | <https://v.douyu.com> | ✓ | | | |
秒拍 | <https://www.miaopai.com> | ✓ | | | |
微博 | <https://weibo.com> | ✓ | | | |
Instagram | <https://www.instagram.com> | ✓ | ✓ | | |


## Known issues
Expand Down
123 changes: 123 additions & 0 deletions extractors/instagram.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package extractors

import (
"encoding/json"

"github.com/iawia002/annie/downloader"
"github.com/iawia002/annie/parser"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
)

// The types below mirror the part of the JSON assigned to window._sharedData
// on an Instagram post page that the extractor reads. They are listed from the
// outermost object inwards; fields not named here are ignored by encoding/json.
type (
	// instagram is the root object of the decoded payload.
	instagram struct {
		EntryData instagramEntryData `json:"entry_data"`
	}

	// instagramEntryData holds the "PostPage" array of the payload.
	instagramEntryData struct {
		PostPage []instagramPostPage `json:"PostPage"`
	}

	// instagramPostPage is one element of the "PostPage" array.
	instagramPostPage struct {
		Graphql instagramGraphql `json:"graphql"`
	}

	// instagramGraphql wraps the media description of the post.
	instagramGraphql struct {
		ShortcodeMedia instagramShortcodeMedia `json:"shortcode_media"`
	}

	// instagramShortcodeMedia describes the post itself. The extractor treats a
	// non-empty VideoURL as a video post and otherwise falls back to DisplayURL
	// or, for albums, to the children listed in EdgeSidecar.
	instagramShortcodeMedia struct {
		DisplayURL  string               `json:"display_url"`
		VideoURL    string               `json:"video_url"`
		EdgeSidecar instagramEdgeSidecar `json:"edge_sidecar_to_children"`
	}

	// instagramEdgeSidecar lists the children of an album post.
	instagramEdgeSidecar struct {
		Edges []instagramEdges `json:"edges"`
	}

	// instagramEdges is a single entry of the "edges" array.
	instagramEdges struct {
		Node instagramEdgeNode `json:"node"`
	}

	// instagramEdgeNode carries the image URL of one album child.
	instagramEdgeNode struct {
		DisplayURL string `json:"display_url"`
	}
)

// Instagram extracts the media of a single Instagram post and passes the
// result to the downloader.
//
// The post page embeds its data as JSON assigned to window._sharedData. That
// JSON is decoded and one of three shapes is handled:
//   - a video post (video_url is set), offered as mp4;
//   - a single-image post, whose display_url is offered as jpg;
//   - an album (edge_sidecar_to_children has edges), where the display_url
//     of every child is offered as jpg.
//
// The signature has no error return, so a page without the expected data
// (missing _sharedData, undecodable JSON, or an empty PostPage list) panics
// with a descriptive message instead of a bare index-out-of-range error.
func Instagram(url string) downloader.VideoData {
	html := request.Get(url, url)
	// get the title
	doc := parser.GetDoc(html)
	title := parser.Title(doc)

	matches := utils.MatchOneOf(html, `window\._sharedData\s*=\s*(.*);`)
	if len(matches) < 2 {
		panic("instagram: window._sharedData not found in " + url)
	}
	var data instagram
	if err := json.Unmarshal([]byte(matches[1]), &data); err != nil {
		panic("instagram: decoding window._sharedData of " + url + ": " + err.Error())
	}
	if len(data.EntryData.PostPage) == 0 {
		panic("instagram: no post data in window._sharedData of " + url)
	}
	media := data.EntryData.PostPage[0].Graphql.ShortcodeMedia

	var dataType string
	var urls []downloader.URLData
	switch {
	case media.VideoURL != "":
		// Video
		dataType = "video"
		urls = []downloader.URLData{instagramURLData(media.VideoURL, url, "mp4")}
	case len(media.EdgeSidecar.Edges) == 0:
		// Single image. len() rather than a nil check, so that an explicit
		// empty "edges" array is not mistaken for an album without entries.
		dataType = "image"
		urls = []downloader.URLData{instagramURLData(media.DisplayURL, url, "jpg")}
	default:
		// Album
		dataType = "image"
		urls = make([]downloader.URLData, 0, len(media.EdgeSidecar.Edges))
		for _, edge := range media.EdgeSidecar.Edges {
			urls = append(urls, instagramURLData(edge.Node.DisplayURL, url, "jpg"))
		}
	}

	// The format size is the sum of the sizes of all its parts.
	var totalSize int64
	for _, part := range urls {
		totalSize += part.Size
	}

	extractedData := downloader.VideoData{
		Site:  "Instagram instagram.com",
		Title: utils.FileName(title),
		Type:  dataType,
		Formats: map[string]downloader.FormatData{
			"default": {
				URLs: urls,
				Size: totalSize,
			},
		},
	}
	extractedData.Download(url)
	return extractedData
}

// instagramURLData builds the download entry for one media URL. The size is
// obtained from request.Size, which receives pageURL as its second argument.
func instagramURLData(mediaURL, pageURL, ext string) downloader.URLData {
	return downloader.URLData{
		URL:  mediaURL,
		Size: request.Size(mediaURL, pageURL),
		Ext:  ext,
	}
}
47 changes: 47 additions & 0 deletions extractors/instagram_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package extractors

import (
"testing"

"github.com/iawia002/annie/config"
"github.com/iawia002/annie/test"
)

// TestInstagram runs the Instagram extractor once per supported post kind
// (video, single image, album) and compares each result with the expected
// title and size via test.Check.
func TestInstagram(t *testing.T) {
	// NOTE(review): presumably keeps the extractor from downloading the
	// media itself; confirm against the config package.
	config.InfoOnly = true

	type instagramCase struct {
		label string
		want  test.Args
	}
	cases := []instagramCase{
		{
			label: "Video",
			want: test.Args{
				URL:   "https://www.instagram.com/p/BYQ0PMWlAQY",
				Title: "王薇雅🇨🇳🇺🇸 on Instagram:“我的Ins是用来分享#lifestyle 一些正能量健身旅游等,请那些负能量离我远点!谢谢😀😀BTW,我从来不否认我P图微调,谁都想展现自己最完美的一面在网上.不要再给我粉丝私信黑我了,那么空能不能多读书看报增加些涵养🙂🙂🙂”",
				Size:  1469037,
			},
		},
		{
			label: "Image Single",
			want: test.Args{
				URL:   "https://www.instagram.com/p/Bei7whzgfMq",
				Title: "王薇雅🇨🇳🇺🇸 on Instagram:“Let go of what u can no longer keep. Protect what’s still worth keeping. ✨✨✨”",
				Size:  144348,
			},
		},
		{
			label: "Image Album",
			want: test.Args{
				URL:   "https://www.instagram.com/p/BdZ7sPTgchP",
				Title: "王薇雅🇨🇳🇺🇸 on Instagram:“2018的第一餐,吃得很满足🐷#happynewyear #🎆 #🎊”",
				Size:  10353828,
			},
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.label, func(t *testing.T) {
			test.Check(t, tc.want, Instagram(tc.want.URL))
		})
	}
}
2 changes: 2 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ func download(videoURL string) {
extractors.Miaopai(videoURL)
case "weibo":
extractors.Weibo(videoURL)
case "instagram":
extractors.Instagram(videoURL)
default:
extractors.Universal(videoURL)
}
Expand Down
3 changes: 1 addition & 2 deletions utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,7 @@ func Domain(url string) string {

// FileName converts a string to a valid filename.
func FileName(name string) string {
// FIXME(iawia002) file name can't have /
rep := strings.NewReplacer("/", " ", "|", "-", ": ", ":", ":", ":")
rep := strings.NewReplacer("\n", " ", "/", " ", "|", "-", ": ", ":", ":", ":")
name = rep.Replace(name)
if runtime.GOOS == "windows" {
rep = strings.NewReplacer("\"", " ", "?", " ", "*", " ", "\\", " ", "<", " ", ">", " ")
Expand Down

0 comments on commit dcd9a65

Please sign in to comment.