Skip to content

Commit

Permalink
Some tests and TikTok parsing fixed (#127)
Browse files (browse the repository at this point in the history)
  • Loading branch information
soxoj authored May 2, 2022

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent 5a8f98f commit b271ce6
Showing 5 changed files with 46 additions and 41 deletions.
10 changes: 5 additions & 5 deletions METHODS.md
Original file line number Diff line number Diff line change
@@ -27,8 +27,8 @@
22 | Yandex messenger profile API | | |
23 | Yandex Bugbounty user profile | | |
24 | Yandex O | [yandex_o_user_profile](https://github.com/soxoj/socid-extractor/search?q=test_yandex_o_user_profile) | |
25 | VK user profile foaf page | [vk_foaf](https://github.com/soxoj/socid-extractor/search?q=test_vk_foaf), [vk_user_profile_no_username](https://github.com/soxoj/socid-extractor/search?q=test_vk_user_profile_no_username) | , |
26 | VK user profile | [vk_blocked_user_profile](https://github.com/soxoj/socid-extractor/search?q=test_vk_blocked_user_profile), [vk_closed_user_profile](https://github.com/soxoj/socid-extractor/search?q=test_vk_closed_user_profile), [vk_user_profile_full](https://github.com/soxoj/socid-extractor/search?q=test_vk_user_profile_full), [vk_user_profile_no_username](https://github.com/soxoj/socid-extractor/search?q=test_vk_user_profile_no_username) | , , , |
25 | VK user profile foaf page | [vk_foaf](https://github.com/soxoj/socid-extractor/search?q=test_vk_foaf), [vk_user_profile_no_username](https://github.com/soxoj/socid-extractor/search?q=test_vk_user_profile_no_username) | |
26 | VK user profile | [vk_blocked_user_profile](https://github.com/soxoj/socid-extractor/search?q=test_vk_blocked_user_profile), [vk_closed_user_profile](https://github.com/soxoj/socid-extractor/search?q=test_vk_closed_user_profile), [vk_user_profile_full](https://github.com/soxoj/socid-extractor/search?q=test_vk_user_profile_full), [vk_user_profile_no_username](https://github.com/soxoj/socid-extractor/search?q=test_vk_user_profile_no_username) | |
27 | VK closed user profile | | |
28 | VK blocked user profile | | |
29 | Gravatar | [gravatar](https://github.com/soxoj/socid-extractor/search?q=test_gravatar) | |
@@ -44,7 +44,7 @@
39 | My Mail.ru | | |
40 | Behance | [behance](https://github.com/soxoj/socid-extractor/search?q=test_behance) | |
41 | Blogger | [blogger](https://github.com/soxoj/socid-extractor/search?q=test_blogger) | Failed in GitHub CI |
42 | D3.ru | | |
42 | D3.ru | [d3](https://github.com/soxoj/socid-extractor/search?q=test_d3) | |
43 | Gitlab | | |
44 | 500px GraphQL API | [500px](https://github.com/soxoj/socid-extractor/search?q=test_500px) | non-actual, 500px requires POST requests for now |
45 | Google Document API | [google_documents](https://github.com/soxoj/socid-extractor/search?q=test_google_documents) | |
@@ -87,7 +87,7 @@
82 | uID.me | | |
83 | tapd | [tapd](https://github.com/soxoj/socid-extractor/search?q=test_tapd) | |
84 | freelancer.com | | |
85 | Yelp | [yelp_userid](https://github.com/soxoj/socid-extractor/search?q=test_yelp_userid), [yelp_username](https://github.com/soxoj/socid-extractor/search?q=test_yelp_username) | , |
85 | Yelp | [yelp_userid](https://github.com/soxoj/socid-extractor/search?q=test_yelp_userid), [yelp_username](https://github.com/soxoj/socid-extractor/search?q=test_yelp_username) | |
86 | Trello API | [trello](https://github.com/soxoj/socid-extractor/search?q=test_trello) | |
87 | Weibo | [weibo](https://github.com/soxoj/socid-extractor/search?q=test_weibo) | cookies are required to get content, requests from GitHub Actions CI servers are blocked |
88 | ICQ | [icq](https://github.com/soxoj/socid-extractor/search?q=test_icq) | |
@@ -105,4 +105,4 @@
100 | pr0gramm API | [pr0gramm_api](https://github.com/soxoj/socid-extractor/search?q=test_pr0gramm_api) | |
101 | Aparat API | [aparat_api](https://github.com/soxoj/socid-extractor/search?q=test_aparat_api) | |

The table was updated at 2022-01-06 17:00:56.328459 UTC
The table has been updated at 2022-05-02 16:48:12.900875 UTC
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
[pytest]
asyncio_mode = auto
markers =
github_failed: requests from GitHub Actions CI servers are blocked
rate_limited: anti-bot / captcha / rate limiting from the site
4 changes: 2 additions & 2 deletions revision.py
Original file line number Diff line number Diff line change
@@ -92,13 +92,13 @@ def revision():
f'[{t["test"]}]({test_url_search.format(t["test"])})'
for t in tests
]
notes = [", ".join(t['notes']) for t in tests]
notes = [", ".join(t['notes']) for t in tests if t['notes']]
if not tests:
methods_without_tests.append(m)

f.write(f'{i} | {m} | {", ".join(tests_links)} | {", ".join(notes)} |\n')

f.write(f'\nThe table was updated at {datetime.utcnow()} UTC\n')
f.write(f'\nThe table has been updated at {datetime.utcnow()} UTC\n')

print(f'Total {len(m)} methods, {len(methods_without_tests)} without tests: ')
print(methods_without_tests)
44 changes: 23 additions & 21 deletions socid_extractor/schemes.py
Original file line number Diff line number Diff line change
@@ -632,6 +632,8 @@
'is_blocking': lambda x: x.get('isBlocking'),
'is_muting': lambda x: x.get('isMuting'),
'post_counts': lambda x: x.get('userPostCounts'),
'follower_count': lambda x: x.get('socialStats', {}).get('followerCount'),
'following_count': lambda x: x.get('socialStats', {}).get('followingCount'),
}
},
'Odnoklassniki': {
@@ -991,28 +993,28 @@
}
},
'TikTok': {
'flags': ['tiktokcdn.com', '__NEXT_DATA__'],
'regex': r'<script id="__NEXT_DATA__"[^>]+>(.+?)</script>',
'flags': ['tiktokcdn.com', 'SIGI_STATE'],
'regex': r'<script id="SIGI_STATE"[^>]+>(.+?)</script>',
'extract_json': True,
'transforms': [
json.loads,
lambda x: x['props']['pageProps'].get('userInfo', {}),
lambda x: {**x['UserModule']['users'].get(x['UserPage']['uniqueId'], {}), **x['UserModule']['stats'].get(x['UserPage']['uniqueId'], {})},
json.dumps,
],
'fields': {
'tiktok_id': lambda x: x['user']['id'],
'tiktok_username': lambda x: x['user']['uniqueId'],
'fullname': lambda x: x['user']['nickname'],
'bio': lambda x: x['user']['signature'],
'image': lambda x: x['user']['avatarMedium'],
'is_verified': lambda x: x['user']['verified'],
'is_secret': lambda x: x['user']['secret'],
'sec_uid': lambda x: x['user']['secUid'],
'following_count': lambda x: x['stats']['followingCount'],
'follower_count': lambda x: x['stats']['followerCount'],
'heart_count': lambda x: x['stats']['heartCount'],
'video_count': lambda x: x['stats']['videoCount'],
'digg_count': lambda x: x['stats']['diggCount'],
'tiktok_id': lambda x: x['id'],
'tiktok_username': lambda x: x['uniqueId'],
'fullname': lambda x: x['nickname'],
'bio': lambda x: x['signature'],
'image': lambda x: x['avatarMedium'],
'is_verified': lambda x: x['verified'],
'is_secret': lambda x: x['secret'],
'sec_uid': lambda x: x['secUid'],
'following_count': lambda x: x['followingCount'],
'follower_count': lambda x: x['followerCount'],
'heart_count': lambda x: x['heartCount'],
'video_count': lambda x: x['videoCount'],
'digg_count': lambda x: x['diggCount'],
}
},
'VC.ru': {
@@ -1266,11 +1268,11 @@
'flags': [' | ASKfm</title>'],
'bs': True,
'fields': {
'username': lambda x: x.find('span', {'class': 'userName_wrap'}).find('span', {'class': 'userName'}).contents[0].lstrip('@'),
'fullname': lambda x: x.find('h1', {'class': 'userName_status'}).find('span', {'class': 'userName'}).contents[0].lstrip('@'),
'posts_count': lambda x: x.find('div', {'class': 'profileStats_number profileTabAnswerCount'}).contents[0],
'likes_count': lambda x: x.find('div', {'class': 'profileStats_number profileTabLikeCount'}).contents[0],
'image': lambda x: x.find('a', {'class': 'userAvatar-big'}).get('style').replace('background-image:url(','').rstrip(')'),
'username': lambda x: x.find('form', {'id': 'profileAnswersForm'}).get('action', '').split('/')[-2],
'fullname': lambda x: x.find('span', {'class': 'userName'}).contents[0],
'posts_count': lambda x: x.find('div', {'class': 'profileTabAnswerCount'}).contents[0],
'likes_count': lambda x: x.find('div', {'class': 'profileTabLikeCount'}).contents[0],
'image': lambda x: x.find('a', {'class': 'userAvatar-big'}).get('style').replace('background-image:url(','').rstrip(')').split(';')[1],
'location': lambda x: x.find('div', {'class': 'icon-location'}).contents[0],
}
},
28 changes: 15 additions & 13 deletions tests/test_e2e.py
Original file line number Diff line number Diff line change
@@ -110,8 +110,10 @@ def test_medium():
assert info.get('medium_id') == '4894fec6b289'
assert info.get('medium_username') == 'lys1n'
assert info.get('fullname') == 'Марк Лясин'
assert info.get('twitter_username') == 'lys1n'
# assert info.get('twitter_username') == 'lys1n'
assert info.get('is_suspended') == 'False'
assert 'follower_count' in info
assert 'following_count' in info


def test_odnoklassniki():
@@ -505,9 +507,10 @@ def test_blogger():


def test_d3():
info = extract(parse('https://d3.ru/user/deer00hunter')[0])
"""D3.ru"""
info = extract(parse('https://d3.ru/user/nomad62')[0])

assert info.get('uid') == '75504'
assert info.get('uid') == '126836'


@pytest.mark.skip(reason="broken")
@@ -1043,9 +1046,9 @@ def test_trello():
assert info.get("id") == "5e78cae55d711a6382e239c1"
assert info.get("username") == "xfubuki"
assert info.get("fullname") == "xFubuki"
assert info.get("image") == "https://trello-members.s3.amazonaws.com/5e78cae55d711a6382e239c1/d9c5264e657de6175f14a9067126873f/170.png"
assert info.get("type") == "normal"
assert info.get("is_verified") == "True"
assert 'image' in info


@pytest.mark.github_failed
@@ -1077,18 +1080,17 @@ def test_pastebin():


def test_tinder():
info = extract(parse('https://tinder.com/@john_mclean')[0])
info = extract(parse('https://tinder.com/@john')[0])

assert info.get("tinder_username") == "john_mclean"
assert info.get("birth_date").startswith("19")
assert info.get("id") == "5f4b5bc57f87b00100caa6f9"
assert info.get("badges_list") == "['selfie_verified']"
assert info.get("position_held") == "Something something consultant"
assert info.get("fullname") == "John"
assert info.get("image") == "https://images-ssl.gotinder.com/5f4b5bc57f87b00100caa6f9/original_819f94bd-b1d8-4946-a9bf-ab9a3ced1ff0.jpeg"
assert info.get("tinder_username") == "john"
assert info.get("birth_date").startswith("2000")
assert info.get("id") == "60c40ff58fb9ce01006d74ce"
assert info.get("fullname") == "Mamk"
assert eval(info.get("education"))[0] == "Mokpo National Maritime University"
assert info.get("image") == "https://images-ssl.gotinder.com/60c40ff58fb9ce01006d74ce/original_30e5c835-c34f-447b-b346-8b539e7a7e07.jpeg"

images_list = eval(info.get("images"))
assert 'https://images-ssl.gotinder.com/5f4b5bc57f87b00100caa6f9/original_42294ebf-cbcc-42a4-9f6b-71ba9234c237.jpeg' in images_list
assert 'https://images-ssl.gotinder.com/60c40ff58fb9ce01006d74ce/original_30e5c835-c34f-447b-b346-8b539e7a7e07.jpeg' in images_list


def test_ifunny_co():

0 comments on commit b271ce6

Please sign in to comment.