Skip to content

Commit

Permalink
Merge pull request #1 from postaddictme/master
Browse files Browse the repository at this point in the history
Update
  • Loading branch information
Mulkave authored Mar 7, 2017
2 parents 5093c2e + 89fe67c commit fec6ecd
Show file tree
Hide file tree
Showing 7 changed files with 168 additions and 106 deletions.
33 changes: 33 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,5 +1,38 @@
Instagram PHP Scraper Change Log
================================
Version v0.4.5
--------------
- Enh: BIG thing. Get the best possible resolutions for medias

Version v0.4.4
--------------
- Bug: Image urls broken for some medias in getMediaByCode()

Version v0.4.3
--------------
- Enh: Make methods getContentsFromUrl() and generateRandomString() static

Version v0.4.2
--------------
- Bug: Fix 405 error with fetching accounts by id
- Bug: Fix 405 error with fetching comments by id

Version v0.4.1
--------------
- Enh: URL-encode endpoints to support, for example, Japanese-language input

Version v0.4.0
--------------
- Enh: New methods to paginate medias getPaginateMedias() and getPaginateMediasByTag()

Version v0.3.5
--------------
- Enh: Convenient media pagination getMediasByTag()

Version v0.3.4
--------------
- Bug: Fix media urls

Version v0.3.3
--------------
- Bug: Include class Location and Comment in InstagramScraper.php
Expand Down
20 changes: 17 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ echo $account->username;

### Search users by username
```php
$medias = Instagram::searchAccountsByUsername('durov');
$users = Instagram::searchAccountsByUsername('durov');
echo '<pre>';
echo json_encode($medias);
echo json_encode($users);
echo '</pre><br/>';
```

Expand All @@ -56,9 +56,20 @@ Available properties:
$imageStandardResolutionUrl;
$imageHighResolutionUrl;
$caption;
$captionIsEdited;
$isAd;
$videoLowResolutionUrl;
$videoStandardResolutionUrl;
$videoLowBandwidthUrl;
$videoViews;
$code;
$owner;
$ownerId;
$likesCount;
$locationId;
$locationName;
$commentsCount;

*/
echo $medias[0]->imageHighResolutionUrl;
echo $medias[0]->caption;
Expand Down Expand Up @@ -163,4 +174,7 @@ $medias = Instagram::getLocationTopMediasById(1);
### Get location medias by location id
```php
$medias = Instagram::getLocationMediasById(1);
```
```

### Other
Java library: https://github.com/postaddictme/instagram-java-scraper
10 changes: 6 additions & 4 deletions index.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@

require_once 'vendor/autoload.php';
require_once 'src/InstagramScraper.php';
use InstagramScraper\Exception\InstagramException;

use InstagramScraper\Instagram;

$instagram = new Instagram();
try {
$medias = Instagram::getMedias('kevin', 1000);
echo $medias[998]->imageThumbnailUrl;
// $medias = Instagram::getMedias('kevin', 1497);
// echo json_encode($medias[1497]);
$medias = InstagramScraper\Instagram::getMediasByTag('paveldurov', 300);
echo sizeof($medias) . '\n';
// echo json_encode($medias);
} catch (\Exception $ex) {
print_r($ex);
}
Expand Down
10 changes: 6 additions & 4 deletions src/InstagramScraper/Endpoints.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@ class Endpoints
const MEDIA_JSON_BY_LOCATION_ID = 'https://www.instagram.com/explore/locations/{{facebookLocationId}}/?__a=1&max_id={{maxId}}';
const MEDIA_JSON_BY_TAG = 'https://www.instagram.com/explore/tags/{tag}/?__a=1&max_id={max_id}';
const GENERAL_SEARCH = 'https://www.instagram.com/web/search/topsearch/?query={query}';
const ACCOUNT_JSON_INFO_BY_ID = 'https://www.instagram.com/query/?q=ig_user({userId}){id,username,external_url,full_name,profile_pic_url,biography,followed_by{count},follows{count},media{count},is_private,is_verified}';
const LAST_COMMENTS_BY_CODE = 'https://www.instagram.com/query/?q=ig_shortcode({{code}}){comments.last({{count}}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}';
const COMMENTS_BEFORE_COMMENT_ID_BY_CODE = 'https://www.instagram.com/query/?q=ig_shortcode({{code}}){comments.before({{commentId}},{{count}}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}';
const LAST_LIKES_BY_CODE = 'https://www.instagram.com/query/?q=ig_shortcode({{code}}){likes{nodes{id,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}';
const ACCOUNT_JSON_INFO_BY_ID = 'ig_user({userId}){id,username,external_url,full_name,profile_pic_url,biography,followed_by{count},follows{count},media{count},is_private,is_verified}';
const LAST_COMMENTS_BY_CODE = 'ig_shortcode({{code}}){comments.last({{count}}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}';
const COMMENTS_BEFORE_COMMENT_ID_BY_CODE = 'ig_shortcode({{code}}){comments.before({{commentId}},{{count}}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}';
const LAST_LIKES_BY_CODE = 'ig_shortcode({{code}}){likes{nodes{id,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}';

const INSTAGRAM_QUERY_URL = 'https://www.instagram.com/query/';
const INSTAGRAM_CDN_URL = 'https://scontent.cdninstagram.com/';

public static function getAccountPageLink($username)
{
Expand Down
101 changes: 56 additions & 45 deletions src/InstagramScraper/Instagram.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,11 @@ public static function getAccount($username)

public static function getAccountById($id)
{

if (!is_numeric($id)) {
throw new \InvalidArgumentException('User id must be integer or integer wrapped in string');
}
$response = Request::get(Endpoints::getAccountJsonInfoLinkByAccountId($id));
if ($response->code === 404) {
throw new InstagramNotFoundException('Account with given username does not exist.');
}
if ($response->code !== 200) {
throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.');
}
$userArray = json_decode($response->raw_body, true);
$parameters = Endpoints::getAccountJsonInfoLinkByAccountId($id);
$userArray = json_decode(self::getContentsFromUrl($parameters), true);
if ($userArray['status'] === 'fail') {
throw new InstagramException($userArray['message']);
}
Expand All @@ -55,6 +48,38 @@ public static function getAccountById($id)
return Account::fromAccountPage($userArray);
}

/**
 * POSTs a query to Endpoints::INSTAGRAM_QUERY_URL and returns the raw response.
 *
 * The request body is the string 'q=' followed by $parameters, sent as-is.
 * A freshly generated random token is sent both as the csrftoken cookie and
 * as the X-Csrftoken header, together with an instagram.com Referer.
 *
 * @param string $parameters Query expression appended after 'q=' in the POST body.
 *
 * @return string|false Result of curl_exec(), or false when curl_init() is
 *                      not available in this PHP build.
 */
private static function getContentsFromUrl($parameters)
{
    // Without the cURL extension there is nothing to do; callers receive false.
    if (function_exists('curl_init') === false) {
        return false;
    }

    $csrfToken = self::generateRandomString();

    // Options are applied one by one, in this order, to the new handle.
    $options = [
        CURLOPT_URL            => Endpoints::INSTAGRAM_QUERY_URL,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST           => 1,
        CURLOPT_POSTFIELDS     => 'q=' . $parameters,
        CURLOPT_HTTPHEADER     => [
            "Cookie: csrftoken=$csrfToken;",
            "X-Csrftoken: $csrfToken",
            "Referer: https://www.instagram.com/",
        ],
    ];

    $handle = curl_init();
    foreach ($options as $option => $value) {
        curl_setopt($handle, $option, $value);
    }

    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
}

/**
 * Builds a random string drawn from the characters 0-9, a-z and A-Z.
 *
 * Each character is chosen with one call to rand(), so the output is not
 * suitable where cryptographic unpredictability is required.
 *
 * @param int $length Number of characters to generate (default 10).
 *
 * @return string The generated string; empty when $length is zero or negative.
 */
private static function generateRandomString($length = 10)
{
    $alphabet = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
    $lastIndex = strlen($alphabet) - 1;

    // Collect one random character per iteration, then join them at the end.
    $picked = [];
    for ($remaining = $length; $remaining > 0; $remaining--) {
        $picked[] = $alphabet[rand(0, $lastIndex)];
    }

    return implode('', $picked);
}

public static function getMedias($username, $count = 20, $maxId = '')
{
$index = 0;
Expand Down Expand Up @@ -95,8 +120,8 @@ public static function getPaginateMedias($username, $maxId = '')
$medias = [];

$toReturn = [
'medias' => $medias,
'maxId' => $maxId,
'medias' => $medias,
'maxId' => $maxId,
'hasNextPage' => $hasNextPage
];

Expand Down Expand Up @@ -124,8 +149,8 @@ public static function getPaginateMedias($username, $maxId = '')
$hasNextPage = $arr['more_available'];

$toReturn = [
'medias' => $medias,
'maxId' => $maxId,
'medias' => $medias,
'maxId' => $maxId,
'hasNextPage' => $hasNextPage
];

Expand Down Expand Up @@ -160,6 +185,7 @@ public static function getMediasByTag($tag, $count = 12, $maxId = '')
{
$index = 0;
$medias = [];
$mediaIds = [];
$hasNextPage = true;
while ($index < $count && $hasNextPage) {
$response = Request::get(Endpoints::getMediasJsonByTagLink($tag, $maxId));
Expand All @@ -179,7 +205,12 @@ public static function getMediasByTag($tag, $count = 12, $maxId = '')
if ($index === $count) {
return $medias;
}
$medias[] = Media::fromTagPage($mediaArray);
$media = Media::fromTagPage($mediaArray);
if (in_array($media->id, $mediaIds)) {
return $medias;
}
$mediaIds[] = $media->id;
$medias[] = $media;
$index++;
}
if (count($nodes) == 0) {
Expand All @@ -197,8 +228,8 @@ public static function getPaginateMediasByTag($tag, $maxId = '')
$medias = [];

$toReturn = [
'medias' => $medias,
'maxId' => $maxId,
'medias' => $medias,
'maxId' => $maxId,
'hasNextPage' => $hasNextPage
];

Expand Down Expand Up @@ -230,11 +261,13 @@ public static function getPaginateMediasByTag($tag, $maxId = '')

$maxId = $arr['tag']['media']['page_info']['end_cursor'];
$hasNextPage = $arr['tag']['media']['page_info']['has_next_page'];
$count = $arr['tag']['media']['count'];

$toReturn = [
'medias' => $medias,
'maxId' => $maxId,
'hasNextPage' => $hasNextPage
'medias' => $medias,
'count' => $count,
'maxId' => $maxId,
'hasNextPage' => $hasNextPage,
];

return $toReturn;
Expand Down Expand Up @@ -336,17 +369,12 @@ public static function getMediaCommentsByCode($code, $count = 10, $maxId = null)
$remain = 0;
}
if (!isset($maxId)) {
$response = Request::get(Endpoints::getLastCommentsByCodeLink($code, $numberOfCommentsToRetreive));
$parameters = Endpoints::getLastCommentsByCodeLink($code, $numberOfCommentsToRetreive);

} else {
$response = Request::get(Endpoints::getCommentsBeforeCommentIdByCode($code, $numberOfCommentsToRetreive, $maxId));
$parameters = Endpoints::getCommentsBeforeCommentIdByCode($code, $numberOfCommentsToRetreive, $maxId);
}
if ($response->code === 404) {
throw new InstagramNotFoundException('Account with given username does not exist.');
}
if ($response->code !== 200) {
throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.');
}
$jsonResponse = json_decode($response->raw_body, true);
$jsonResponse = json_decode(self::getContentsFromUrl($parameters), true);
$nodes = $jsonResponse['comments']['nodes'];
foreach ($nodes as $commentArray) {
$comments[] = Comment::fromApi($commentArray);
Expand Down Expand Up @@ -422,21 +450,4 @@ public static function getLocationById($facebookLocationId)
$jsonResponse = json_decode($response->raw_body, true);
return Location::makeLocation($jsonResponse['location']);
}

/**
 * Fetches the accounts listed under 'likes' => 'nodes' for a media shortcode.
 *
 * Requests the URL produced by Endpoints::getLastLikesByCodeLink($code),
 * decodes the JSON body and converts every node's 'user' entry with
 * Account::fromAccountPage().
 *
 * @param string $code Media shortcode passed to the endpoint builder.
 *
 * @return array List of Account objects, one per returned like node.
 *
 * @throws InstagramNotFoundException When the response code is 404.
 * @throws InstagramException         When the response code is anything other than 200.
 */
public static function getLastLikesByCode($code)
{
    $likesUrl = Endpoints::getLastLikesByCodeLink($code);
    $likesResponse = Request::get($likesUrl);

    if ($likesResponse->code === 404) {
        throw new InstagramNotFoundException('Media with this shortcode doesn\'t exist');
    }
    if ($likesResponse->code !== 200) {
        $message = 'Response code is ' . $likesResponse->code
            . '. Body: ' . $likesResponse->body
            . ' Something went wrong. Please report issue.';
        throw new InstagramException($message);
    }

    $decoded = json_decode($likesResponse->raw_body, true);

    $likers = [];
    foreach ($decoded['likes']['nodes'] as $likeNode) {
        $likers[] = Account::fromAccountPage($likeNode['user']);
    }

    return $likers;
}
}
Loading

0 comments on commit fec6ecd

Please sign in to comment.