-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
14 changed files
with
328 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
GEM | ||
remote: http://rubygems.org/ | ||
specs: | ||
diff-lcs (1.1.3) | ||
git (1.2.5) | ||
jeweler (1.8.4) | ||
bundler (~> 1.0) | ||
git (>= 1.2.5) | ||
rake | ||
rdoc | ||
json (1.7.5) | ||
nokogiri (1.5.5) | ||
rake (0.9.2.2) | ||
rdoc (3.12) | ||
json (~> 1.4) | ||
rspec (2.11.0) | ||
rspec-core (~> 2.11.0) | ||
rspec-expectations (~> 2.11.0) | ||
rspec-mocks (~> 2.11.0) | ||
rspec-core (2.11.1) | ||
rspec-expectations (2.11.3) | ||
diff-lcs (~> 1.1.3) | ||
rspec-mocks (2.11.3) | ||
|
||
PLATFORMS | ||
ruby | ||
|
||
DEPENDENCIES | ||
bundler | ||
jeweler | ||
nokogiri | ||
rdoc | ||
rspec |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,11 +17,12 @@ Jeweler::Tasks.new do |gem| | |
gem.name = "opengraph_parser" | ||
gem.homepage = "http://github.com/huyha85/opengraph_parser" | ||
gem.license = "MIT" | ||
gem.summary = %Q{TODO: one-line summary of your gem} | ||
gem.description = %Q{TODO: longer description of your gem} | ||
gem.summary = %Q{A simple Ruby library for parsing Open Graph Protocol information from a website.} | ||
gem.description = %Q{A simple Ruby library for parsing Open Graph Protocol information from a website. It also includes a fallback solution when the website has no Open Graph information.} | ||
gem.email = "[email protected]" | ||
gem.authors = ["Huy Ha"] | ||
gem.authors = ["Huy Ha", "Duc Trinh"] | ||
# dependencies defined in Gemfile | ||
gem.files = Dir.glob('lib/**/*.rb') | ||
end | ||
Jeweler::RubygemsDotOrgTasks.new | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
0.1.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
require 'nokogiri' | ||
require 'redirect_follower' | ||
|
||
class OpenGraph | ||
attr_accessor :src, :url, :type, :title, :description, :images, :metadata, :response | ||
|
||
def initialize(src, fallback = true) | ||
@src = src | ||
@images = [] | ||
@metadata = {} | ||
parse_opengraph | ||
load_fallback if fallback | ||
check_images_path | ||
end | ||
|
||
def parse_opengraph | ||
begin | ||
@response = RedirectFollower.new(@src).resolve | ||
rescue | ||
@title = @url = @src | ||
return | ||
end | ||
|
||
if @response && @response.body | ||
attrs_list = %w(title url type description) | ||
doc = Nokogiri.parse(@response.body) | ||
doc.css('meta').each do |m| | ||
if m.attribute('property') && m.attribute('property').to_s.match(/^og:(.+)$/i) | ||
m_content = m.attribute('content').to_s.strip | ||
case metadata_name = m.attribute('property').to_s.gsub("og:", "") | ||
when *attrs_list | ||
self.instance_variable_set("@#{metadata_name}", m_content) unless m_content.empty? | ||
when "image" | ||
add_image(m_content) | ||
else | ||
@metadata[m.attribute('property').to_s] = m_content | ||
end | ||
end | ||
end | ||
end | ||
end | ||
|
||
def load_fallback | ||
if @response && @response.body | ||
doc = Nokogiri.parse(@response.body) | ||
|
||
if @title.to_s.empty? && doc.xpath("//head/title").size > 0 | ||
@title = doc.xpath("//head/title").first.text.to_s.strip | ||
end | ||
|
||
@url = @src if @url.to_s.empty? | ||
|
||
if @description.to_s.empty? && description_meta = doc.xpath("//head/meta[@name='description']").first | ||
@description = description_meta.attribute("content").to_s.strip | ||
end | ||
|
||
fetch_images(doc, "//head/link[@rel='image_src']", "href") if @images.empty? | ||
fetch_images(doc, "//img", "src") if @images.empty? | ||
end | ||
end | ||
|
||
def check_images_path | ||
uri = URI.parse(URI.escape(@src)) | ||
imgs = @images.dup | ||
@images = [] | ||
imgs.each do |img| | ||
if URI.parse(URI.escape(img)).host.nil? | ||
add_image("#{uri.scheme}://#{uri.host}:#{uri.port}#{img}") | ||
else | ||
add_image(img) | ||
end | ||
end | ||
end | ||
|
||
private | ||
def add_image(image_url) | ||
@images << image_url unless @images.include?(image_url) || image_url.to_s.empty? | ||
end | ||
|
||
def fetch_images(doc, xpath_str, attr) | ||
doc.xpath(xpath_str).each do |link| | ||
add_image(link.attribute(attr).to_s.strip) | ||
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
require 'open_graph' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
require 'net/http' | ||
|
||
class RedirectFollower | ||
class TooManyRedirects < StandardError; end | ||
|
||
attr_accessor :url, :body, :redirect_limit, :response | ||
|
||
def initialize(url, limit = 5) | ||
@url, @redirect_limit = url, limit | ||
end | ||
|
||
def resolve | ||
raise TooManyRedirects if redirect_limit < 0 | ||
|
||
self.response = Net::HTTP.get_response(URI.parse(URI.escape(url))) | ||
|
||
if response.kind_of?(Net::HTTPRedirection) | ||
self.url = redirect_url | ||
self.redirect_limit -= 1 | ||
resolve | ||
end | ||
|
||
self.body = response.body | ||
self | ||
end | ||
|
||
def redirect_url | ||
if response['location'].nil? | ||
response.body.match(/<a href=\"([^>]+)\">/i)[1] | ||
else | ||
response['location'] | ||
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') | ||
|
||
describe OpenGraph do
  # Loads an HTML fixture from spec/view and makes RedirectFollower return it
  # as the resolved response, so no network request is made.
  def stub_page(fixture_name)
    fixture_path = "#{File.dirname(__FILE__)}/../view/#{fixture_name}"
    page = double(body: File.open(fixture_path, 'r') { |f| f.read })
    RedirectFollower.stub(:new) { double(resolve: page) }
  end

  describe "#initialize" do
    context "with invalid src" do
      it "should set title and url the same as src" do
        graph = OpenGraph.new("invalid")

        graph.src.should == "invalid"
        graph.title.should == "invalid"
        graph.url.should == "invalid"
      end
    end

    context "with no fallback" do
      it "should get values from opengraph metadata" do
        stub_page("opengraph.html")

        graph = OpenGraph.new("http://test.host", false)

        graph.src.should == "http://test.host"
        graph.title.should == "OpenGraph Title"
        graph.type.should == "article"
        graph.url.should == "http://test.host"
        graph.description.should == "My OpenGraph sample site for Rspec"
        graph.images.should == ["http://test.host/images/rock1.jpg", "http://test.host/images/rock2.jpg"]
      end
    end

    context "with fallback" do
      context "when website has opengraph metadata" do
        it "should get values from opengraph metadata" do
          stub_page("opengraph.html")

          graph = OpenGraph.new("http://test.host")

          graph.src.should == "http://test.host"
          graph.title.should == "OpenGraph Title"
          graph.type.should == "article"
          graph.url.should == "http://test.host"
          graph.description.should == "My OpenGraph sample site for Rspec"
          graph.images.should == ["http://test.host/images/rock1.jpg", "http://test.host/images/rock2.jpg"]
        end
      end

      context "when website has no opengraph metadata" do
        it "should lookup for other data from website" do
          stub_page("opengraph_no_metadata.html")

          graph = OpenGraph.new("http://test.host")

          graph.src.should == "http://test.host"
          graph.title.should == "OpenGraph Title Fallback"
          graph.type.should be_nil
          graph.url.should == "http://test.host"
          graph.description.should == "Short Description Fallback"
          graph.images.should == ["http://test.host:80/images/wall1.jpg", "http://test.host:80/images/wall2.jpg"]
        end
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') | ||
|
||
describe RedirectFollower do
  describe "#resolve" do
    let(:url) { "http://test.host" }

    # A plain, final response.
    let(:mock_res) { double(body: "Body is here.") }

    # A redirect response without a Location header: every header lookup
    # returns nil, so the target has to be taken from the anchor in the body.
    let(:mock_redirect) {
      m = double(body: %Q{<body><a href="http://new.test.host"></a></body>}, kind_of?: Net::HTTPRedirection)
      m.stub(:[]).and_return(nil)
      m
    }

    context "with no redirection" do
      it "should return the response" do
        Net::HTTP.should_receive(:get_response).and_return(mock_res)

        res = RedirectFollower.new(url).resolve
        res.body.should == "Body is here."
        res.redirect_limit.should == 5
      end
    end

    context "with redirection" do
      it "should follow the link in redirection" do
        # These URLs contain nothing that needs escaping, so the expected
        # URIs are built directly (URI.escape no longer exists in Ruby 3.0+).
        Net::HTTP.should_receive(:get_response).with(URI.parse(url)).and_return(mock_redirect)
        Net::HTTP.should_receive(:get_response).with(URI.parse("http://new.test.host")).and_return(mock_res)

        res = RedirectFollower.new(url).resolve
        res.body.should == "Body is here."
        res.redirect_limit.should == 4
      end
    end

    context "with unlimited redirection" do
      it "should raise TooManyRedirects error" do
        Net::HTTP.stub(:get_response).and_return(mock_redirect)
        lambda {
          RedirectFollower.new(url).resolve
        }.should raise_error(RedirectFollower::TooManyRedirects)
      end
    end
  end
end
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,13 @@ | ||
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) | ||
$LOAD_PATH.unshift(File.dirname(__FILE__)) | ||
require 'rspec' | ||
require 'opengraph_parser' | ||
require 'open_graph' | ||
require 'redirect_follower' | ||
|
||
# Requires supporting files with custom matchers and macros, etc, | ||
# in ./support/ and its subdirectories. | ||
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f} | ||
|
||
RSpec.configure do |config| | ||
|
||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
<html> | ||
<head> | ||
<title>OpenGraph Title Fallback</title> | ||
<meta property="og:title" content="OpenGraph Title" /> | ||
<meta property="og:type" content="article" /> | ||
<meta property="og:url" content="http://test.host" /> | ||
<meta property="og:description" content="My OpenGraph sample site for Rspec" /> | ||
<meta property="og:image" content="http://test.host/images/rock1.jpg" /> | ||
<meta property="og:image" content="http://test.host/images/rock2.jpg" /> | ||
<meta name="description" content="Short Description Fallback" /> | ||
</head> | ||
<body> | ||
<img src="http://test.host/images/wall1.jpg" /> | ||
<img src="http://test.host/images/wall2.jpg" /> | ||
</body> | ||
</html> |
Oops, something went wrong.