Skip to content

Commit aca4c7a

Browse files
committed
Tests and POST functionality.
1 parent 18b6bc1 commit aca4c7a

File tree

10 files changed

+266
-68
lines changed

10 files changed

+266
-68
lines changed

.github/workflows/ci.yml

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,29 @@ name: CI
33
on: [push, pull_request]
44

55
jobs:
6-
build:
7-
8-
runs-on: ubuntu-latest
9-
6+
test:
107
strategy:
8+
fail-fast: false
119
matrix:
12-
node-version: [18.x]
10+
os: [ubuntu-24.04]
11+
ruby: [3.4.4]
12+
13+
runs-on: ${{ matrix.os }}
1314

1415
steps:
15-
- uses: actions/checkout@v3
16-
- name: Use Node.js ${{ matrix.node-version }}
17-
uses: actions/setup-node@v3
18-
with:
19-
node-version: ${{ matrix.node-version }}
20-
cache: 'npm'
21-
- run: npm install
22-
- run: npm run build --if-present
23-
- run: npm test
16+
# Check out the repository. Use v4 to match actions/setup-node@v4 below;
# checkout@v2 targets a Node runtime that GitHub-hosted runners have retired.
- uses: actions/checkout@v4
17+
18+
- uses: actions/setup-node@v4
19+
with:
20+
node-version: 20
21+
cache: "npm"
22+
- run: npm ci
23+
24+
- name: Set up Ruby
25+
uses: ruby/setup-ruby@v1
26+
with:
27+
ruby-version: ${{ matrix.ruby }}
28+
bundler-cache: true
29+
30+
- name: Run tests
31+
run: bundle exec rake

Gemfile

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,10 @@ gem "sd_notify"
66
gem "unicorn"
77
gem "sinatra"
88
gem "connection_pool"
9-
gem "http"
9+
gem "http"
10+
11+
group :test do
12+
gem "minitest"
13+
gem "webmock"
14+
gem "rack-test"
15+
end

Gemfile.lock

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,19 @@ GEM
44
addressable (2.8.7)
55
public_suffix (>= 2.0.2, < 7.0)
66
base64 (0.3.0)
7+
bigdecimal (3.2.2)
78
connection_pool (2.5.3)
9+
crack (1.0.0)
10+
bigdecimal
11+
rexml
812
domain_name (0.6.20240107)
913
ffi (1.17.2)
14+
ffi (1.17.2-arm64-darwin)
15+
ffi (1.17.2-x86_64-linux-gnu)
1016
ffi-compiler (1.3.2)
1117
ffi (>= 1.15.5)
1218
rake
19+
hashdiff (1.2.0)
1320
http (5.3.1)
1421
addressable (~> 2.8)
1522
http-cookie (~> 1.0)
@@ -23,6 +30,7 @@ GEM
2330
ffi-compiler (~> 1.0)
2431
rake (~> 13.0)
2532
logger (1.7.0)
33+
minitest (5.25.5)
2634
mustermann (3.0.3)
2735
ruby2_keywords (~> 0.0.1)
2836
nio4r (2.7.4)
@@ -37,8 +45,11 @@ GEM
3745
rack-session (2.1.1)
3846
base64 (>= 0.1.0)
3947
rack (>= 3.0.0)
48+
rack-test (2.2.0)
49+
rack (>= 1.3)
4050
raindrops (0.20.1)
4151
rake (13.3.0)
52+
rexml (3.4.1)
4253
ruby2_keywords (0.0.5)
4354
sd_notify (0.1.1)
4455
sinatra (4.1.1)
@@ -52,17 +63,25 @@ GEM
5263
unicorn (6.1.0)
5364
kgio (~> 2.6)
5465
raindrops (~> 0.7)
66+
webmock (3.25.1)
67+
addressable (>= 2.8.0)
68+
crack (>= 0.3.2)
69+
hashdiff (>= 0.4.0, < 2.0.0)
5570

5671
PLATFORMS
57-
ruby
72+
arm64-darwin-24
73+
x86_64-linux
5874

5975
DEPENDENCIES
6076
connection_pool
6177
http
78+
minitest
6279
puma
80+
rack-test
6381
sd_notify
6482
sinatra
6583
unicorn
84+
webmock
6685

6786
BUNDLED WITH
6887
2.6.7

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ cd extract
6161
mkdir users
6262
6363
# use your own secret key and username
64-
echo "SECRET_KEY" > users/USERNAME
64+
echo "user: key" > users/users.yml
6565
```
6666

6767
Once a username and secret key have been created, you can make a request.

Rakefile

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
require "rake/testtask"

# Defines `rake test`, which runs every *_test.rb file found under test/.
Rake::TestTask.new(:test) do |t|
  # Put the test directory itself on the load path ("_test" does not exist),
  # so helpers such as test_helper can also be loaded with a plain require.
  t.libs << "test"
  t.test_files = FileList["test/**/*_test.rb"]
  t.warning = false
end

# A bare `rake` (what CI runs through `bundle exec rake`) runs the test suite.
task default: :test

app/app.rb

Lines changed: 69 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,40 @@
55
require "openssl"
66
require "base64"
77
require "connection_pool"
8+
require "yaml"
89

910
set :protection, except: [:json_csrf]
1011

1112
$parser = ConnectionPool.new(size: 1, timeout: 5) {
1213
HTTP.persistent(ENV["PARSER_URL"])
1314
}
1415

16+
# Loads the user name => secret key table used to verify request signatures.
#
# @param path [String, nil] path to a YAML file mapping user names to keys
# @return [Hash] the parsed mapping; a single demo/demo user when no path is
#   given, or an empty table when the file has no content
def load_users(path)
  return {"demo" => "demo"} unless path

  # safe_load_file yields a falsy value for an empty document; fall back to an
  # empty table so later lookups on $users do not fail on nil/false.
  YAML.safe_load_file(path) || {}
end

# Populated once at boot from the file named by EXTRACT_USERS.
$users = load_users(ENV["EXTRACT_USERS"])
23+
24+
1525
# Checks that +signature+ is the hex-encoded HMAC-SHA1 of +data+ under the
# secret key registered for +user+ in $users.
#
# @param user [String] user name to look up in $users
# @param signature [String, nil] hex digest supplied by the caller
# @param data [String] the payload that was signed
# @return [Boolean] true only when the user exists and the signature matches
def signature_valid?(user, signature, data)
  key = $users[user]
  return false unless key

  # to_s: a YAML users file may yield a non-String key (e.g. all digits).
  expected = OpenSSL::HMAC.hexdigest("sha1", key.to_s, data)

  # Constant-time comparison so response timing does not reveal how much of a
  # guessed signature was correct. to_s keeps a missing signature a plain
  # mismatch instead of a TypeError.
  OpenSSL.secure_compare(signature.to_s, expected)
end
2031

21-
def parse(json)
32+
# Posts +json+ to the parser service at "/parser" over a connection checked
# out of the $parser pool, and returns the response body as a String.
#
# The body is read (response.to_s) before the status check. When the status
# is not OK, halt_with_error("Cannot extract this URL.") is called; otherwise
# the parser's Content-Type header is copied onto this response via headers.
#
# @param json [Object] payload sent as the JSON request body
# @return [String] the parser response body
def parse_with_mercury(json)
2233
$parser.with do |connection|
23-
connection
34+
response = connection
2435
.timeout(connect: 1, write: 5, read: 5)
2536
.post("/parser", json: json)
37+
38+
body = response.to_s
39+
halt_with_error("Cannot extract this URL.") unless response.status.ok?
40+
headers("Content-Type" => response.headers[:content_type])
41+
body
2642
end
2743
end
2844

@@ -33,20 +49,37 @@ def halt_with_error(error)
3349
}.to_json
3450
end
3551

52+
# Builds the payload hash handed to parse_with_mercury.
#
# @param url [Object] stored under :url
# @param html [Object] stored under options[:html]
# @param content_type [Object] stored under options[:contentType]
# @return [Hash] {url:, options: {html:, contentType:}}
def parser_object(url:, html:, content_type:)
53+
{
54+
url: url,
55+
options: {
56+
html: html,
57+
contentType: content_type
58+
}
59+
}
60+
end
61+
3662
# Fetches +url+ with a GET request and wraps the result in a parser payload.
#
# The request follows up to 5 redirects, uses connect/write/read timeouts of
# 4/4/5, advertises gzip and deflate, and enables the :auto_inflate feature.
# The response status is not inspected here; the body and Content-Type header
# are passed to parser_object as-is.
#
# @param url [String] address to download
# @return [Hash] the value returned by parser_object
def download_with_http(url)
3763
response = HTTP
3864
.follow(max_hops: 5)
3965
.timeout(connect: 4, write: 4, read: 5)
4066
.headers({accept_encoding: "gzip, deflate"})
4167
.use(:auto_inflate)
4268
.get(url)
43-
{
44-
url: url,
45-
options: {
46-
html: response.to_s,
47-
contentType: response.headers[:content_type]
48-
}
49-
}
69+
70+
parser_object(url: url, html: response.to_s, content_type: response.headers[:content_type])
71+
end
72+
73+
# Guards a request: calls halt_with_error when +user+ is not a key of $users,
# and again when signature_valid? rejects +signature+ for +url+.
#
# @param user [String] user name taken from the request
# @param signature [String] signature taken from the request
# @param url [String] the value the signature is checked against
def authenticate(user, signature, url)
74+
halt_with_error("User does not exist: #{user}.") unless $users.key?(user)
75+
halt_with_error("Invalid signature.") unless signature_valid?(user, signature, url)
76+
end
77+
78+
# Logs +exception+ (message plus backtrace) together with the request's
# +url+ and +user+, then responds via halt_with_error("Cannot extract this URL.").
#
# NOTE(review): if halt_with_error halts the request, as its name suggests,
# the final `raise exception` is never reached — confirm against its definition.
#
# @param exception [Exception] the rescued error
# @param url [Object] value interpolated into the log line
# @param user [Object] value interpolated into the log line
def response_error!(exception, url, user)
79+
logger.error "Exception processing exception=#{exception} url=#{url} user=#{user} "
80+
logger.error exception.backtrace.join("\n")
81+
halt_with_error("Cannot extract this URL.")
82+
raise exception
5083
end
5184

5285
get "/health_check" do
@@ -62,21 +95,32 @@ def download_with_http(url)
6295

6396
logger.info "url=#{url}"
6497

65-
begin
66-
halt_with_error("Invalid signature.") unless signature_valid?(params["user"], params["signature"], url)
67-
rescue Errno::ENOENT
68-
halt_with_error("User does not exist: #{params["user"]}.")
69-
end
98+
authenticate(params["user"], params["signature"], url)
7099

71100
payload = download_with_http(url)
72-
response = parse(payload)
73-
body = response.to_s
74-
halt_with_error("Cannot extract this URL.") unless response.status.ok?
75-
headers("Content-Type" => response.headers[:content_type])
76-
body
101+
102+
parse_with_mercury(payload)
77103
rescue => exception
78-
logger.error "Exception processing exception=#{exception} url=#{url} user=#{params["user"]} "
79-
logger.error exception.backtrace.join("\n")
80-
halt_with_error("Cannot extract this URL.")
81-
raise exception
104+
response_error!(exception, url, params["user"])
105+
end
106+
107+
# POST /parser/:user/:signature
#
# Accepts a JSON object with "url" and "body" fields, verifies the signature
# of the url for the given user, and sends the supplied HTML to the parser
# service instead of downloading the page.
post "/parser/:user/:signature" do
  # Declared up front so the rescue clause below can always reference it, even
  # when the failure happens before the JSON body has been read.
  url = nil

  json = begin
    JSON.parse(request.body.read)
  rescue JSON::ParserError
    halt_with_error("Invalid JSON body.")
  end

  # Valid JSON that is not an object (array, string, number) has no fields.
  halt_with_error("Invalid JSON body.") unless json.is_a?(Hash)

  url = json["url"]
  halt_with_error("Missing url field in JSON body.") unless url
  halt_with_error("Missing body field in JSON body.") unless json["body"]

  logger.info "url=#{url}"

  authenticate(params["user"], params["signature"], url)

  payload = parser_object(url: url, html: json["body"], content_type: "text/html")

  parse_with_mercury(payload)
rescue => exception
  response_error!(exception, url, params["user"])
end

script/bootstrap.sh

Lines changed: 0 additions & 26 deletions
This file was deleted.

test/app_test.rb

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
require "webmock/minitest"
2+
require_relative "test_helper"
3+
4+
# Endpoint tests: the health check, GET /parser with a valid and an invalid
# signature, and POST /parser with a valid signature.
#
# @user and @key are not assigned in this class — presumably provided by the
# Test base class from test_helper; verify there.
# NOTE(review): the POST test does not assert the response status, unlike the
# GET test with a valid signature.
class AppTest < Test
5+
6+
def test_health_check
7+
get "/health_check"
8+
assert_equal 200, last_response.status
9+
assert_equal "OK", last_response.body
10+
end
11+
12+
def test_parser_with_valid_signature
13+
url = "https://example.com"
14+
base64_url = Base64.urlsafe_encode64(url)
15+
signature = OpenSSL::HMAC.hexdigest("sha1", @key, url)
16+
title = "The Title"
17+
18+
stub_request(:get, url)
19+
.to_return(
20+
status: 200,
21+
body: "<title>#{title}</title>",
22+
headers: {"Content-Type" => "text/html"}
23+
)
24+
25+
get "/parser/#{@user}/#{signature}?base64_url=#{base64_url}"
26+
27+
assert_equal 200, last_response.status
28+
assert_equal "application/json; charset=utf-8", last_response.content_type
29+
assert_equal title, JSON.load(last_response.body).fetch("title")
30+
end
31+
32+
def test_parser_with_invalid_signature
33+
url = "https://example.com"
34+
base64_url = Base64.urlsafe_encode64(url)
35+
invalid_signature = "invalid"
36+
37+
get "/parser/#{@user}/#{invalid_signature}?base64_url=#{base64_url}"
38+
39+
assert_equal 400, last_response.status
40+
assert_equal "application/json", last_response.content_type
41+
assert_equal "Invalid signature.", JSON.parse(last_response.body).fetch("messages")
42+
end
43+
44+
def test_post_parser_with_valid_signature
45+
url = "https://example.com"
46+
signature = OpenSSL::HMAC.hexdigest("sha1", @key, url)
47+
title = "The Title"
48+
html_body = "<title>#{title}</title>"
49+
50+
post "/parser/#{@user}/#{signature}", {url: url, body: html_body}.to_json, "CONTENT_TYPE" => "application/json"
51+
52+
assert_equal "application/json; charset=utf-8", last_response.content_type
53+
assert_equal title, JSON.load(last_response.body).fetch("title")
54+
end
55+
end

0 commit comments

Comments
 (0)