Skip to content

Commit

Permalink
Merge branch '__rultor'
Browse files Browse the repository at this point in the history
  • Loading branch information
rultor committed Apr 15, 2022
2 parents d61c49f + c8be299 commit c4f8750
Show file tree
Hide file tree
Showing 9 changed files with 4,019 additions and 3 deletions.
1 change: 1 addition & 0 deletions .pdd
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
--exclude README.md
--exclude coverage/**/*
--exclude assets/**/*
--exclude model/data/**/*
--rule min-words:10
--rule min-estimate:15
--rule max-estimate:90
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

source 'https://rubygems.org'

gem 'activesupport', '6.1.5'
gem 'aws-sdk-dynamodb', '1.59.0'
gem 'aws-sdk-s3', '1.90.0'
gem 'codecov', '0.5.1'
Expand All @@ -34,6 +35,7 @@ gem 'rack-test', '1.1.0'
gem 'rake', '13.0.3', require: false
gem 'rubocop', '0.69.0', require: false
gem 'rubocop-rspec', '1.33.0', require: false
gem 'ruby-fann'
gem 'sass', '3.7.4'
gem 'sentry-raven', '3.1.1'
gem 'sinatra', '2.1.0'
Expand Down
17 changes: 16 additions & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
GEM
remote: https://rubygems.org/
specs:
activesupport (6.1.5)
concurrent-ruby (~> 1.0, >= 1.0.2)
i18n (>= 1.6, < 2)
minitest (>= 5.1)
tzinfo (~> 2.0)
zeitwerk (~> 2.3)
addressable (2.8.0)
public_suffix (>= 2.0.2, < 5.0)
ast (2.4.2)
Expand Down Expand Up @@ -54,12 +60,15 @@ GEM
haml (5.2.1)
temple (>= 0.8.0)
tilt
i18n (1.10.0)
concurrent-ruby (~> 1.0)
jaro_winkler (1.5.4)
jmespath (1.4.0)
mail (2.7.1)
mini_mime (>= 0.1.1)
mini_mime (1.1.2)
mini_portile2 (2.8.0)
minitest (5.15.0)
mocha (1.11.2)
multi_json (1.15.0)
multipart-post (2.1.1)
Expand Down Expand Up @@ -102,6 +111,7 @@ GEM
unicode-display_width (>= 1.4.0, < 1.7)
rubocop-rspec (1.33.0)
rubocop (>= 0.60.0)
ruby-fann (1.3.2)
ruby-progressbar (1.11.0)
ruby2_keywords (0.0.5)
sass (3.7.4)
Expand Down Expand Up @@ -139,18 +149,22 @@ GEM
test-unit (3.4.0)
power_assert
tilt (2.0.10)
tzinfo (2.0.4)
concurrent-ruby (~> 1.0)
unicode-display_width (1.6.1)
xcop (0.6.2)
differ (~> 0.1.2)
nokogiri (~> 1.10)
rainbow (~> 3.0)
slop (~> 4.4)
zeitwerk (2.5.4)

PLATFORMS
ruby
x86_64-darwin-19

DEPENDENCIES
activesupport (= 6.1.5)
aws-sdk-dynamodb (= 1.59.0)
aws-sdk-s3 (= 1.90.0)
codecov (= 0.5.1)
Expand All @@ -166,6 +180,7 @@ DEPENDENCIES
rake (= 13.0.3)
rubocop (= 0.69.0)
rubocop-rspec (= 1.33.0)
ruby-fann
sass (= 3.7.4)
sentry-raven (= 3.1.1)
sinatra (= 2.1.0)
Expand All @@ -175,4 +190,4 @@ DEPENDENCIES
xcop (= 0.6.2)

BUNDLED WITH
2.2.21
2.2.30
14 changes: 14 additions & 0 deletions model/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Neural Net (in Ruby)
===

#### Data
The data for puzzles is pre-processed and available in *~/data/proper_pdd_data_regression.csv*. In the data, the first row is the column index, the first column is the id of the repo the puzzle belongs to, and the last column is the output variable (*y*).

#### Model
The neural network model uses gradient descent to optimize the weights of the model. The weights are stored in *~/data/weights.marshal* after training and loaded for subsequent runs. To retrain the model, please delete *~/data/weights.marshal*.

##### Training
Run the following command to train the model or test on random dataset
```sh
> ruby model/model.rb
```
3,581 changes: 3,581 additions & 0 deletions model/data/proper_pdd_data_regression.csv

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions model/data/weights.marshal

Large diffs are not rendered by default.

119 changes: 119 additions & 0 deletions model/model.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
require 'slop'
require 'rainbow'
require 'active_support/core_ext/hash'
require 'nokogiri'
require_relative 'nn'

DATA_FNAME = File.join(File.dirname(__FILE__), 'data/proper_pdd_data_regression.csv')
WEIGHTS_FNAME = File.join(File.dirname(__FILE__), 'data/weights.marshal')

def split_data(x_data, y_data)
n = x_data.length
train_test_split = 0.8 # split training and test data
train_size = (train_test_split * x_data.length).round
x_train = x_data.slice(0, train_size)
y_train = y_data.slice(0, train_size)
x_test = x_data.slice(train_size, n)
y_test = y_data.slice(train_size, n)
[x_train, y_train, x_test, y_test]
end

# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
# Trains (or loads) the neural network on the pre-processed puzzle data,
# prints test metrics, and returns the ranks of +inputs+ ordered by
# ascending estimate.
#
# @param inputs [Hash, Array<Hash>] puzzle hash(es); each may carry an
#   'estimate' key — NOTE(review): assumes the shape produced by
#   +process_input+, confirm against callers
# @return [Array<Integer>] indices of +inputs+ sorted from the smallest
#   estimate to the largest (missing estimates sort last)
def run_model(inputs)
  inputs = [inputs] if inputs.is_a?(Hash)
  rows = File.readlines(DATA_FNAME).map { |l| l.chomp.split(',') }
  rows.slice!(0) # remove header of csv file
  rows = rows.transpose[1..].transpose # drop first column containing repo id
  rows.shuffle! # shuffle data
  x_data = rows.map { |row| row[0..-2].map(&:to_f) } # array of array of numeric values
  y_data = rows.map { |row| row[-1..].map(&:to_f) } # array of array of numeric values
  x_train, y_train, x_test, y_test = split_data(x_data, y_data)
  epsilon = 1e-1 # model hyperparameters and metrics
  mse = lambda { |actual, ideal|
    errors = actual.zip(ideal).map { |a, i| a - i }
    (errors.inject(0) { |sum, err| sum + err**2 }) / errors.length.to_f
  }
  error_rate = ->(errors, total) { ((errors / total.to_f) * 100).round }
  prediction_success = ->(actual, ideal) { actual >= (ideal - epsilon) && actual <= (ideal + epsilon) }
  run_test = lambda { |nn, test_inputs, expected_outputs|
    success = 0
    failure = 0
    errsum = 0
    test_inputs.each.with_index do |input, i|
      output = nn.run input
      prediction_success.call(output[0], expected_outputs[i][0]) ? success += 1 : failure += 1
      errsum += mse.call(output, expected_outputs[i])
    end
    [success, failure, errsum / test_inputs.length.to_f]
  }

  # Build a 4-layer network: an input layer sized from the data, two hidden
  # layers of 20 and 10 neurons, and a single output neuron.
  # (The previous comment described a different topology and was stale.)
  # Bias neurons are automatically added to input + hidden layers; no need to specify these
  input_size = x_train.first.length
  nn = NeuralNet.new [input_size, 20, 10, 1]

  if File.file?(WEIGHTS_FNAME)
    puts "\nLoading existing model weights..."
    nn.load WEIGHTS_FNAME
    puts "\nSuccessfully loaded model weights..."
  else
    puts 'Testing the untrained network...'
    success, failure, avg_mse = run_test.call(nn, x_test, y_test)
    puts "Untrained classification success: #{success}, failure: #{failure}
(classification error: #{error_rate.call(failure, x_test.length)}%, mse: #{(avg_mse * 100).round(2)}%)"

    puts "\nTraining the network...\n\n"
    t1 = Time.now
    result = nn.train(x_train, y_train, error_threshold: 0.01,
                                        max_iterations: 1000,
                                        log_every: 20)
    # puts result
    puts "\nDone training the network: #{result[:iterations]} iterations,
#{(result[:error] * 100).round(2)}% mse, #{(Time.now - t1).round(1)}s"
    puts "\nSaving the model weights..."
    nn.save WEIGHTS_FNAME
    puts "\nSuccessfully saved model weights..."
  end

  puts "\nTesting the trained network..."
  success, failure, avg_mse = run_test.call(nn, x_test, y_test)
  puts "Trained classification success: #{success}, failure: #{failure}
(classification error: #{error_rate.call(failure, x_test.length)}%, mse: #{(avg_mse * 100).round(2)}%)"

  # Puzzles with no estimate must sort LAST, so use Float::INFINITY as the
  # fallback. The previous `p['estimate'].to_i || Infinity` was broken:
  # `to_i` always returns an Integer (nil.to_i == 0), so the `|| Infinity`
  # branch was dead code (and `Infinity` is an undefined bare constant) —
  # missing estimates were silently ranked first as 0.
  estimates = inputs.map { |p| p['estimate'] ? p['estimate'].to_i : Float::INFINITY }
  estimates.map.with_index.sort.map(&:last) # sort estimates from minimum to maximum
end
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength

# Parses the puzzles XML file and returns the contained puzzles.
#
# @param input_path [String] path to the puzzles XML file
# @return [Array<Hash>] puzzle hashes found in the file (empty when none)
# @raise [RuntimeError] when +input_path+ is nil
def process_input(input_path)
  # `raise`, not `throw`: throw/catch is Ruby's non-local control flow and,
  # with no matching `catch`, surfaced as UncaughtThrowError instead of a
  # StandardError the script's rescue clause could handle.
  raise 'Please provide a valid input path' unless input_path
  doc = Nokogiri::XML(File.read(input_path))
  puzzles = Hash.from_xml(doc.to_s)['puzzles']
  puzzles = puzzles.nil? ? [] : puzzles['puzzle']
  puzzles.nil? ? [] : puzzles
end

# Entry point: parse CLI options, rank the puzzles from the -p XML file,
# and write the space-separated ranks to the -f output file.
begin
  args = []
  args += ARGV

  begin
    opts = Slop.parse(args, strict: true, help: true) do |o|
      o.banner = 'Usage: model.rb [options]'
      o.string '-p', '--puzzles', 'Puzzles file'
      o.string '-f', '--file', 'Result file'
    end
  rescue Slop::Error => e
    raise StandardError, "#{e.message}, use -p and -f"
  end

  # Both options are required. (The old message referenced a nonexistent
  # -v flag; this script only defines -p and -f.)
  raise 'Both -p and -f are mandatory, try --help for more information' if !opts[:file] || !opts[:puzzles]

  ranks = run_model(process_input(opts[:puzzles]))
  File.open(opts[:file], 'w') { |f| f.write(ranks.join(' ')) }
  puts 'Successfully wrote output to file'
rescue StandardError => e
  puts "#{Rainbow('ERROR').red} (#{e.class.name}): #{e.message}"
  exit(255)
end
Loading

0 comments on commit c4f8750

Please sign in to comment.