From 6159c5e44c51139af32bfad41d8cea79c6438fd7 Mon Sep 17 00:00:00 2001 From: Marat Radchenko Date: Thu, 25 Feb 2021 16:39:46 +0300 Subject: [PATCH] resolves #187 add support for custom lexers pygments.rb no longer stores the list of lexers in a file. Instead, Pygments is queried for available lexers. In order to avoid spawning a Pygments process when pygments.rb is just loaded, lexers are now stored in a lazily initialized cache. --- .rubocop.yml | 2 - CHANGELOG.adoc | 4 ++ Rakefile | 12 ---- cache_lexers.rb | 9 --- lib/pygments.rb | 11 ++-- lib/pygments/lexer.rb | 146 ++++++++++++++++++++++++------------------ lib/pygments/popen.rb | 21 +----- 7 files changed, 97 insertions(+), 108 deletions(-) delete mode 100644 cache_lexers.rb diff --git a/.rubocop.yml b/.rubocop.yml index 423c6e51..e3af1463 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -10,8 +10,6 @@ Layout/LineLength: Max: 120 Metrics/MethodLength: Enabled: false -Security/MarshalLoad: - Enabled: false Style/StructInheritance: Enabled: false Style/Documentation: diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc index 4b395835..b40db913 100644 --- a/CHANGELOG.adoc +++ b/CHANGELOG.adoc @@ -5,6 +5,10 @@ This document provides a high-level view of the changes to the {project-name} by release. For a detailed view of what has changed, refer to the {uri-repo}/commits/master[commit history] on GitHub. 
+== Unreleased + +* Add support for custom lexers ({uri-repo}/pull/187[#187]) + == 2.1.0 (2021-02-14) - @slonopotamus * Update Pygments to 2.8.0 diff --git a/Rakefile b/Rakefile index 8302ff32..64350811 100755 --- a/Rakefile +++ b/Rakefile @@ -28,18 +28,6 @@ task :bench do sh 'ruby bench.rb' end -# ========================================================== -# Cache lexers -# ========================================================== - -# Write all the lexers to a file for easy lookup -task :lexers do - sh 'ruby cache_lexers.rb' -end - -task(:test).enhance([:lexers]) -task(:build).enhance([:lexers]) - # ========================================================== # Vendor # ========================================================== diff --git a/cache_lexers.rb b/cache_lexers.rb deleted file mode 100644 index 8a5bde7c..00000000 --- a/cache_lexers.rb +++ /dev/null @@ -1,9 +0,0 @@ -# frozen_string_literal: true - -require File.join(File.dirname(__FILE__), '/lib/pygments.rb') - -# Simple marshalling -serialized_lexers = Marshal.dump(Pygments.lexers!) 
- -# Write to a file -File.open('lexers', 'wb') { |file| file.write(serialized_lexers) } diff --git a/lib/pygments.rb b/lib/pygments.rb index ad527633..a8ad5f23 100644 --- a/lib/pygments.rb +++ b/lib/pygments.rb @@ -1,14 +1,18 @@ # frozen_string_literal: true -require File.join(File.dirname(__FILE__), 'pygments/popen') require 'forwardable' -module Pygments - autoload :Lexer, 'pygments/lexer' +require_relative 'pygments/lexer' +require_relative 'pygments/popen' +module Pygments class << self extend Forwardable + def lexers + LexerCache.instance.raw_lexers + end + def engine Thread.current.thread_variable_get(:pygments_engine) || Thread.current.thread_variable_set(:pygments_engine, Pygments::Popen.new) @@ -16,7 +20,6 @@ def engine def_delegators :engine, :formatters, - :lexers, :lexers!, :filters, :styles, diff --git a/lib/pygments/lexer.rb b/lib/pygments/lexer.rb index 7c62216f..59d09f0a 100644 --- a/lib/pygments/lexer.rb +++ b/lib/pygments/lexer.rb @@ -1,61 +1,14 @@ # frozen_string_literal: true +require 'singleton' + module Pygments class Lexer < Struct.new(:name, :aliases, :filenames, :mimetypes) - @lexers = [] - @index = {} - @name_index = {} - @alias_index = {} - @extname_index = {} - @mimetypes_index = {} - - # Internal: Create a new Lexer object - # - # hash - A hash of attributes - # - # Returns a Lexer object - def self.create(hash) - lexer = new(hash[:name], hash[:aliases], hash[:filenames], hash[:mimetypes]) - - @lexers << lexer - - @index[lexer.name.downcase] = @name_index[lexer.name] = lexer - - lexer.aliases.each do |name| - @alias_index[name] = lexer - @index[name.downcase] ||= lexer - end - - lexer.filenames.each do |filename| - extnames = [] - - extname = File.extname(filename) - if (m = extname.match(/\[(.+)\]/)) - m[1].scan(/./).each do |s| - extnames << extname.sub(m[0], s) - end - elsif extname != '' - extnames << extname - end - - extnames.each do |the_extname| - @extname_index[the_extname] = lexer - @index[the_extname.downcase.sub(/^\./, '')] 
||= lexer - end - end - - lexer.mimetypes.each do |type| - @mimetypes_index[type] = lexer - end - - lexer - end - # Public: Get all Lexers # - # Returns an Array of Lexers + # @return [Array] def self.all - @lexers + LexerCache.instance.lexers end # Public: Look up Lexer by name or alias. @@ -65,12 +18,15 @@ def self.all # Lexer.find('Ruby') # => # # - # Returns the Lexer or nil if none was found. + # @return [Lexer, nil] def self.find(name) - @index[name.to_s.downcase] + LexerCache.instance.index[name.to_s.downcase] end # Public: Alias for find. + # + # @param name [String] + # @return [Lexer, nil] def self.[](name) find(name) end @@ -84,9 +40,10 @@ def self.[](name) # Lexer.find_by_name('Ruby') # # => # # - # Returns the Lexer or nil if none was found. + # @param name [String] + # @return [Lexer, nil] def self.find_by_name(name) - @name_index[name] + LexerCache.instance.name_index[name] end # Public: Look up Lexer by one of its aliases. @@ -98,9 +55,10 @@ def self.find_by_name(name) # Lexer.find_by_alias('rb') # # => # # - # Returns the Lexer or nil if none was found. + # @param name [String] + # @return [Lexer, nil] def self.find_by_alias(name) - @alias_index[name] + LexerCache.instance.alias_index[name] end # Public: Look up Lexer by one of it's file extensions. @@ -112,9 +70,10 @@ def self.find_by_alias(name) # Lexer.find_by_extname('.rb') # # => # # - # Returns the Lexer or nil if none was found. + # @param extname [String] + # @return [Lexer, nil] def self.find_by_extname(extname) - @extname_index[extname] + LexerCache.instance.extname_index[extname] end # Public: Look up Lexer by one of it's mime types. @@ -126,9 +85,10 @@ def self.find_by_extname(extname) # Lexer.find_by_mimetype('application/x-ruby') # # => # # - # Returns the Lexer or nil if none was found. 
+ # @param type [String] + # @return [Lexer, nil] def self.find_by_mimetype(type) - @mimetypes_index[type] + LexerCache.instance.mimetypes_index[type] end # Public: Highlight syntax of text @@ -146,5 +106,67 @@ def highlight(text, options = {}) alias eql? equal? end - lexers.values.each { |h| Lexer.create(h) } + class LexerCache + include Singleton + + # @return [Array<Lexer>] + attr_reader(:lexers) + # @return [Hash{String => Lexer}] + attr_reader(:index) + # @return [Hash{String => Lexer}] + attr_reader(:name_index) + # @return [Hash{String => Lexer}] + attr_reader(:alias_index) + # @return [Hash{String => Lexer}] + attr_reader(:extname_index) + # @return [Hash{String => Lexer}] + attr_reader(:mimetypes_index) + + attr_reader(:raw_lexers) + + def initialize + @lexers = [] + @index = {} + @name_index = {} + @alias_index = {} + @extname_index = {} + @mimetypes_index = {} + @raw_lexers = Pygments.lexers! + + @raw_lexers.values.each do |hash| + lexer = Lexer.new(hash[:name], hash[:aliases], hash[:filenames], hash[:mimetypes]) + + @lexers << lexer + + @index[lexer.name.downcase] = @name_index[lexer.name] = lexer + + lexer.aliases.each do |name| + @alias_index[name] = lexer + @index[name.downcase] ||= lexer + end + + lexer.filenames.each do |filename| + extnames = [] + + extname = File.extname(filename) + if (m = extname.match(/\[(.+)\]/)) + m[1].scan(/./).each do |s| + extnames << extname.sub(m[0], s) + end + elsif extname != '' + extnames << extname + end + + extnames.each do |the_extname| + @extname_index[the_extname] = lexer + @index[the_extname.downcase.sub(/^\./, '')] ||= lexer + end + end + + lexer.mimetypes.each do |type| + @mimetypes_index[type] = lexer + end + end + end + end end diff --git a/lib/pygments/popen.rb b/lib/pygments/popen.rb index 92e52b2e..18b053c0 100644 --- a/lib/pygments/popen.rb +++ b/lib/pygments/popen.rb @@ -103,25 +103,8 @@ def formatters end end - # Get all lexers from a serialized array. - # This avoids needing to spawn mentos when it's not really needed - # (e.g., one-off jobs, loading the Rails env, etc). - # - # Should be preferred to #lexers! 
- # - # @return [Array] an array of lexers - def lexers - lexer_file = File.join(__dir__, '..', '..', 'lexers') - begin - File.open(lexer_file, 'rb') do |f| - Marshal.load(f) - end - rescue Errno::ENOENT - raise MentosError, %(Error loading #{lexer_file}. Was it created and vendored?) - end - end - - # Get back all available lexers from mentos itself + # Get all available lexers from mentos itself. + # Do not call this method directly; use Pygments.lexers instead, which caches the result. # # @return [Array] an array of lexers def lexers!