loading
Generated 2022-10-29T17:52:20+00:00

All Files ( 94.65% covered at 121.75 hits/line )

43 files in total.
1459 relevant lines, 1381 lines covered and 78 lines missed. ( 94.65% )
File % covered Lines Relevant Lines Lines covered Lines missed Avg. Hits / Line
lib/epub.rb 100.00 % 5 5 5 0 1.00
lib/epub/book.rb 100.00 % 7 4 4 0 1.00
lib/epub/book/features.rb 89.13 % 103 46 41 5 8.70
lib/epub/constants.rb 100.00 % 57 15 15 0 1.00
lib/epub/content_document.rb 100.00 % 2 2 2 0 1.00
lib/epub/content_document/navigation.rb 94.44 % 142 72 68 4 7.75
lib/epub/content_document/typable.rb 100.00 % 17 9 9 0 14.11
lib/epub/content_document/xhtml.rb 91.30 % 48 23 21 2 1.70
lib/epub/inspector.rb 100.00 % 50 25 25 0 6.56
lib/epub/metadata.rb 87.38 % 197 103 90 13 59.09
lib/epub/ocf.rb 100.00 % 8 5 5 0 2.20
lib/epub/ocf/container.rb 100.00 % 29 13 13 0 11.15
lib/epub/ocf/encryption.rb 100.00 % 7 4 4 0 1.00
lib/epub/ocf/manifest.rb 100.00 % 6 3 3 0 1.00
lib/epub/ocf/metadata.rb 100.00 % 7 4 4 0 1.00
lib/epub/ocf/physical_container.rb 97.37 % 65 38 37 1 14.63
lib/epub/ocf/physical_container/archive_zip.rb 100.00 % 55 31 31 0 431.81
lib/epub/ocf/physical_container/unpacked_directory.rb 100.00 % 17 9 9 0 5.11
lib/epub/ocf/physical_container/unpacked_uri.rb 83.33 % 28 12 10 2 0.92
lib/epub/ocf/physical_container/zipruby.rb 88.89 % 36 18 16 2 2.89
lib/epub/ocf/rights.rb 100.00 % 6 3 3 0 1.00
lib/epub/ocf/signatures.rb 100.00 % 6 3 3 0 1.00
lib/epub/parser.rb 86.96 % 109 46 40 6 17.24
lib/epub/parser/content_document.rb 94.83 % 111 58 55 3 25.79
lib/epub/parser/metadata.rb 93.85 % 104 65 61 4 1050.17
lib/epub/parser/ocf.rb 94.23 % 89 52 49 3 20.31
lib/epub/parser/publication.rb 98.92 % 141 93 92 1 238.25
lib/epub/parser/xml_document.rb 100.00 % 28 13 13 0 40.85
lib/epub/parser/xml_document/refinements/nokogiri.rb 100.00 % 33 17 17 0 1059.65
lib/epub/parser/xml_document/refinements/oga.rb 100.00 % 77 37 37 0 401.08
lib/epub/parser/xml_document/refinements/rexml.rb 96.00 % 50 25 24 1 48.96
lib/epub/publication.rb 100.00 % 2 2 2 0 1.00
lib/epub/publication/fixed_layout.rb 100.00 % 208 116 116 0 5.41
lib/epub/publication/package.rb 100.00 % 81 37 37 0 67.22
lib/epub/publication/package/bindings.rb 100.00 % 31 17 17 0 9.76
lib/epub/publication/package/guide.rb 100.00 % 54 32 32 0 110.59
lib/epub/publication/package/manifest.rb 96.43 % 252 112 108 4 89.46
lib/epub/publication/package/metadata.rb 100.00 % 9 5 5 0 1.00
lib/epub/publication/package/spine.rb 100.00 % 106 54 54 0 30.13
lib/epub/searcher.rb 75.00 % 21 12 9 3 0.75
lib/epub/searcher/publication.rb 69.23 % 142 78 54 24 13.56
lib/epub/searcher/result.rb 100.00 % 92 41 41 0 28.63
lib/epub/searcher/xhtml.rb 100.00 % 161 100 100 0 69.17

lib/epub.rb

100.0% lines covered

5 relevant lines. 5 lines covered and 0 lines missed.
    
  1. 1 require 'epub/inspector'
  2. 1 require 'epub/ocf'
  3. 1 require 'epub/publication'
  4. 1 require 'epub/content_document'
  5. 1 require 'epub/book/features'

lib/epub/book.rb

100.0% lines covered

4 relevant lines. 4 lines covered and 0 lines missed.
    
  1. 1 require 'epub'
  2. 1 module EPUB
  3. 1 class Book
  4. 1 include EPUB::Book::Features
  5. end
  6. end

lib/epub/book/features.rb

89.13% lines covered

46 relevant lines. 41 lines covered and 5 lines missed.
    
  1. 1 require 'forwardable'
  2. 1 module EPUB
  3. 1 class Book
  4. 1 module Features
  5. 1 extend Forwardable
  6. 1 attr_reader :ocf
  7. 1 attr_accessor :epub_file
  8. # When writing, sets +ocf.book+ to self.
  9. # @param [OCF]
  10. 1 def ocf=(mod)
  11. 27 @ocf = mod
  12. 27 mod.book = self
  13. 27 mod
  14. end
  15. # @return [Array<OCF::Container::Rootfile>]
  16. 1 def rootfiles
  17. 105 ocf.container.rootfiles
  18. end
  19. # @return [Array<Publication::Package>]
  20. 1 def packages
  21. 105 rootfiles.map(&:package)
  22. end
  23. 1 alias renditions packages
  24. # First +package+ in +packages+
  25. # @return [Package|nil]
  26. 1 def default_rendition
  27. 21 packages.first
  28. end
  29. 1 alias package default_rendition
  30. # @!parse def_delegators :package, :metadata, :manifest, :spine, :guide, :bindings
  31. 1 def_delegators :package, *Publication::Package::CONTENT_MODELS
  32. 1 def_delegators :metadata, :title, :main_title, :subtitle, :short_title, :collection_title, :edition_title, :extended_title, :description, :date, :unique_identifier, :modified, :release_identifier, :package_identifier
  33. 1 def_delegators :manifest, :nav
  34. 1 def container_adapter
  35. 43 @adapter || OCF::PhysicalContainer.adapter
  36. end
  37. 1 def container_adapter=(adapter)
  38. 2 @adapter = OCF::PhysicalContainer.find_adapter(adapter)
  39. end
  40. # Cover image defined in EPUB 3 or used in EPUB 2
  41. # @return [EPUB::Publication::Package::Manifest::Item]
  42. 1 def cover_image
  43. manifest.cover_image || metadata.cover_image
  44. end
  45. # @overload each_page_on_spine(&blk)
  46. # iterate over items in order of spine when block given
  47. # @yieldparam item [Publication::Package::Manifest::Item]
  48. # @overload each_page_on_spine
  49. # @return [Enumerator] which iterates over {Publication::Package::Manifest::Item}s in order of spine when block not given
  50. 1 def each_page_on_spine(&blk)
  51. 3 enum = package.spine.items
  52. 3 if block_given?
  53. 1 enum.each &blk
  54. else
  55. 2 enum.each
  56. end
  57. end
  58. 1 def each_page_on_toc(&blk)
  59. raise NotImplementedError
  60. end
  61. # @overload each_content(&blk)
  62. # iterate all items over when block given
  63. # @yieldparam item [Publication::Package::Manifest::Item]
  64. # @overload each_content
  65. # @return [Enumerator] which iterates over all {Publication::Package::Manifest::Item}s in EPUB package when block not given
  66. 1 def each_content(&blk)
  67. 3 enum = manifest.items
  68. 3 if block_given?
  69. 1 enum.each &blk
  70. else
  71. 2 enum.to_enum
  72. end
  73. end
  74. 1 def other_navigation
  75. raise NotImplementedError
  76. end
  77. # @return [Array<Publication::Package::Manifest::Item>] All {Publication::Package::Manifest::Item}s in EPUB package
  78. 1 def resources
  79. manifest.items
  80. end
  81. # Syntax sugar
  82. # @return String
  83. 1 def rootfile_path
  84. ocf.container.rootfile.full_path.to_s
  85. end
  86. end
  87. end
  88. end

lib/epub/constants.rb

100.0% lines covered

15 relevant lines. 15 lines covered and 0 lines missed.
    
  1. 1 module EPUB
  2. NAMESPACES = {
  3. 1 'xml' => 'http://www.w3.org/XML/1998/namespace',
  4. 'dc' => 'http://purl.org/dc/elements/1.1/',
  5. 'ocf' => 'urn:oasis:names:tc:opendocument:xmlns:container',
  6. 'opf' => 'http://www.idpf.org/2007/opf',
  7. 'xhtml' => 'http://www.w3.org/1999/xhtml',
  8. 'epub' => 'http://www.idpf.org/2007/ops',
  9. 'm' => 'http://www.w3.org/1998/Math/MathML',
  10. 'svg' => 'http://www.w3.org/2000/svg',
  11. 'smil' => 'http://www.w3.org/ns/SMIL',
  12. 'metadata' => 'http://www.idpf.org/2013/metadata'
  13. }
  14. 1 module MediaType
  15. 1 class UnsupportedMediaType < StandardError; end
  16. 1 EPUB = 'application/epub+zip'
  17. 1 ROOTFILE = 'application/oebps-package+xml'
  18. IMAGE = %w[
  19. 1 image/gif
  20. image/jpeg
  21. image/png
  22. image/svg+xml
  23. ]
  24. APPLICATION = %w[
  25. 1 application/xhtml+xml
  26. application/x-dtbncx+xml
  27. application/vnd.ms-opentype
  28. application/font-woff
  29. application/smil+xml
  30. application/pls+xml
  31. ]
  32. AUDIO = %w[
  33. 1 audio/mpeg
  34. audio/mp4
  35. ]
  36. TEXT = %w[
  37. 1 text/css
  38. text/javascript
  39. ]
  40. 1 CORE = IMAGE + APPLICATION + AUDIO + TEXT
  41. end
  42. # @see https://idpf.github.io/epub-vocabs/structure/
  43. 1 module Type
  44. DOCUMENT_NAVIGATION = %w[
  45. 1 toc
  46. landmarks
  47. ]
  48. PAGINATION = %w[
  49. 1 pagebreak
  50. page_list
  51. ]
  52. 1 TYPES = DOCUMENT_NAVIGATION + PAGINATION
  53. end
  54. end

lib/epub/content_document.rb

100.0% lines covered

2 relevant lines. 2 lines covered and 0 lines missed.
    
  1. 1 require 'epub/content_document/xhtml'
  2. 1 require 'epub/content_document/navigation'

lib/epub/content_document/navigation.rb

94.44% lines covered

72 relevant lines. 68 lines covered and 4 lines missed.
    
  1. 1 require "epub/content_document/typable"
  2. 1 module EPUB
  3. 1 module ContentDocument
  4. 1 class Navigation < XHTML
  5. 1 include Typable
  6. 1 attr_accessor :navigations
  7. 1 def initialize
  8. 12 @navigations = []
  9. 12 @hidden = false
  10. 12 @parent = nil
  11. 12 super
  12. end
  13. 1 def toc
  14. 2 navigations.find(&:toc?)
  15. end
  16. 1 def page_list
  17. navigations.find(&:page_list?)
  18. end
  19. 1 def landmarks
  20. 1 navigations.find(&:landmarks?)
  21. end
  22. # Enumerator version of toc
  23. 1 def contents
  24. 1 enum_for(:each_content).to_a
  25. end
  26. # Enumerator version of page_list
  27. # Usage: navigation.enum_for(:pages)
  28. 1 def pages
  29. raise NotImplementedError
  30. end
  31. # @todo Enumerator version of landmarks
  32. # iterator for #toc
  33. 1 def each_content
  34. 1 toc.traverse do |content, _|
  35. 7 yield content
  36. end
  37. end
  38. # iterator for #page_list
  39. 1 def each_page
  40. raise NotImplementedError
  41. end
  42. # iterator for #landmark
  43. 1 def each_landmark
  44. raise NotImplementedError
  45. end
  46. 1 def navigation
  47. 2 navigations.first
  48. end
  49. 1 module Hidable
  50. 1 attr_accessor :hidden, :parent
  51. 1 def hidden?
  52. 8 if @hidden.nil?
  53. 4 @parent ? @parent.hidden? : false
  54. else
  55. 4 true
  56. end
  57. end
  58. end
  59. 1 class Item
  60. 1 include Hidable
  61. 1 include Typable
  62. 1 attr_accessor :items, :text,
  63. :content_document, :item
  64. 1 attr_reader :href
  65. 1 def initialize
  66. 112 @items = ItemList.new
  67. 112 @items.parent = self
  68. end
  69. 1 def href=(iri)
  70. 72 @href = iri.kind_of?(Addressable::URI) ? iri : Addressable::URI.parse(iri)
  71. end
  72. 1 def traverse(depth=0, &block)
  73. 10 block.call self, depth
  74. 10 items.each do |item|
  75. 8 item.traverse depth + 1, &block
  76. end
  77. end
  78. # For backward compatibility
  79. 1 def type
  80. 8 types.find {|t|
  81. 8 Type::TYPES.include? t
  82. }
  83. end
  84. # For backward compatibility
  85. 1 def type=(t)
  86. 1 types << t
  87. end
  88. 1 %w[toc page_list landmarks].each do |type|
  89. 3 define_method "#{type}?" do
  90. 4 types.include? type
  91. end
  92. end
  93. end
  94. # @todo Implement method to represent navigation structure
  95. 1 class Navigation < Item
  96. 1 module Type
  97. 1 TOC = 'toc'
  98. 1 PAGE_LIST = 'page_list'
  99. 1 LANDMARKS = 'landmarks'
  100. end
  101. 1 alias navigations items
  102. 1 alias navigations= items=
  103. 1 alias heading text
  104. 1 alias heading= text=
  105. end
  106. 1 class ItemList < Array
  107. 1 include Hidable
  108. 1 def <<(item)
  109. 45 super
  110. 45 item.parent = self
  111. end
  112. end
  113. end
  114. end
  115. end

lib/epub/content_document/typable.rb

100.0% lines covered

9 relevant lines. 9 lines covered and 0 lines missed.
    
  1. 1 require "set"
  2. 1 module EPUB
  3. 1 module ContentDocument
  4. 1 module Typable
  5. 1 attr_reader :types
  6. 1 def types
  7. 16 @types ||= Set.new
  8. end
  9. 1 def types=(ts)
  10. 104 @types = ts.kind_of?(Set) ? ts : Set.new(ts)
  11. end
  12. end
  13. end
  14. end

lib/epub/content_document/xhtml.rb

91.3% lines covered

23 relevant lines. 21 lines covered and 2 lines missed.
    
  1. 1 module EPUB
  2. 1 module ContentDocument
  3. 1 class XHTML
  4. 1 attr_accessor :item
  5. # @param [Boolean] detect_encoding See {Publication::Package::Manifest::Item#read}
  6. # @return [String] Returns the content string.
  7. 1 def read(detect_encoding: true)
  8. 10 item.read(detect_encoding: detect_encoding)
  9. end
  10. 1 alias raw_document read
  11. # @return [true|false] Whether referenced directly from spine or not.
  12. 1 def top_level?
  13. 2 !! item.itemref
  14. end
  15. # @return [String] Returns the value of title element.
  16. # If none, returns empty string
  17. 1 def title
  18. 2 title_elem = rexml.get_elements('.//title').first
  19. 2 if title_elem
  20. 1 title_elem.text
  21. else
  22. 1 warn 'title element not found'
  23. 1 ''
  24. end
  25. end
  26. # @return [REXML::Document] content as REXML::Document object
  27. 1 def rexml
  28. 3 @rexml ||= REXML::Document.new(raw_document)
  29. end
  30. # @return [Oga::XML::Document] content as Oga::XML::Document object
  31. 1 def oga
  32. require "oga"
  33. @oga ||= Oga.parse_xml(raw_document)
  34. end
  35. # @return [Nokogiri::XML::Document] content as Nokogiri::XML::Document object
  36. 1 def nokogiri
  37. 3 require 'nokogiri'
  38. 3 @nokogiri ||= Nokogiri.XML(raw_document)
  39. end
  40. end
  41. end
  42. end

lib/epub/inspector.rb

100.0% lines covered

25 relevant lines. 25 lines covered and 0 lines missed.
    
  1. 1 module EPUB
  2. 1 module Inspector
  3. 1 INSTANCE_VARIABLES_OPTION = {:exclude => []}
  4. 1 SIMPLE_TEMPLATE = "#<%{class}:%{object_id}>"
  5. 1 def inspect_simply
  6. 5 SIMPLE_TEMPLATE % {
  7. :class => self.class,
  8. :object_id => inspect_object_id
  9. }
  10. end
  11. 1 def inspect_object_id
  12. 13 (__id__ << 1).to_s(16)
  13. end
  14. 1 def inspect_instance_variables(options={})
  15. 8 options = INSTANCE_VARIABLES_OPTION.merge(options)
  16. 8 exclude = options[:exclude]
  17. 8 (instance_variables - exclude).map {|name|
  18. 45 value = instance_variable_get(name)
  19. 45 "#{name}=#{value.inspect}"
  20. }.join(' ')
  21. end
  22. 1 module PublicationModel
  23. 1 class << self
  24. 1 def included(mod)
  25. 5 mod.__send__ :include, Inspector
  26. end
  27. end
  28. 1 def inspect
  29. 3 template % {
  30. :class => self.class,
  31. 3 :package => (package && package.inspect_simply),
  32. :object_id => inspect_object_id,
  33. :attributes => inspect_instance_variables(exclude: [:@package])
  34. }
  35. end
  36. 1 def template
  37. 3 t = "#<%{class}:%{object_id}"
  38. 3 t << " @package=%{package}" if package
  39. 3 t << " %{attributes}>"
  40. end
  41. end
  42. end
  43. end

lib/epub/metadata.rb

87.38% lines covered

103 relevant lines. 90 lines covered and 13 lines missed.
    
  1. 1 require 'set'
  2. 1 module EPUB
  3. 1 class Metadata
  4. 1 include Inspector::PublicationModel
  5. 1 DC_ELEMS = [:identifiers, :titles, :languages] +
  6. [:contributors, :coverages, :creators, :dates, :descriptions, :formats, :publishers,
  7. :relations, :rights, :sources, :subjects, :types]
  8. # Used for CFI
  9. 1 attr_reader :children
  10. 1 attr_accessor :package, :unique_identifier, :metas, :links,
  11. 16 *(DC_ELEMS.collect {|elem| "dc_#{elem}"})
  12. 1 DC_ELEMS.each do |elem|
  13. 15 alias_method elem, "dc_#{elem}"
  14. 15 alias_method "#{elem}=", "dc_#{elem}="
  15. end
  16. 1 def initialize
  17. 171 (DC_ELEMS + [:metas, :links]).each do |elem|
  18. 2907 __send__ "#{elem}=", []
  19. end
  20. 171 @children = []
  21. end
  22. 1 def release_identifier
  23. 1 "#{unique_identifier}@#{modified}"
  24. end
  25. 1 alias package_identifier release_identifier
  26. 1 def title
  27. 4 return extended_title unless extended_title.empty?
  28. 6 compositted = titles.select {|title| title.display_seq}.sort.join("\n")
  29. 2 return compositted unless compositted.empty?
  30. 1 return main_title unless main_title.empty?
  31. titles.sort.join("\n")
  32. end
  33. 1 %w[main short collection edition extended].each do |type|
  34. 5 define_method "#{type}_title" do
  35. 42 titles.select {|title| title.title_type.to_s == type}.sort.join(' ')
  36. end
  37. end
  38. 1 def subtitle
  39. titles.select {|title| title.title_type.to_s == 'subtitle'}.sort.join(' ')
  40. end
  41. 1 def description
  42. descriptions.join(' ')
  43. end
  44. 1 def date
  45. dates.first
  46. end
  47. 1 def language
  48. languages.first
  49. end
  50. 1 def modified
  51. 4 metas.find {|meta|
  52. 7 meta.property == 'dcterms:modified' &&
  53. meta.refiners.empty?
  54. }
  55. end
  56. # Cover image used in EPUB 2
  57. # @return [EPUB::Publication::Package::Manifest::Item]
  58. 1 def cover_image
  59. cover_image_meta = metas.find {|meta| meta.name == "cover"}
  60. return unless cover_image_meta
  61. package.manifest[cover_image_meta.meta_content]
  62. end
  63. 1 def to_h
  64. DC_ELEMS.inject({}) do |hsh, elem|
  65. hsh[elem] = __send__(elem)
  66. hsh
  67. end
  68. end
  69. 1 def primary_metas
  70. metas.select {|meta| meta.primary_expression?}
  71. end
  72. 1 module Refinee
  73. 1 PROPERTIES = %w[alternate-script display-seq file-as group-position identifier-type meta-auth role title-type]
  74. 1 attr_writer :refiners
  75. 1 def refiners
  76. 848 @refiners ||= Set.new
  77. end
  78. 1 PROPERTIES.each do |voc|
  79. 8 met = voc.gsub(/-/, '_')
  80. 8 attr_writer met
  81. 8 define_method met do
  82. 128 refiners.find {|refiner| refiner.property == voc}
  83. end
  84. end
  85. end
  86. 1 class DCMES
  87. 1 include Refinee
  88. 1 attr_accessor :content, :id, :lang, :dir
  89. 1 def to_s
  90. 13 content.to_s
  91. end
  92. end
  93. 1 class Identifier < DCMES
  94. # @note This is ad-hoc
  95. # @todo Define and include OPF module for opf:scheme attribute
  96. # @todo Define general way to handle with identifier-type refiners
  97. 1 attr_accessor :scheme
  98. # @note This is ad-hoc
  99. # @todo Define and include OPF module for opf:scheme attribute
  100. # @todo Define general way to handle with identifier-type refiners
  101. 1 def isbn?
  102. 5 scheme == 'ISBN' or
  103. content.to_s.downcase.start_with? 'urn:isbn' or
  104. refiners.any? {|refiner|
  105. 2 refiner.property == 'identifier-type' and
  106. refiner.scheme == 'onix:codelist5' and
  107. %w[02 15].include? refiner.content
  108. }
  109. end
  110. end
  111. 1 class Title < DCMES
  112. 1 include Comparable
  113. 1 def <=>(other)
  114. 5 return 1 if other.display_seq.nil?
  115. 4 return -1 if display_seq.nil?
  116. 4 display_seq.to_s.to_i <=> other.display_seq.to_s.to_i
  117. end
  118. end
  119. 1 class Meta
  120. 1 include Refinee
  121. 1 attr_accessor :property, :id, :scheme, :content, :name, :meta_content
  122. 1 attr_reader :refines
  123. 1 def refines=(refinee)
  124. 740 refinee.refiners << self
  125. 740 @refines = refinee
  126. end
  127. 1 def refines?
  128. 2 ! refines.nil?
  129. end
  130. 1 alias subexpression? refines?
  131. 1 def primary_expression?
  132. 1 ! subexpression?
  133. end
  134. 1 def inspect
  135. 5 ivs = instance_variables.map {|iv|
  136. 8 [iv, instance_variable_get(iv).inspect].join('=')
  137. }.join(' ')
  138. 5 '<#%s:%#0x %s>' % [self.class, __id__, ivs]
  139. end
  140. 1 def to_s
  141. 41 content.to_s
  142. end
  143. end
  144. 1 class Link
  145. 1 include Refinee
  146. 1 attr_accessor :href, :rel, :id, :media_type
  147. 1 attr_reader :refines
  148. 1 def refines=(refinee)
  149. 45 refinee.refiners << self
  150. 45 @refines = refinee
  151. end
  152. end
  153. 1 class UnsupportedModel
  154. 1 attr_accessor :raw_element
  155. 1 def initialize(raw_element)
  156. @raw_element = raw_element
  157. end
  158. end
  159. end
  160. end

lib/epub/ocf.rb

100.0% lines covered

5 relevant lines. 5 lines covered and 0 lines missed.
    
  1. 1 module EPUB
  2. 1 class OCF
  3. 1 MODULES = %w[container encryption manifest metadata rights signatures]
  4. 7 MODULES.each {|m| require "epub/ocf/#{m}"}
  5. 1 attr_accessor :book, *MODULES
  6. end
  7. end

lib/epub/ocf/container.rb

100.0% lines covered

13 relevant lines. 13 lines covered and 0 lines missed.
    
  1. 1 module EPUB
  2. 1 class OCF
  3. 1 class Container
  4. 1 FILE = 'container.xml'
  5. 1 attr_reader :rootfiles
  6. 1 def initialize
  7. 34 @rootfiles = []
  8. end
  9. # syntax sugar
  10. 1 def rootfile
  11. 1 rootfiles.first
  12. end
  13. 1 class Rootfile
  14. 1 attr_accessor :full_path, :media_type,
  15. :package
  16. # @param full_path [Addressable::URI|nil]
  17. # @param media_type [String]
  18. 1 def initialize(full_path=nil, media_type=EPUB::MediaType::ROOTFILE)
  19. 100 @full_path, @media_type = full_path, media_type
  20. end
  21. end
  22. end
  23. end
  24. end

lib/epub/ocf/encryption.rb

100.0% lines covered

4 relevant lines. 4 lines covered and 0 lines missed.
    
  1. 1 module EPUB
  2. 1 class OCF
  3. 1 class Encryption
  4. 1 attr_accessor :content
  5. end
  6. end
  7. end

lib/epub/ocf/manifest.rb

100.0% lines covered

3 relevant lines. 3 lines covered and 0 lines missed.
    
  1. 1 module EPUB
  2. 1 class OCF
  3. 1 class Manifest
  4. end
  5. end
  6. end

lib/epub/ocf/metadata.rb

100.0% lines covered

4 relevant lines. 4 lines covered and 0 lines missed.
    
  1. 1 module EPUB
  2. 1 class OCF
  3. 1 class UnknownFormatMetadata
  4. 1 attr_accessor :content
  5. end
  6. end
  7. end

lib/epub/ocf/physical_container.rb

97.37% lines covered

38 relevant lines. 37 lines covered and 1 lines missed.
    
  1. 1 require 'monitor'
  2. 1 require 'epub/ocf/physical_container/archive_zip'
  3. 1 require 'epub/ocf/physical_container/unpacked_directory'
  4. 1 require 'epub/ocf/physical_container/unpacked_uri'
  5. 1 module EPUB
  6. 1 class OCF
  7. 1 class PhysicalContainer
  8. 1 class NoEntry < StandardError
  9. 1 class << self
  10. 1 def from_error(error)
  11. 8 no_entry = new(error.message)
  12. 8 no_entry.set_backtrace error.backtrace
  13. 8 no_entry
  14. end
  15. end
  16. end
  17. 1 @adapter = ArchiveZip
  18. 1 class << self
  19. 1 def find_adapter(adapter)
  20. 6 return adapter if adapter.instance_of? Class
  21. 3 if adapter == :Zipruby && ! const_defined?(adapter)
  22. require 'epub/ocf/physical_container/zipruby'
  23. end
  24. 3 const_get adapter
  25. end
  26. 1 def adapter
  27. 71 raise NoMethodError, "undefined method `#{__method__}' for #{self}" unless self == PhysicalContainer
  28. 71 @adapter
  29. end
  30. 1 def adapter=(adapter)
  31. 4 raise NoMethodError, "undefined method `#{__method__}' for #{self}" unless self == PhysicalContainer
  32. 4 @adapter = find_adapter(adapter)
  33. end
  34. 1 def open(container_path)
  35. 56 _adapter.new(container_path).open do |container|
  36. 56 yield container
  37. end
  38. end
  39. 1 def read(container_path, path_name)
  40. 20 open(container_path) {|container|
  41. 20 container.read(path_name.to_s)
  42. }
  43. end
  44. 1 private
  45. 1 def _adapter
  46. 56 (self == PhysicalContainer) ? @adapter : self
  47. end
  48. end
  49. 1 def initialize(container_path)
  50. 71 @container_path = container_path
  51. 71 @monitor = Monitor.new
  52. end
  53. end
  54. end
  55. end

lib/epub/ocf/physical_container/archive_zip.rb

100.0% lines covered

31 relevant lines. 31 lines covered and 0 lines missed.
    
  1. 1 require 'archive/zip'
  2. 1 module EPUB
  3. 1 class OCF
  4. 1 class PhysicalContainer
  5. 1 class ArchiveZip < self
  6. 1 def initialize(container_path)
  7. 52 super
  8. 52 @entries = {}
  9. 52 @last_iterated_entry_index = 0
  10. end
  11. 1 def open
  12. 56 Archive::Zip.open @container_path do |archive|
  13. 56 @monitor.synchronize do
  14. 56 @archive = archive
  15. begin
  16. 56 yield self
  17. ensure
  18. 56 @archive = nil
  19. end
  20. end
  21. end
  22. end
  23. 1 def read(path_name)
  24. 256 if @archive
  25. 250 target_index = @entries[path_name]
  26. 250 @archive.each.with_index do |entry, index|
  27. 3814 if target_index
  28. 1054 if target_index == index
  29. 102 return entry.file_data.read
  30. else
  31. 952 next
  32. end
  33. end
  34. 2760 next if index < @last_iterated_entry_index
  35. # We can force encoding UTF-8 because EPUB spec allows only UTF-8 filenames
  36. 836 entry_path = entry.zip_path.force_encoding('UTF-8')
  37. 836 @entries[entry_path] = index
  38. 836 @last_iterated_entry_index = index
  39. 836 if entry_path == path_name
  40. 44 return entry.file_data.read
  41. end
  42. end
  43. 104 raise NoEntry, "Entry not found: #{path_name}"
  44. else
  45. 12 open {|container| container.read(path_name)}
  46. end
  47. end
  48. end
  49. end
  50. end
  51. end

lib/epub/ocf/physical_container/unpacked_directory.rb

100.0% lines covered

9 relevant lines. 9 lines covered and 0 lines missed.
    
  1. 1 module EPUB
  2. 1 class OCF
  3. 1 class PhysicalContainer
  4. 1 class UnpackedDirectory < self
  5. 1 def open
  6. 8 yield self
  7. end
  8. 1 def read(path_name)
  9. 24 ::File.read(::File.join(@container_path, path_name))
  10. rescue ::Errno::ENOENT => error
  11. 8 raise NoEntry.from_error(error)
  12. end
  13. end
  14. end
  15. end
  16. end

lib/epub/ocf/physical_container/unpacked_uri.rb

83.33% lines covered

12 relevant lines. 10 lines covered and 2 lines missed.
    
  1. 1 require 'open-uri'
  2. 1 module EPUB
  3. 1 class OCF
  4. 1 class PhysicalContainer
  5. 1 class UnpackedURI < self
  6. # EPUB URI: http://example.net/path/to/book/
  7. # container.xml: http://example.net/path/to/book/META-INF/container.xml
  8. # @param [URI, String] container_path URI of EPUB container's root directory.
  9. # For exapmle, <code>"http://example.net/path/to/book/"</code>, which
  10. # should contain <code>"http://example.net/path/to/book/META-INF/container.xml"</code> as its container.xml file. Note that this should end with "/"(slash).
  11. 1 def initialize(container_path)
  12. 2 super(URI(container_path))
  13. end
  14. 1 def open
  15. yield self
  16. end
  17. 1 def read(path_name)
  18. 1 (@container_path + path_name).read
  19. rescue ::OpenURI::HTTPError => error
  20. raise NoEntry.from_error(error)
  21. end
  22. end
  23. end
  24. end
  25. end

lib/epub/ocf/physical_container/zipruby.rb

88.89% lines covered

18 relevant lines. 16 lines covered and 2 lines missed.
    
  1. 1 require 'zipruby'
  2. 1 module EPUB
  3. 1 class OCF
  4. 1 class PhysicalContainer
  5. 1 class Zipruby < self
  6. 1 def open
  7. 5 Zip::Archive.open @container_path do |archive|
  8. 5 @monitor.synchronize do
  9. begin
  10. 5 @archive = archive
  11. 5 yield self
  12. rescue ::Zip::Error => error
  13. raise NoEntry.from_error(error)
  14. ensure
  15. 5 @archive = nil
  16. end
  17. end
  18. end
  19. end
  20. 1 def read(path_name)
  21. 5 if @archive
  22. 8 @archive.fopen(path_name) {|entry| entry.read}
  23. else
  24. 2 open {|container| container.read(path_name)}
  25. end
  26. rescue ::Zip::Error => error
  27. raise NoEntry.from_error(error)
  28. ensure
  29. 5 @archive = nil
  30. end
  31. end
  32. end
  33. end
  34. end

lib/epub/ocf/rights.rb

100.0% lines covered

3 relevant lines. 3 lines covered and 0 lines missed.
    
  1. 1 module EPUB
  2. 1 class OCF
  3. 1 class Rights
  4. end
  5. end
  6. end

lib/epub/ocf/signatures.rb

100.0% lines covered

3 relevant lines. 3 lines covered and 0 lines missed.
    
  1. 1 module EPUB
  2. 1 class OCF
  3. 1 class Signatures
  4. end
  5. end
  6. end

lib/epub/parser.rb

86.96% lines covered

46 relevant lines. 40 lines covered and 6 lines missed.
    
  1. 1 require 'epub'
  2. 1 require 'epub/constants'
  3. 1 require 'epub/book'
  4. 1 module EPUB
  5. 1 class Parser
  6. 1 class << self
  7. # Parse an EPUB file
  8. #
  9. # @example
  10. # EPUB::Parser.parse('path/to/book.epub') # => EPUB::Book object
  11. #
  12. # @example
  13. # class MyBook
  14. # include EPUB::Book::Feature
  15. # end
  16. # book = MyBook.new
  17. # parsed_book = EPUB::Parser.parse('path/to/book.epub', book: book) # => #<MyBook:0x000000019760e8 @epub_file=..>
  18. # parsed_book.equal? book # => true
  19. #
  20. # @example
  21. # book = EPUB::Parser.parse('path/to/book.epub', class: MyBook) # => #<MyBook:0x000000019b0568 @epub_file=...>
  22. # book.instance_of? MyBook # => true
  23. #
  24. # @param [String] filepath
  25. # @param [Hash] options the type of return is specified by this argument.
  26. # If no options, returns {EPUB::Book} object.
  27. # For details of options, see below.
  28. # @option options [EPUB] :book instance of class which includes {EPUB} module
  29. # @option options [Class] :class class which includes {EPUB} module
  30. # @option options [EPUB::OCF::PhysicalContainer, Symbol] :container_adapter OCF physical container adapter to use when parsing EPUB container
  31. # When class passed, it is used. When symbol passed, it is considered as subclass name of {EPUB::OCF::PhysicalContainer}.
  32. # If omitted, {EPUB::OCF::PhysicalContainer.adapter} is used.
  33. # @return [EPUB] object which is an instance of class including {EPUB} module.
  34. # When option :book passed, returns the same object whose attributes about EPUB are set.
  35. # When option :class passed, returns the instance of the class.
  36. # Otherwise returns {EPUB::Book} object.
  37. 1 def parse(filepath, container_adapter: nil, book: nil, initialize_with: nil, **options)
  38. 22 new(filepath, container_adapter: container_adapter, book: book, initialize_with: initialize_with, **options).parse
  39. end
  40. end
  41. 1 def initialize(filepath, container_adapter: nil, book: nil, initialize_with: nil, **options)
  42. 29 if filepath.to_s.encoding == Encoding::ASCII_8BIT
  43. # On Windows and macOS, encoding of file name is set by Ruby,
  44. # but on UNIX, always is ASCII-8BIT
  45. # See https://docs.ruby-lang.org/ja/2.7.0/class/IO.html
  46. filepath = filepath.to_s.dup
  47. require "nkf"
  48. filepath.force_encoding NKF.guess(filepath)
  49. end
  50. 29 path_is_uri = (container_adapter == EPUB::OCF::PhysicalContainer::UnpackedURI or
  51. container_adapter == :UnpackedURI or
  52. EPUB::OCF::PhysicalContainer.adapter == EPUB::OCF::PhysicalContainer::UnpackedURI)
  53. raise "File #{filepath} not found" if
  54. 29 !path_is_uri and !File.exist?(filepath)
  55. 29 @filepath = path_is_uri ? filepath : File.realpath(filepath)
  56. 29 @book = create_book(book: book, initialize_with: initialize_with, **options)
  57. 29 if path_is_uri
  58. @book.container_adapter = :UnpackedURI
  59. 29 elsif File.directory? @filepath
  60. 2 @book.container_adapter = :UnpackedDirectory
  61. end
  62. 29 @book.epub_file = @filepath
  63. 29 if options[:container_adapter]
  64. @book.container_adapter = options[:container_adapter]
  65. end
  66. end
  67. 1 def parse
  68. 27 @book.container_adapter.open @filepath do |container|
  69. 27 @book.ocf = OCF.parse(container)
  70. 27 @book.ocf.container.rootfiles.each {|rootfile|
  71. 81 package = Publication.parse(container, rootfile.full_path.to_s)
  72. 81 rootfile.package = package
  73. 81 @book.packages << package
  74. 81 package.book = @book
  75. }
  76. end
  77. 27 @book
  78. end
  79. 1 private
  80. 1 def create_book(book: nil, initialize_with: nil, **params)
  81. case
  82. 29 when book
  83. 2 book
  84. when params[:class]
  85. 2 if initialize_with
  86. params[:class].new initialize_with
  87. else
  88. 2 params[:class].new
  89. end
  90. else
  91. 25 Book.new
  92. end
  93. end
  94. end
  95. end
  96. 1 require 'epub/parser/version'
  97. 1 require 'epub/parser/xml_document'
  98. 1 require 'epub/parser/ocf'
  99. 1 require 'epub/parser/publication'
  100. 1 require 'epub/parser/content_document'

lib/epub/parser/content_document.rb

94.83% lines covered

58 relevant lines. 55 lines covered and 3 lines missed.
    
  1. 1 require 'epub/content_document'
  2. 1 require 'epub/constants'
  3. 1 require 'epub/parser/xml_document'
  4. 1 module EPUB
  5. 1 class Parser
  6. 1 class ContentDocument
  7. 1 using XMLDocument::Refinements
  8. # @param [EPUB::Publication::Package::Manifest::Item] item
  9. 1 def initialize(item)
  10. 19 @item = item
  11. end
  12. 1 def parse
  13. 17 content_document = case @item.media_type
  14. when 'application/xhtml+xml'
  15. 17 if @item.nav?
  16. 11 EPUB::ContentDocument::Navigation.new
  17. else
  18. 6 EPUB::ContentDocument::XHTML.new
  19. end
  20. when 'image/svg+xml'
  21. EPUB::ContentDocument::SVG.new
  22. else
  23. nil
  24. end
  25. 17 return content_document if content_document.nil?
  26. 17 content_document.item = @item
  27. 17 document = XMLDocument.new(@item.read)
  28. # parse_content_document(document)
  29. 17 if @item.nav?
  30. 11 content_document.navigations = parse_navigations(document)
  31. end
  32. 17 content_document
  33. end
  34. # @param [XMLDocument, REXML::Document, Oga::XML::Document, Nokogiri::HTML::Document] document HTML document or element including nav
  35. # @return [Array<EPUB::ContentDocument::Navigation::Navigation>] navs array of Navigation object
  36. 1 def parse_navigations(document)
  37. 34 document.each_element_by_xpath('/xhtml:html/xhtml:body//xhtml:nav', EPUB::NAMESPACES).collect {|elem| parse_navigation elem}
  38. end
  39. # @param [REXML::Element, Oga::XML::Element, Nokogiri::XML::Element] element nav element
  40. # @return [EPUB::ContentDocument::Navigation::Navigation] nav Navigation object
  41. 1 def parse_navigation(element)
  42. 22 nav = EPUB::ContentDocument::Navigation::Navigation.new
  43. 22 nav.text = find_heading(element)
  44. 22 hidden = element.attribute_with_prefix('hidden')
  45. 22 nav.hidden = hidden.nil? ? nil : true
  46. 22 nav.types = element.attribute_with_prefix('type', 'epub')&.split(/\s+/)
  47. 22 element.each_element_by_xpath('./xhtml:ol/xhtml:li', EPUB::NAMESPACES).map do |elem|
  48. 42 nav.items << parse_navigation_item(elem)
  49. end
  50. 22 nav
  51. end
  52. # @param [REXML::Element, Oga::XML::Element, Nokogiri::XML::Element] element li element
  53. 1 def parse_navigation_item(element)
  54. 82 item = EPUB::ContentDocument::Navigation::Item.new
  55. 82 a_or_span = element.each_element_by_xpath('./xhtml:a[1]|xhtml:span[1]', EPUB::NAMESPACES).first
  56. 82 return a_or_span if a_or_span.nil?
  57. 82 item.text = a_or_span.content
  58. 82 item.types = a_or_span.attribute_with_prefix('type', 'epub')&.split(/\s+/)
  59. 82 if a_or_span.name == 'a'
  60. 72 if item.text.empty?
  61. 20 embedded_content = a_or_span.each_element_by_xpath('./xhtml:audio[1]|xhtml:canvas[1]|xhtml:embed[1]|xhtml:iframe[1]|xhtml:img[1]|xhtml:math[1]|xhtml:object[1]|xhtml:svg[1]|xhtml:video[1]', EPUB::NAMESPACES).first
  62. 20 unless embedded_content.nil?
  63. 20 case embedded_content.name
  64. when 'audio', 'canvas', 'embed', 'iframe'
  65. 10 item.text = embedded_content.attribute_with_prefix('name') || embedded_content.attribute_with_prefix('srcdoc')
  66. when 'img'
  67. 10 item.text = embedded_content.attribute_with_prefix('alt')
  68. when 'math', 'object'
  69. item.text = embedded_content.attribute_with_prefix('name')
  70. when 'svg', 'video'
  71. else
  72. end
  73. end
  74. 20 item.text = a_or_span.attribute_with_prefix('title').to_s if item.text.nil? || item.text.empty?
  75. end
  76. 72 item.href = a_or_span.attribute_with_prefix('href')
  77. 72 item.item = @item.find_item_by_relative_iri(item.href)
  78. end
  79. 122 item.items = element.each_element_by_xpath('./xhtml:ol[1]/xhtml:li', EPUB::NAMESPACES).map {|li| parse_navigation_item(li)}
  80. 82 item
  81. end
  82. 1 private
  83. # @param [REXML::Element, Oga::XML::Element, Nokogiri::XML::Element] element nav element
  84. # @return [String] heading heading text
  85. 1 def find_heading(element)
  86. 22 heading = element.each_element_by_xpath('./xhtml:h1|xhtml:h2|xhtml:h3|xhtml:h4|xhtml:h5|xhtml:h6|xhtml:hgroup', EPUB::NAMESPACES).first
  87. 22 return nil if heading.nil?
  88. 20 return heading.content unless heading.name == 'hgroup'
  89. 10 (heading.each_element_by_xpath(".//xhtml:h1", EPUB::NAMESPACES) ||
  90. heading.each_element_by_xpath(".//xhtml:h2", EPUB::NAMESPACES) ||
  91. heading.each_element_by_xpath(".//xhtml:h3", EPUB::NAMESPACES) ||
  92. heading.each_element_by_xpath(".//xhtml:h4", EPUB::NAMESPACES) ||
  93. heading.each_element_by_xpath(".//xhtml:h5", EPUB::NAMESPACES) ||
  94. heading.each_element_by_xpath(".//xhtml:h6", EPUB::NAMESPACES)).first.content
  95. end
  96. end
  97. end
  98. end

lib/epub/parser/metadata.rb

93.85% lines covered

65 relevant lines. 61 lines covered and 4 lines missed.
    
  1. 1 module EPUB
  2. 1 class Parser
  3. 1 module Metadata
  4. 1 using XMLDocument::Refinements
  5. 1 def parse_metadata(elem, unique_identifier_id, default_namespace)
  6. 127 metadata = EPUB::Publication::Package::Metadata.new
  7. 127 id_map = {}
  8. 127 default_namespace_uri = EPUB::NAMESPACES[default_namespace]
  9. 127 elem.each_element do |child|
  10. 2208 elem_name = child.name
  11. model =
  12. 2208 case child.namespace_uri
  13. when EPUB::NAMESPACES['dc']
  14. 847 case elem_name
  15. when 'identifier'
  16. 171 identifier = build_model(child, :Identifier, ['id'])
  17. 171 metadata.identifiers << identifier
  18. 171 identifier.scheme = child.attribute_with_prefix('scheme', 'opf')
  19. 171 identifier
  20. when 'title'
  21. 274 title = build_model(child, :Title)
  22. 274 metadata.titles << title
  23. 274 title
  24. when 'language'
  25. 98 language = build_model(child, :DCMES, ['id'])
  26. 98 metadata.languages << language
  27. 98 language
  28. when 'title', 'contributor', 'coverage', 'creator', 'date', 'description', 'format', 'publisher', 'relation', 'source', 'subject', 'rights', 'type'
  29. 304 attr = elem_name == 'rights' ? elem_name : elem_name + 's'
  30. 304 dcmes = build_model(child)
  31. 304 metadata.__send__(attr) << dcmes
  32. 304 dcmes
  33. else
  34. build_unsupported_model(child)
  35. end
  36. when default_namespace_uri
  37. 1361 case elem_name
  38. when 'meta'
  39. 1229 meta = build_model(child, :Meta, %w[property id scheme content name])
  40. 1229 metadata.metas << meta
  41. 1229 meta
  42. when 'link'
  43. 132 link = build_model(child, :Link, %w[id media-type])
  44. 132 metadata.links << link
  45. 132 link.href = child.attribute_with_prefix('href')
  46. 132 link.rel = Set.new(child.attribute_with_prefix('rel').split(/\s+/))
  47. 132 link
  48. else
  49. build_unsupported_model(child)
  50. end
  51. else
  52. build_unsupported_model(child)
  53. end
  54. 2208 metadata.children << model
  55. 2208 if model.kind_of?(EPUB::Metadata::Identifier) &&
  56. model.id == unique_identifier_id
  57. 127 metadata.unique_identifier = model
  58. end
  59. 2208 if model.respond_to?(:id) && model.id
  60. 651 id_map[model.id] = {refinee: model}
  61. end
  62. 2208 refines = child.attribute_with_prefix('refines')
  63. 2208 if refines && refines.start_with?('#')
  64. 768 id = refines[1..-1]
  65. 768 id_map[id] ||= {}
  66. 768 id_map[id][:refiners] ||= []
  67. 768 id_map[id][:refiners] << model
  68. end
  69. end
  70. 127 id_map.values.each do |hsh|
  71. 651 next unless hsh[:refiners]
  72. 460 next unless hsh[:refinee]
  73. 1228 hsh[:refiners].each {|meta| meta.refines = hsh[:refinee]}
  74. end
  75. 127 metadata
  76. end
  77. 1 def build_model(elem, klass=:DCMES, attributes=%w[id lang dir])
  78. 2208 model = EPUB::Metadata.const_get(klass).new
  79. 2208 attributes.each do |attr|
  80. 8412 writer_name = (attr == "content") ? "meta_content=" : "#{attr.gsub('-', '_')}="
  81. 8412 namespace = (attr == "lang") ? "xml" : nil
  82. 8412 model.__send__ writer_name, elem.attribute_with_prefix(attr, namespace)
  83. end
  84. 2208 model.content = elem.content unless klass == :Link
  85. 2208 model.content.strip! if klass == :Identifier
  86. 2208 model
  87. end
  88. 1 def build_unsupported_model(elem)
  89. EPUB::Metadata::UnsupportedModel.new(elem)
  90. end
  91. end
  92. end
  93. end

lib/epub/parser/ocf.rb

94.23% lines covered

52 relevant lines. 49 lines covered and 3 lines missed.
    
  1. 1 require 'epub/constants'
  2. 1 require 'epub/ocf'
  3. 1 require 'epub/ocf/physical_container'
  4. 1 require 'epub/parser/metadata'
  5. 1 require "epub/parser/xml_document"
  6. 1 module EPUB
  7. 1 class Parser
  8. 1 class OCF
  9. 1 using XMLDocument::Refinements
  10. 1 include Metadata
  11. 1 DIRECTORY = 'META-INF'
  12. 1 class << self
  13. 1 def parse(container)
  14. 27 new(container).parse
  15. end
  16. end
  17. 1 def initialize(container)
  18. 36 @container = container
  19. 36 @ocf = EPUB::OCF.new
  20. end
  21. 1 def parse
  22. 28 EPUB::OCF::MODULES.each do |m|
  23. begin
  24. 168 data = @container.read(File.join(DIRECTORY, "#{m}.xml"))
  25. 56 @ocf.__send__ "#{m}=", __send__("parse_#{m}", data)
  26. rescue EPUB::OCF::PhysicalContainer::NoEntry
  27. end
  28. end
  29. 28 @ocf
  30. end
  31. 1 def parse_container(xml)
  32. 33 container = EPUB::OCF::Container.new
  33. 33 doc = XMLDocument.new(xml)
  34. 33 doc.each_element_by_xpath "/ocf:container/ocf:rootfiles/ocf:rootfile", EPUB::NAMESPACES do |elem|
  35. 99 rootfile = EPUB::OCF::Container::Rootfile.new
  36. 99 rootfile.full_path = Addressable::URI.parse(elem.attribute_with_prefix('full-path'))
  37. 99 rootfile.media_type = elem.attribute_with_prefix('media-type')
  38. 99 container.rootfiles << rootfile
  39. end
  40. 33 container
  41. end
  42. 1 def parse_encryption(content)
  43. 1 encryption = EPUB::OCF::Encryption.new
  44. 1 encryption.content = content
  45. 1 encryption
  46. end
  47. 1 def parse_manifest(content)
  48. warn "Not implemented: #{self.class}##{__method__}" if $VERBOSE
  49. end
  50. 1 def parse_metadata(content)
  51. 30 doc = XMLDocument.new(content)
  52. 30 unless multiple_rendition_metadata?(doc)
  53. 1 warn "Not implemented: #{self.class}##{__method__}" if $VERBOSE
  54. 1 metadata = EPUB::OCF::UnknownFormatMetadata.new
  55. 1 metadata.content = content
  56. 1 return metadata
  57. end
  58. 29 super(doc.root, doc.root.attribute_with_prefix('unique-identifier'), 'metadata')
  59. end
  60. 1 def parse_rights(content)
  61. warn "Not implemented: #{self.class}##{__method__}" if $VERBOSE
  62. end
  63. 1 def parse_signatures(content)
  64. warn "Not implemented: #{self.class}##{__method__}" if $VERBOSE
  65. end
  66. 1 private
  67. 1 def multiple_rendition_metadata?(doc)
  68. 30 doc.root &&
  69. doc.root.name == 'metadata' &&
  70. doc.root.namespaces['xmlns'] == EPUB::NAMESPACES['metadata']
  71. end
  72. end
  73. end
  74. end

lib/epub/parser/publication.rb

98.92% lines covered

93 relevant lines. 92 lines covered and 1 lines missed.
    
  1. 1 require 'strscan'
  2. 1 require 'epub/publication'
  3. 1 require 'epub/constants'
  4. 1 require 'epub/parser/metadata'
  5. 1 module EPUB
  6. 1 class Parser
  7. 1 class Publication
  8. 1 using XMLDocument::Refinements
  9. 1 include Metadata
  10. 1 class << self
  11. 1 def parse(container, file)
  12. 81 opf = container.read(Addressable::URI.unencode(file))
  13. 81 new(opf).parse
  14. end
  15. end
  16. 1 def initialize(opf)
  17. 125 @doc = XMLDocument.new(opf)
  18. end
  19. 1 def parse
  20. 90 package = parse_package(@doc)
  21. 90 (EPUB::Publication::Package::CONTENT_MODELS - [:bindings]).each do |model|
  22. 360 package.__send__ "#{model}=", __send__("parse_#{model}", @doc)
  23. end
  24. 90 package.bindings = parse_bindings(@doc, package.manifest)
  25. 90 package
  26. end
  27. 1 def parse_package(doc)
  28. 125 package = EPUB::Publication::Package.new
  29. 125 elem = doc.root
  30. 125 %w[version xml:lang dir id].each do |attr|
  31. 500 package.__send__ "#{attr.gsub(/\:/, '_')}=", elem.attribute_with_prefix(attr)
  32. end
  33. 125 package.prefix = parse_prefix(elem.attribute_with_prefix('prefix'))
  34. 125 EPUB::Publication.__send__ :include, EPUB::Publication::FixedLayout if package.prefix.key? EPUB::Publication::FixedLayout::PREFIX_KEY
  35. 125 package
  36. end
  37. 1 def parse_metadata(doc)
  38. 98 super(doc.each_element_by_xpath('/opf:package/opf:metadata', EPUB::NAMESPACES).first, doc.root.attribute_with_prefix('unique-identifier'), 'opf')
  39. end
  40. 1 def parse_manifest(doc)
  41. 110 manifest = EPUB::Publication::Package::Manifest.new
  42. 110 elem = doc.each_element_by_xpath('/opf:package/opf:manifest', EPUB::NAMESPACES).first
  43. 110 manifest.id = elem.attribute_with_prefix('id')
  44. 110 fallback_map = {}
  45. 110 elem.each_element_by_xpath('./opf:item', EPUB::NAMESPACES).each do |e|
  46. 1199 item = EPUB::Publication::Package::Manifest::Item.new
  47. 1199 %w[id media-type media-overlay].each do |attr|
  48. 3597 item.__send__ "#{attr.gsub(/-/, '_')}=", e.attribute_with_prefix(attr)
  49. end
  50. 1199 item.href = e.attribute_with_prefix('href')
  51. 1199 fallback = e.attribute_with_prefix('fallback')
  52. 1199 fallback_map[fallback] = item if fallback
  53. 1199 properties = e.attribute_with_prefix('properties')
  54. 1199 item.properties = properties.split(' ') if properties
  55. 1199 manifest << item
  56. end
  57. 110 fallback_map.each_pair do |id, from|
  58. 112 from.fallback = manifest[id]
  59. end
  60. 110 manifest
  61. end
  62. 1 def parse_spine(doc)
  63. 90 spine = EPUB::Publication::Package::Spine.new
  64. 90 elem = doc.each_element_by_xpath('/opf:package/opf:spine', EPUB::NAMESPACES).first
  65. 90 %w[id toc page-progression-direction].each do |attr|
  66. 270 spine.__send__ "#{attr.gsub(/-/, '_')}=", elem.attribute_with_prefix(attr)
  67. end
  68. 90 elem.each_element_by_xpath('./opf:itemref', EPUB::NAMESPACES).each do |e|
  69. 288 itemref = EPUB::Publication::Package::Spine::Itemref.new
  70. 288 %w[idref id].each do |attr|
  71. 576 itemref.__send__ "#{attr}=", e.attribute_with_prefix(attr)
  72. end
  73. 288 itemref.linear = (e.attribute_with_prefix('linear') != 'no')
  74. 288 properties = e.attribute_with_prefix('properties')
  75. 288 itemref.properties = properties.split(' ') if properties
  76. 288 spine << itemref
  77. end
  78. 90 spine
  79. end
  80. 1 def parse_guide(doc)
  81. 93 guide = EPUB::Publication::Package::Guide.new
  82. 93 doc.each_element_by_xpath '/opf:package/opf:guide/opf:reference', EPUB::NAMESPACES do |ref|
  83. 39 reference = EPUB::Publication::Package::Guide::Reference.new
  84. 39 %w[type title].each do |attr|
  85. 78 reference.__send__ "#{attr}=", ref.attribute_with_prefix(attr)
  86. end
  87. 39 reference.href = ref.attribute_with_prefix('href')
  88. 39 guide << reference
  89. end
  90. 93 guide
  91. end
  92. 1 def parse_bindings(doc, handler_map)
  93. 97 bindings = EPUB::Publication::Package::Bindings.new
  94. 97 doc.each_element_by_xpath '/opf:package/opf:bindings/opf:mediaType', EPUB::NAMESPACES do |elem|
  95. 43 media_type = EPUB::Publication::Package::Bindings::MediaType.new
  96. 43 media_type.media_type = elem.attribute_with_prefix('media-type')
  97. 43 media_type.handler = handler_map[elem.attribute_with_prefix('handler')]
  98. 43 bindings << media_type
  99. end
  100. 97 bindings
  101. end
  102. 1 def parse_prefix(str)
  103. 125 prefixes = {}
  104. 125 return prefixes if str.nil? or str.empty?
  105. 124 scanner = StringScanner.new(str)
  106. 124 scanner.scan /\s*/
  107. 124 while prefix = scanner.scan(/[^\:\s]+/)
  108. 193 scanner.scan /[\:\s]+/
  109. 193 iri = scanner.scan(/[^\s]+/)
  110. 193 if iri.nil? or iri.empty?
  111. warn "no IRI detected for prefix `#{prefix}`"
  112. else
  113. 193 prefixes[prefix] = iri
  114. end
  115. 193 scanner.scan /\s*/
  116. end
  117. 124 prefixes
  118. end
  119. end
  120. end
  121. end

lib/epub/parser/xml_document.rb

100.0% lines covered

13 relevant lines. 13 lines covered and 0 lines missed.
    
  1. 1 module EPUB
  2. 1 class Parser
  3. 1 class XMLDocument
  4. 1 class << self
  5. 1 attr_accessor :backend
  6. 1 def new(xml)
  7. 259 case backend
  8. when :Oga
  9. 65 Oga.parse_xml(xml)
  10. when :Nokogiri
  11. 186 Nokogiri.XML(xml)
  12. else
  13. 8 REXML::Document.new(xml)
  14. end
  15. end
  16. end
  17. end
  18. end
  19. end
  20. 1 %i[Nokogiri Oga REXML].each do |backend|
  21. begin
  22. 3 require "epub/parser/xml_document/refinements/#{backend.downcase}"
  23. 3 EPUB::Parser::XMLDocument.backend ||= backend
  24. rescue LoadError
  25. end
  26. end

lib/epub/parser/xml_document/refinements/nokogiri.rb

100.0% lines covered

17 relevant lines. 17 lines covered and 0 lines missed.
    
  1. 1 require "nokogiri"
  2. 1 module EPUB
  3. 1 class Parser
  4. 1 class XMLDocument
  5. 1 module Refinements
  6. 1 module Nokogiri
  7. 1 refine ::Nokogiri::XML::Node do
  8. 1 def each_element_by_xpath(xpath, namespaces = nil, &block)
  9. 637 xpath(xpath, namespaces).each &block
  10. end
  11. 1 def attribute_with_prefix(name, prefix = nil)
  12. 15669 attribute_with_ns(name, EPUB::NAMESPACES[prefix])&.value
  13. end
  14. 1 def each_element(xpath = nil, &block)
  15. 95 element_children.each(&block)
  16. end
  17. 1 alias elements element_children
  18. 1 def namespace_uri
  19. 1600 namespace.href
  20. end
  21. end
  22. end
  23. 1 include Nokogiri
  24. end
  25. end
  26. end
  27. end

lib/epub/parser/xml_document/refinements/oga.rb

100.0% lines covered

37 relevant lines. 37 lines covered and 0 lines missed.
    
  1. 1 require "oga"
  2. 1 module EPUB
  3. 1 class Parser
  4. 1 class XMLDocument
  5. 1 module Refinements
  6. 1 module Oga
  7. 1 [::Oga::XML::Document, ::Oga::XML::Node].each do |klass|
  8. 2 refine klass do
  9. [
  10. 2 [:document, ::Oga::XML::Document],
  11. [:element, ::Oga::XML::Element],
  12. [:text, ::Oga::XML::Text]
  13. ].each do |(type, klass)|
  14. 6 define_method "#{type}?" do
  15. 1044 kind_of? klass
  16. end
  17. end
  18. 2 def root
  19. # Couldn't use find(&:element?) for Rubies under 2.6
  20. 215 root_node.children.find {|child| child.element?}
  21. end
  22. 2 def elements
  23. # Couldn't use find(&:element?) for Rubies under 2.6
  24. 548 children.select {|child| child.element?}
  25. end
  26. # Need for Rubies under 2.6
  27. 2 def respond_to?(name, include_all = false)
  28. 15 [:root, :elements].include?(name) || super
  29. end
  30. 2 def each_element_by_xpath(xpath, namespaces = nil, &block)
  31. 286 xpath(xpath, namespaces: namespaces).each &block
  32. end
  33. end
  34. end
  35. 1 refine ::Oga::XML::Element do
  36. 1 def attribute_with_prefix(name, prefix = nil)
  37. 5047 name = prefix ? "#{prefix}:#{name}" : name
  38. 5047 get(name)
  39. end
  40. 1 def each_element(xpath = nil, &block)
  41. 27 each_node do |node|
  42. 1529 throw :skip_children unless node.kind_of?(::Oga::XML::Element)
  43. 516 block.call node
  44. end
  45. end
  46. 1 def namespace_uri
  47. 516 namespace&.uri
  48. end
  49. 1 alias original_namespaces namespaces
  50. 1 def namespaces
  51. 5 original_namespaces.each_with_object({}) {|(name, namespace), nss|
  52. 10 nss[name] = namespace.uri
  53. }
  54. end
  55. 1 alias content text
  56. end
  57. 1 refine ::Oga::XML::Text do
  58. 1 alias content text
  59. end
  60. end
  61. 1 include Oga
  62. end
  63. end
  64. end
  65. end

lib/epub/parser/xml_document/refinements/rexml.rb

96.0% lines covered

25 relevant lines. 24 lines covered and 1 lines missed.
    
  1. 1 require "rexml/document"
  2. 1 module EPUB
  3. 1 class Parser
  4. 1 class XMLDocument
  5. 1 module Refinements
  6. 1 module REXML
  7. 1 [::REXML::Element, ::REXML::Text].each do |klass|
  8. 2 refine klass do
  9. 2 %i[document element text].each do |type|
  10. 6 define_method "#{type}?" do
  11. node_type == type
  12. end
  13. end
  14. end
  15. end
  16. 1 refine ::REXML::Element do
  17. 1 def each_element_by_xpath(xpath, namespaces = nil, &block)
  18. 54 ::REXML::XPath.each self, xpath, namespaces, &block
  19. end
  20. 1 def attribute_with_prefix(name, prefix = nil)
  21. 859 attribute(name, EPUB::NAMESPACES[prefix])&.value
  22. end
  23. 1 alias namespace_uri namespace
  24. 1 def content
  25. 98 each_child.inject("") {|text, node|
  26. 94 case node.node_type
  27. when :document, :element
  28. 2 text << node.content
  29. when :text
  30. 92 text << node.value
  31. end
  32. }
  33. end
  34. end
  35. 1 refine ::REXML::Text do
  36. 1 alias content value
  37. end
  38. end
  39. 1 include REXML
  40. end
  41. end
  42. end
  43. end

lib/epub/publication.rb

100.0% lines covered

2 relevant lines. 2 lines covered and 0 lines missed.
    
  1. 1 require 'epub/publication/package'
  2. 1 require 'epub/publication/fixed_layout'

lib/epub/publication/fixed_layout.rb

100.0% lines covered

116 relevant lines. 116 lines covered and 0 lines missed.
    
  1. 1 module EPUB
  2. 1 module Publication
  3. 1 module FixedLayout
  4. 1 PREFIX_KEY = 'rendition'.freeze
  5. 1 PREFIX_VALUE = 'http://www.idpf.org/vocab/rendition/#'.freeze
  6. RENDITION_PROPERTIES = {
  7. 1 'layout' => ['reflowable'.freeze, 'pre-paginated'.freeze].freeze,
  8. 'orientation' => ['auto'.freeze, 'landscape'.freeze, 'portrait'.freeze].freeze,
  9. 'spread' => ['auto'.freeze, 'none'.freeze, 'landscape'.freeze, 'portrait'.freeze, 'both'.freeze].freeze
  10. }.freeze
  11. 1 class UnsupportedRenditionValue < StandardError; end
  12. 1 class << self
  13. 1 def included(package_class)
  14. [
  15. 2 [Package, PackageMixin],
  16. [Package::Metadata, MetadataMixin],
  17. [Package::Spine::Itemref, ItemrefMixin],
  18. [Package::Manifest::Item, ItemMixin],
  19. [ContentDocument::XHTML, ContentDocumentMixin],
  20. ].each do |(base, mixin)|
  21. 10 base.__send__ :include, mixin
  22. end
  23. end
  24. end
  25. 1 module Rendition
  26. # @note Call after defining #rendition_xxx and #renditionn_xxx=
  27. 1 def def_rendition_methods
  28. 4 RENDITION_PROPERTIES.each_key do |property|
  29. 12 alias_method property, "rendition_#{property}"
  30. 12 alias_method "#{property}=", "rendition_#{property}="
  31. end
  32. 4 def_rendition_layout_methods
  33. end
  34. 1 def def_rendition_layout_methods
  35. 4 property = 'layout'
  36. 4 RENDITION_PROPERTIES[property].each do |value|
  37. 8 method_name_base = value.gsub('-', '_')
  38. 8 writer_name = "#{method_name_base}="
  39. 8 define_method writer_name do |new_value|
  40. 4 new_prop = new_value ? value : values.find {|l| l != value}
  41. 4 __send__ "rendition_#{property}=", new_prop
  42. end
  43. 8 maker_name = "make_#{method_name_base}"
  44. 8 define_method maker_name do
  45. 5 __send__ "rendition_#{property}=", value
  46. end
  47. 8 destructive_method_name = "#{method_name_base}!"
  48. 8 alias_method destructive_method_name, maker_name
  49. 8 predicate_name = "#{method_name_base}?"
  50. 8 define_method predicate_name do
  51. 13 __send__("rendition_#{property}") == value
  52. end
  53. end
  54. end
  55. end
  56. 1 module PackageMixin
  57. # @return [true, false]
  58. 1 def using_fixed_layout
  59. 4 prefix.has_key? PREFIX_KEY and
  60. prefix[PREFIX_KEY] == PREFIX_VALUE
  61. end
  62. 1 alias using_fixed_layout? using_fixed_layout
  63. # @param using_fixed_layout [true, false]
  64. 1 def using_fixed_layout=(using_fixed_layout)
  65. 2 if using_fixed_layout
  66. 1 prefix[PREFIX_KEY] = PREFIX_VALUE
  67. else
  68. 1 prefix.delete PREFIX_KEY
  69. end
  70. end
  71. end
  72. 1 module MetadataMixin
  73. 1 extend Rendition
  74. 1 RENDITION_PROPERTIES.each_pair do |property, values|
  75. 3 define_method "rendition_#{property}" do
  76. 45 meta = metas.find {|m| m.property == "rendition:#{property}"}
  77. 27 meta ? meta.content : values.first
  78. end
  79. 3 define_method "rendition_#{property}=" do |new_value|
  80. 14 raise UnsupportedRenditionValue, new_value unless values.include? new_value
  81. 13 prefixed_property = "rendition:#{property}"
  82. 13 values_to_be_deleted = values - [new_value]
  83. 21 metas.delete_if {|meta| meta.property == prefixed_property && values_to_be_deleted.include?(meta.content)}
  84. 14 unless metas.any? {|meta| meta.property == prefixed_property && meta.content == new_value}
  85. 12 meta = Package::Metadata::Meta.new
  86. 12 meta.property = prefixed_property
  87. 12 meta.content = new_value
  88. 12 metas << meta
  89. end
  90. 13 new_value
  91. end
  92. end
  93. 1 def_rendition_methods
  94. end
  95. 1 module ItemrefMixin
  96. 1 extend Rendition
  97. 1 PAGE_SPREAD_PROPERTY = 'center'
  98. 1 PAGE_SPREAD_PREFIX = 'rendition:page-spread-'
  99. 1 class << self
  100. # @todo Define using Module#prepend after Ruby 2.0 will become popular
  101. 1 def included(base)
  102. 2 return if base.instance_methods.include? :page_spread_without_fixed_layout
  103. 1 base.__send__ :alias_method, :page_spread_without_fixed_layout, :page_spread
  104. 1 base.__send__ :alias_method, :page_spread_writer_without_fixed_layout, :page_spread=
  105. 1 prefixed_page_spread_property = "#{PAGE_SPREAD_PREFIX}#{PAGE_SPREAD_PROPERTY}"
  106. 1 base.__send__ :define_method, :page_spread do
  107. 3 property = page_spread_without_fixed_layout
  108. 3 return property if property
  109. 2 properties.include?(prefixed_page_spread_property) ? PAGE_SPREAD_PROPERTY : nil
  110. end
  111. 1 base.__send__ :define_method, :page_spread= do |new_value|
  112. 6 if new_value == PAGE_SPREAD_PROPERTY
  113. 2 page_spread_writer_without_fixed_layout nil
  114. 2 properties << prefixed_page_spread_property
  115. else
  116. 4 page_spread_writer_without_fixed_layout new_value
  117. end
  118. 6 new_value
  119. end
  120. end
  121. end
  122. 1 RENDITION_PROPERTIES.each do |property, values|
  123. 3 rendition_property_prefix = "rendition:#{property}-"
  124. 3 reader_name = "rendition_#{property}"
  125. 3 define_method reader_name do
  126. 27 prop_value = properties.find {|prop| prop.start_with? rendition_property_prefix}
  127. 16 prop_value ? prop_value.gsub(/\A#{Regexp.escape(rendition_property_prefix)}/, '') :
  128. spine.package.metadata.__send__(reader_name)
  129. end
  130. 3 writer_name = "#{reader_name}="
  131. 3 define_method writer_name do |new_value|
  132. 12 if new_value.nil?
  133. 2 properties.delete_if {|prop| prop.start_with? rendition_property_prefix}
  134. 1 return new_value
  135. end
  136. 11 raise UnsupportedRenditionValue, new_value unless values.include? new_value
  137. 27 values_to_be_deleted = (values - [new_value]).map {|value| "#{rendition_property_prefix}#{value}"}
  138. 14 properties.delete_if {|prop| values_to_be_deleted.include? prop}
  139. 11 new_property = "#{rendition_property_prefix}#{new_value}"
  140. 11 properties << new_property unless properties.include? new_property
  141. 11 new_value
  142. end
  143. end
  144. 1 def_rendition_methods
  145. end
  146. 1 module ItemMixin
  147. 1 extend Rendition
  148. 1 RENDITION_PROPERTIES.each_key do |property|
  149. 3 define_method "rendition_#{property}" do
  150. 2 itemref.__send__ property
  151. end
  152. 3 writer_name = "rendition_#{property}="
  153. 3 define_method writer_name do |value|
  154. 2 itemref.__send__ writer_name, value
  155. end
  156. end
  157. 1 def_rendition_methods
  158. end
  159. 1 module ContentDocumentMixin
  160. 1 extend Rendition
  161. 1 RENDITION_PROPERTIES.each_key do |property|
  162. 3 reader_name = "rendition_#{property}"
  163. 3 define_method reader_name do
  164. 1 item.__send__ reader_name
  165. end
  166. 3 writer_name = "rendition_#{property}="
  167. 3 define_method writer_name do |value|
  168. 1 item.__send__ writer_name, value
  169. end
  170. end
  171. 1 def_rendition_methods
  172. end
  173. end
  174. end
  175. end

lib/epub/publication/package.rb

100.0% lines covered

37 relevant lines. 37 lines covered and 0 lines missed.
    
  1. 1 module EPUB
  2. 1 module Publication
  3. 1 class Package
  4. 1 include Inspector
  5. 1 CONTENT_MODELS = [:metadata, :manifest, :spine, :guide, :bindings]
  6. RESERVED_VOCABULARY_PREFIXES = {
  7. 1 '' => 'http://idpf.org/epub/vocab/package/#',
  8. 'dcterms' => 'http://purl.org/dc/terms/',
  9. 'marc' => 'http://id.loc.gov/vocabulary/',
  10. 'media' => 'http://www.idpf.org/epub/vocab/overlays/#',
  11. 'onix' => 'http://www.editeur.org/ONIX/book/codelists/current.html#',
  12. 'xsd' => 'http://www.w3.org/2001/XMLSchema#'
  13. }
  14. 1 class << self
  15. 1 def define_content_model(model_name)
  16. 5 define_method "#{model_name}=" do |model|
  17. 524 current_model = __send__(model_name)
  18. 524 current_model.package = nil if current_model
  19. 524 model.package = self
  20. 524 instance_variable_set "@#{model_name}", model
  21. end
  22. end
  23. end
  24. 1 attr_accessor :book,
  25. :version, :prefix, :xml_lang, :dir, :id
  26. 1 attr_reader *CONTENT_MODELS
  27. 1 alias lang xml_lang
  28. 1 alias lang= xml_lang=
  29. 1 CONTENT_MODELS.each do |model|
  30. 5 define_content_model model
  31. end
  32. 1 def initialize
  33. 162 @prefix = {}
  34. end
  35. # @return [EPUB::Metadata::Identifier] Unique Identifier
  36. 1 def unique_identifier
  37. 2 @metadata.unique_identifier
  38. end
  39. # Corresponding {Rootfile}
  40. # @return [OCF::Container::Rootfile]
  41. 1 def rootfile
  42. 86 @book.ocf.container.rootfiles.find {|rf| rf.package == self}
  43. end
  44. # Full path in EPUB archive
  45. # @return [Addressable::URI]
  46. 1 def full_path
  47. 18 rootfile.full_path if rootfile
  48. end
  49. 1 def inspect
  50. 4 "#<%{class}:%{object_id} %{attributes} %{models}>" % {
  51. :class => self.class,
  52. :object_id => inspect_object_id,
  53. 20 :attributes => inspect_instance_variables(exclude: CONTENT_MODELS.map {|model| :"@#{model}"}),
  54. :models => inspect_models
  55. }
  56. end
  57. 1 def inspect_models
  58. 4 CONTENT_MODELS.map {|name|
  59. 20 model = __send__(name)
  60. 20 representation = model.nil? ? model.inspect : model.inspect_simply
  61. 20 "@#{name}=#{representation}"
  62. }.join(' ')
  63. end
  64. end
  65. end
  66. end
  67. 1 EPUB::Publication::Package::CONTENT_MODELS.each do |f|
  68. 5 require_relative "package/#{f}"
  69. end

lib/epub/publication/package/bindings.rb

100.0% lines covered

17 relevant lines. 17 lines covered and 0 lines missed.
    
  1. 1 module EPUB
  2. 1 module Publication
  3. 1 class Package
  4. 1 class Bindings
  5. 1 include Inspector::PublicationModel
  6. 1 attr_accessor :package
  7. 1 def initialize
  8. 97 @media_types = {}
  9. end
  10. 1 def <<(media_type)
  11. 43 @media_types[media_type.media_type] = media_type
  12. end
  13. 1 def [](media_type)
  14. 6 _, mt = @media_types.detect {|key, _| key == media_type}
  15. 3 mt
  16. end
  17. 1 def media_types
  18. 5 @media_types.values
  19. end
  20. 1 class MediaType
  21. 1 attr_accessor :media_type, :handler
  22. end
  23. end
  24. end
  25. end
  26. end

lib/epub/publication/package/guide.rb

100.0% lines covered

32 relevant lines. 32 lines covered and 0 lines missed.
    
  1. 1 module EPUB
  2. 1 module Publication
  3. 1 class Package
  4. 1 class Guide
  5. 1 include Inspector::PublicationModel
  6. 1 attr_accessor :package, :references
  7. 1 def initialize
  8. 93 Reference::TYPES.each do |type|
  9. 1581 variable_name = '@' + type.gsub('-', '_')
  10. 1581 instance_variable_set variable_name, nil
  11. end
  12. 93 @references = []
  13. end
  14. 1 def <<(reference)
  15. 39 reference.guide = self
  16. 39 references << reference
  17. end
  18. 1 class Reference
  19. 1 TYPES = %w[cover title-page toc index glossary acknowledgements bibliography colophon copyright-page dedication epigraph foreword loi lot notes preface text]
  20. 1 attr_accessor :guide,
  21. :type, :title
  22. 1 attr_reader :href
  23. 1 def href=(iri)
  24. 39 @href = iri.kind_of?(Addressable::URI) ? iri : Addressable::URI.parse(iri)
  25. end
  26. 1 def item
  27. 1 return @item if @item
  28. 1 request_uri = href.request_uri
  29. 1 @item = @guide.package.manifest.items.find {|item|
  30. 10 item.href.request_uri == request_uri
  31. }
  32. end
  33. end
  34. 1 Reference::TYPES.each do |type|
  35. 17 method_name = type.gsub('-', '_')
  36. 17 define_method method_name do
  37. 3 var = instance_variable_get "@#{method_name}"
  38. 3 return var if var
  39. 4 var = references.find {|ref| ref.type == type}
  40. 2 instance_variable_set "@#{method_name}", var
  41. end
  42. end
  43. end
  44. end
  45. end
  46. end

lib/epub/publication/package/manifest.rb

96.43% lines covered

112 relevant lines. 108 lines covered and 4 lines missed.
    
  1. 1 require 'set'
  2. 1 require 'addressable/uri'
  3. 1 require 'rchardet'
  4. 1 require 'epub/constants'
  5. 1 require 'epub/parser/content_document'
  6. 1 module EPUB
  7. 1 module Publication
  8. 1 class Package
  9. 1 class Manifest
  10. 1 include Inspector::PublicationModel
  11. 1 attr_accessor :package,
  12. :id
  13. 1 def initialize
  14. 140 @items = {}
  15. end
  16. # @param item [Item]
  17. # @return [Manifest] self
  18. 1 def <<(item)
  19. 1291 item.manifest = self
  20. 1291 @items[item.id] = item
  21. 1291 self
  22. end
  23. 1 def each_nav
  24. 3 if block_given?
  25. 1 each_item do |item|
  26. 4 yield item if item.nav?
  27. end
  28. else
  29. 2 each_item.lazy.select(&:nav?)
  30. end
  31. end
  32. # @return [Array<Item>] item which includes "nav" as one of +properties+. It represents this item is a navigation of book.
  33. 1 def navs
  34. 9 items.select(&:nav?)
  35. end
  36. # @return [Item, nil] the first item of #navs
  37. 1 def nav
  38. 8 navs.first
  39. end
  40. # @return [Item, nil] item which includes "cover-image" as one of +properties+. It represents this item is cover image.
  41. 1 def cover_image
  42. 1 items.find(&:cover_image?)
  43. end
  44. # @overload each_item
  45. # @yield [item]
  46. # @yieldparam [Item]
  47. # @overload each_item
  48. # @return [Enumerator]
  49. 1 def each_item
  50. 4 if block_given?
  51. 1 @items.each_value do |item|
  52. 4 yield item
  53. end
  54. else
  55. 3 @items.each_value
  56. end
  57. end
  58. 1 def items
  59. 88 @items.values
  60. end
  61. # @param item_id [String]
  62. # @return [Item, nil]
  63. 1 def [](item_id)
  64. 222 @items[item_id]
  65. end
  66. 1 class Item
  67. 1 DUMMY_ROOT_IRI = Addressable::URI.parse('http://example.net/').freeze
  68. 1 include Inspector
  69. # @!attribute [rw] manifest
  70. # @return [Manifest] Returns the value of manifest
  71. # @!attribute [rw] id
  72. # @return [String] Returns the value of id
  73. # @!attribute [rw] href
  74. # @return [Addressable::URI] Returns the value of href,
  75. # which is relative IRI from rootfile(OPF file)
  76. # @!attribute [rw] media_type
  77. # @return [String] Returns the value of media_type
  78. # @!attribute [rw] properties
  79. # @return [Set<String>] Returns the value of properties
  80. # @!attribute [rw] media_overlay
  81. # @return [String] Returns the value of media_overlay
  82. # @!attribute [rw] fallback
  83. # @return [Item] Returns the value of attribute fallback
  84. 1 attr_accessor :manifest,
  85. :id, :media_type, :fallback, :media_overlay
  86. 1 attr_reader :properties, :href
  87. 1 def initialize
  88. 1298 @properties = Set.new
  89. 1298 @full_path = nil
  90. end
  91. 1 def properties=(props)
  92. 281 @properties = props.kind_of?(Set) ? props : Set.new(props)
  93. end
  94. 1 def href=(iri)
  95. 1211 @href = iri.kind_of?(Addressable::URI) ? iri : Addressable::URI.parse(iri)
  96. end
  97. # @todo Handle circular fallback chain
  98. 1 def fallback_chain
  99. 2 @fallback_chain ||= traverse_fallback_chain([])
  100. end
  101. # full path in archive
  102. # @return [Addressable::URI]
  103. 1 def full_path
  104. 18 return @full_path if @full_path
  105. 18 path = DUMMY_ROOT_IRI + manifest.package.full_path + href
  106. 18 path.scheme = nil
  107. 18 path.host = nil
  108. 18 path.path = path.path[1..-1]
  109. 18 @full_path = path
  110. end
  111. # full path in archive
  112. # @return [String]
  113. 1 def entry_name
  114. 16 Addressable::URI.unencode(full_path)
  115. end
  116. # Read content from EPUB archive
  117. #
  118. # @param detect_encoding [Boolean] Whether #read tries auto-detection of character encoding. The default value is +false+.
  119. # @return [String] Content with encoding:
  120. # US-ASCII when the content is not in text format such images.
  121. # UTF-8 when the content is in text format and +detect_encoding+ is +false+.
  122. # auto-detected encoding when the content is in text format and +detect_encoding+ is +true+.
  123. 1 def read(detect_encoding: false)
  124. 15 raw_content = manifest.package.book.container_adapter.read(manifest.package.book.epub_file, entry_name)
  125. 15 unless media_type.start_with?('text/') or
  126. media_type.end_with?('xml') or
  127. ['application/json', 'application/javascript', 'application/ecmascript', 'application/xml-dtd'].include?(media_type)
  128. return raw_content
  129. end
  130. 15 if detect_encoding
  131. # CharDet.detect doesn't raise Encoding::CompatibilityError
  132. # that is caused when trying compare CharDet's internal
  133. # ASCII-8BIT RegExp with a String with other encoding
  134. # because Zip::File#read returns a String with encoding ASCII-8BIT.
  135. # So, no need to rescue the error here.
  136. 3 encoding = CharDet.detect(raw_content)['encoding']
  137. 3 if encoding
  138. 3 raw_content.force_encoding(encoding)
  139. else
  140. warn "No encoding detected for #{entry_name}. Set to ASCII-8BIT" if $DEBUG || $VERBOSE
  141. raw_content
  142. end
  143. else
  144. 12 raw_content.force_encoding("UTF-8");
  145. end
  146. end
  147. 1 def xhtml?
  148. 2 media_type == 'application/xhtml+xml'
  149. end
  150. 1 def nav?
  151. 146 properties.include? 'nav'
  152. end
  153. 1 def cover_image?
  154. 4 properties.include? 'cover-image'
  155. end
  156. # @todo Handle circular fallback chain
  157. 1 def use_fallback_chain(options = {})
  158. 7 supported = EPUB::MediaType::CORE
  159. 7 if ad = options[:supported]
  160. 1 supported = supported | (ad.respond_to?(:to_ary) ? ad : [ad])
  161. end
  162. 7 if del = options[:unsupported]
  163. 3 supported = supported - (del.respond_to?(:to_ary) ? del : [del])
  164. end
  165. 7 return yield self if supported.include? media_type
  166. 4 if (bindings = manifest.package.bindings) && (binding_media_type = bindings[media_type])
  167. 1 return yield binding_media_type.handler
  168. end
  169. 6 return fallback.use_fallback_chain(options) {|fb| yield fb} if fallback
  170. raise EPUB::MediaType::UnsupportedMediaType
  171. end
  172. 1 def content_document
  173. 23 return nil unless %w[application/xhtml+xml image/svg+xml].include? media_type
  174. 16 @content_document ||= Parser::ContentDocument.new(self).parse
  175. end
  176. # @return [Package::Spine::Itemref]
  177. # @return nil when no Itemref refers this Item
  178. 1 def itemref
  179. 14 manifest.package.spine.itemrefs.find {|itemref| itemref.idref == id}
  180. end
  181. # @param iri [Addressable::URI] relative iri
  182. # @return [Item]
  183. # @return [nil] when item not found
  184. # @raise ArgumentError when +iri+ is not relative
  185. # @raise ArgumentError when +iri+ starts with "/"(slash)
  186. # @note Algorithm stolen form Rack::Utils#clean_path_info
  187. 1 def find_item_by_relative_iri(iri)
  188. 75 raise ArgumentError, "Not relative: #{iri.inspect}" unless iri.relative?
  189. 75 raise ArgumentError, "Start with slash: #{iri.inspect}" if iri.path.start_with? Addressable::URI::SLASH
  190. 74 target_href = href + iri
  191. 74 target_href.fragment = nil
  192. 74 segments = target_href.to_s.split(Addressable::URI::SLASH)
  193. 74 clean_segments = []
  194. 74 segments.each do |segment|
  195. 84 next if segment.empty? || segment == '.'
  196. 84 segment == '..' ? clean_segments.pop : clean_segments << segment
  197. end
  198. 74 target_iri = Addressable::URI.parse(clean_segments.join(Addressable::URI::SLASH))
  199. 313 manifest.items.find { |item| item.href == target_iri}
  200. end
  201. 1 def inspect
  202. 1 "#<%{class}:%{object_id} %{manifest} %{attributes}>" % {
  203. :class => self.class,
  204. :object_id => inspect_object_id,
  205. :manifest => "@manifest=#{@manifest.inspect_simply}",
  206. :attributes => inspect_instance_variables(exclude: [:@manifest])
  207. }
  208. end
  209. 1 protected
  210. 1 def traverse_fallback_chain(chain)
  211. 4 chain << self
  212. 4 return chain unless fallback
  213. 2 fallback.traverse_fallback_chain(chain)
  214. end
  215. end
  216. end
  217. end
  218. end
  219. end

lib/epub/publication/package/metadata.rb

100.0% lines covered

5 relevant lines. 5 lines covered and 0 lines missed.
    
  1. 1 require 'epub/metadata'
  2. 1 module EPUB
  3. 1 module Publication
  4. 1 class Package
  5. 1 Metadata = EPUB::Metadata
  6. end
  7. end
  8. end

lib/epub/publication/package/spine.rb

100.0% lines covered

54 relevant lines. 54 lines covered and 0 lines missed.
    
  1. 1 require 'set'
  2. 1 module EPUB
  3. 1 module Publication
  4. 1 class Package
  5. 1 class Spine
  6. 1 include Inspector::PublicationModel
  7. 1 attr_accessor :package,
  8. :id, :toc, :page_progression_direction
  9. 1 attr_reader :itemrefs
  10. 1 def initialize
  11. 106 @itemrefs = []
  12. end
  13. # @return self
  14. 1 def <<(itemref)
  15. 304 itemref.spine = self
  16. 304 @itemrefs << itemref
  17. 304 self
  18. end
  19. # @yield [itemref]
  20. # @yieldparam [Itemref] itemref
  21. # @yieldreturn [Object] returns the last value of block
  22. # @return [Object, Enumerator]
  23. # returns the last value of block when block given, Enumerator when not
  24. 1 def each_itemref
  25. 18 if block_given?
  26. 72 itemrefs.each {|itemref| yield itemref}
  27. else
  28. 6 enum_for :each_itemref
  29. end
  30. end
  31. # @return [Enumerator] Enumerator which yeilds {Manifest::Item}
  32. # referred by each of {#itemrefs}
  33. 1 def items
  34. 20 itemrefs.collect {|itemref| itemref.item}
  35. end
  36. 1 class Itemref
  37. 1 PAGE_SPREAD_PROPERTIES = ['left'.freeze, 'right'.freeze].freeze
  38. 1 PAGE_SPREAD_PREFIX = 'page-spread-'.freeze
  39. 1 attr_accessor :spine,
  40. :idref, :linear, :id
  41. 1 attr_reader :properties
  42. 1 def initialize
  43. 313 @properties = Set.new
  44. end
  45. 1 def properties=(props)
  46. 4 @properties = props.kind_of?(Set) ? props : Set.new(props)
  47. end
  48. # @return [true|false]
  49. 1 def linear?
  50. 6 !! linear
  51. end
  52. # @return [Package::Manifest::Item] item referred by this object
  53. 1 def item
  54. 77 @item ||= @spine.package.manifest[idref]
  55. end
  56. 1 def item=(item)
  57. 1 self.idref = item.id
  58. 1 item
  59. end
  60. 1 def ==(other)
  61. 3 [:spine, :idref, :id].all? {|meth|
  62. 9 self.__send__(meth) == other.__send__(meth)
  63. } and
  64. 3 (linear? == other.linear?) and
  65. 3 (properties == other.properties)
  66. end
  67. # @return ["left", "right", nil]
  68. 1 def page_spread
  69. 5 property = properties.find {|prop| prop.start_with? PAGE_SPREAD_PREFIX}
  70. 3 property ? property.gsub(/\A#{Regexp.escape(PAGE_SPREAD_PREFIX)}/, '') : nil
  71. end
  72. # @param new_value ["left", "right", nil]
  73. 1 def page_spread=(new_value)
  74. 6 if new_value.nil?
  75. 3 properties.delete_if {|prop| prop.start_with? PAGE_SPREAD_PREFIX}
  76. 2 return new_value
  77. end
  78. 4 raise "Unsupported page-spread property: #{new_value}" unless PAGE_SPREAD_PROPERTIES.include? new_value
  79. 8 props_to_be_deleted = (PAGE_SPREAD_PROPERTIES - [new_value]).map {|prop| "#{PAGE_SPREAD_PREFIX}#{prop}"}
  80. 5 properties.delete_if {|prop| props_to_be_deleted.include? prop}
  81. 4 new_property = "#{PAGE_SPREAD_PREFIX}#{new_value}"
  82. 4 properties << new_property unless properties.include? new_property
  83. 4 new_value
  84. end
  85. end
  86. end
  87. end
  88. end
  89. end

lib/epub/searcher.rb

75.0% lines covered

12 relevant lines. 9 lines covered and 3 lines missed.
    
  1. 1 require 'epub/searcher/result'
  2. 1 require 'epub/searcher/publication'
  3. 1 require 'epub/searcher/xhtml'
  4. 1 module EPUB
  5. 1 module Searcher
  6. 1 class << self
  7. 1 def search_text(epub, word, **options)
  8. Publication.search_text(epub.package, word, options)
  9. end
  10. 1 def search_element(epub, css: nil, xpath: nil, namespaces: {})
  11. Publication.search_element(epub.package, css: css, xpath: xpath, namespaces: namespaces)
  12. end
  13. 1 def search_by_cfi(epub, cfi)
  14. Publication.search_by_cfi(epub.package, cfi)
  15. end
  16. end
  17. end
  18. end

lib/epub/searcher/publication.rb

69.23% lines covered

78 relevant lines. 54 lines covered and 24 lines missed.
    
  1. 1 require 'epub/publication'
  2. 1 require "epub/parser/xml_document"
  3. 1 module EPUB
  4. 1 module Searcher
  5. 1 class Publication
  6. 1 using Parser::XMLDocument::Refinements
  7. 1 class << self
  8. 1 def search_text(package, word, **options)
  9. 3 new(package).search_text(word, **options)
  10. end
  11. 1 def search_element(package, css: nil, xpath: nil, namespaces: {})
  12. 3 new(package).search_element(css: css, xpath: xpath, namespaces: namespaces)
  13. end
  14. 1 def search_by_cfi(package, cfi)
  15. new(package).search_by_cfi(cfi)
  16. end
  17. end
  18. 1 def initialize(package)
  19. 6 @package = package
  20. end
  21. # @return [Array<Result>]
  22. 1 def search_text(word, algorithm: :seamless)
  23. 3 results = []
  24. 3 spine = @package.spine
  25. 3 spine_step = Result::Step.new(:element, 2, {:name => 'spine', :id => spine.id})
  26. 3 spine.each_itemref.with_index do |itemref, index|
  27. 15 itemref_step = Result::Step.new(:itemref, index, {:id => itemref.id})
  28. 15 XHTML::ALGORITHMS[algorithm].search_text(Parser::XMLDocument.new(itemref.item.read), word).each do |sub_result|
  29. 6 results << Result.new([spine_step, itemref_step] + sub_result.parent_steps, sub_result.start_steps, sub_result.end_steps)
  30. end
  31. end
  32. 3 results
  33. end
  34. # @todo: Refactoring
  35. # @return [Array<Hash>] An array of rearch results. Each result is composed of:
  36. # * +:element+: [REXML::Element, Oga::XML::Element, Nokogiri::XML::ELement] Found element
  37. # * +:itemref+: [EPUB::Publication::Package::Spine::Itemref] Itemref that element's document belongs to
  38. # * +:location+: [EPUB::CFI::Location] CFI that indicates the element
  39. # * +:package+: [EPUB::Publication::Package] Package that the element belongs to
  40. 1 def search_element(css: nil, xpath: nil, namespaces: {})
  41. 3 raise ArgumentError, 'Both css and xpath are nil' if css.nil? && xpath.nil?
  42. 3 namespaces = EPUB::NAMESPACES.merge(namespaces)
  43. 3 results = []
  44. 3 spine_step = EPUB::CFI::Step.new((EPUB::Publication::Package::CONTENT_MODELS.index(:spine) + 1) * 2)
  45. 3 @package.spine.each_itemref.with_index do |itemref, index|
  46. 15 assertion = itemref.id ? EPUB::CFI::IDAssertion.new(itemref.id) : nil
  47. 15 itemref_step = EPUB::CFI::Step.new((index + 1) * 2, assertion)
  48. 15 path_to_itemref = EPUB::CFI::Path.new([spine_step, itemref_step])
  49. 15 content_document = itemref.item.content_document
  50. 15 next unless content_document
  51. 9 elems = if xpath
  52. 6 doc = Parser::XMLDocument.new(content_document.read)
  53. 6 doc.each_element_by_xpath(xpath, namespaces)
  54. else
  55. begin
  56. 3 doc = content_document.nokogiri
  57. rescue LoadError
  58. raise "#{self.class}##{__method__} with `css` argument requires Nokogiri gem for now. Install Nokogiri and then try again."
  59. end
  60. 3 doc.css(css)
  61. end
  62. 9 elems.each do |elem|
  63. 22 path = find_path(elem)
  64. 22 results << {
  65. location: EPUB::CFI::Location.new([path_to_itemref, path]),
  66. package: @package,
  67. itemref: itemref,
  68. element: elem
  69. }
  70. end
  71. end
  72. 3 results
  73. end
  74. # @note Currenty can handle only location CFI without offset
  75. # @todo Use XHTML module
  76. # @todo Handle CFI with offset
  77. # @todo Handle range CFI
  78. # @param [EPUB::CFI] cfi
  79. # @return [Array] Path in EPUB Rendition
  80. 1 def search_by_cfi(cfi)
  81. path_in_package = cfi.paths.first
  82. spine = @package.spine
  83. model = [@package.metadata, @package.manifest, spine, @package.guide, @package.bindings].compact[path_in_package.steps.first.value / 2 - 1]
  84. raise NotImplementedError, "Currently, #{__method__} supports spine only(#{cfi})" unless model == spine
  85. raise ArgumentError, "Cannot identify <itemref>'s child" if path_in_package.steps.length > 2
  86. step_to_itemref = path_in_package.steps[1]
  87. itemref = spine.itemrefs[step_to_itemref.value / 2 - 1]
  88. doc = itemref.item.content_document.nokogiri
  89. path_in_doc = cfi.paths[1]
  90. current_node = doc.root
  91. path_in_doc.steps.each do |step|
  92. if step.element?
  93. current_node = current_node.elements.to_a[step.value / 2 - 1]
  94. else
  95. element_index = (step.value - 1) / 2 - 1
  96. if element_index == -1
  97. current_node = current_node.children.first
  98. else
  99. prev = current_node.elements.to_a[element_index]
  100. break unless prev
  101. current_node = prev.next_sibling
  102. break unless current_node
  103. end
  104. end
  105. end
  106. raise NotImplementedError, "Currently, #{__method__} doesn't support deeper DOM tree such as including <iframe>" if cfi.paths[2]
  107. [itemref, current_node]
  108. end
  109. 1 private
  110. 1 def find_path(elem)
  111. 22 steps = []
  112. 22 until elem.parent.document?
  113. 148 index = elem.parent.elements.to_a.index(elem)
  114. 148 id_attr = elem.attribute_with_prefix("id")
  115. 148 assertion = id_attr ? EPUB::CFI::IDAssertion.new(id_attr) : nil
  116. 148 steps.unshift EPUB::CFI::Step.new((index + 1) * 2, assertion)
  117. 148 elem = elem.parent
  118. end
  119. 22 EPUB::CFI::Path.new(steps)
  120. end
  121. end
  122. end
  123. end

lib/epub/searcher/result.rb

100.0% lines covered

41 relevant lines. 41 lines covered and 0 lines missed.
    
  1. 1 require 'epub/cfi'
  2. 1 module EPUB
  3. 1 module Searcher
  4. 1 class Result
  5. 1 class << self
  6. # @example
  7. # Result.aggregate_step_intersection([a, b, c], [a, b, d]) # => [[a, b], [c], [d]]
  8. # @example
  9. # Result.aggregate_step_intersection([a, b, c], [a, d, c]) # => [[a], [b, c], [d, c]]
  10. # # Note that c here is not included in the first element of returned value.
  11. # @param steps1 [Array<Step>, Array<Array>]
  12. # @param steps2 [Array<Step>, Array<Array>]
  13. # @return [Array<Array<Array>>] Three arrays:
  14. # 1. "intersection" of +steps1+ and +steps2+. "intersection" here is not the term of mathmatics
  15. # 2. remaining steps of +steps1+
  16. # 3. remaining steps of +steps2+
  17. 1 def aggregate_step_intersection(steps1, steps2)
  18. 13 intersection = []
  19. 13 steps1_remaining = []
  20. 13 steps2_remaining = []
  21. 13 broken = false
  22. 13 steps1.zip steps2 do |step1, step2|
  23. 44 broken = true unless step1 && step2 && step1 == step2
  24. 44 if broken
  25. 1 steps1_remaining << step1 unless step1.nil?
  26. 1 steps2_remaining << step2 unless step2.nil?
  27. else
  28. 43 intersection << step1
  29. end
  30. end
  31. 13 [intersection, steps1_remaining, steps2_remaining]
  32. end
  33. end
  34. 1 attr_reader :parent_steps, :start_steps, :end_steps
  35. # @param parent_steps [Array<Step>] common steps between start and end
  36. # @param start_steps [Array<Step>] steps to start from +parent_steps+
  37. # @param end_steps [Array<Step>] steps to end from +parent_steps+
  38. 1 def initialize(parent_steps, start_steps, end_steps)
  39. 82 @parent_steps, @start_steps, @end_steps = parent_steps, start_steps, end_steps
  40. end
  41. 1 def to_cfi
  42. 3 str = [@parent_steps, @start_steps, @end_steps].collect {|steps|
  43. 9 steps ? steps.collect(&:to_cfi).join : nil
  44. }.compact.join(',')
  45. 3 EPUB::CFI(str)
  46. end
  47. 1 def ==(other)
  48. 18 [@parent_steps + @start_steps.to_a] == [other.parent_steps + other.start_steps.to_a] and
  49. [@parent_steps + @end_steps.to_a] == [other.parent_steps + other.end_steps.to_a]
  50. end
  51. 1 class Step
  52. 1 attr_reader :type, :index, :info
  53. 1 def initialize(type, index, info={})
  54. 557 @type, @index, @info = type, index, info
  55. end
  56. 1 def ==(other)
  57. 190 self.type == other.type and
  58. self.index == other.index and
  59. self.info == other.info
  60. end
  61. 1 def to_cfi
  62. 30 case type
  63. when :element
  64. 23 '/%d%s' % [(index + 1) * 2, id_assertion]
  65. when :text
  66. 2 '/%d' % [(index + 1)]
  67. when :character
  68. 4 ':%d' % [index]
  69. when :itemref
  70. 1 '/%d%s!' % [(index + 1) * 2, id_assertion]
  71. end
  72. end
  73. 1 private
  74. 1 def id_assertion
  75. 24 info[:id] ? "[#{info[:id]}]" : nil
  76. end
  77. end
  78. end
  79. end
  80. end

lib/epub/searcher/xhtml.rb

100.0% lines covered

100 relevant lines. 100 lines covered and 0 lines missed.
    
  1. 1 require 'epub'
  2. 1 require 'epub/parser/xml_document'
  3. 1 module EPUB
  4. 1 module Searcher
  5. 1 class XHTML
  6. 1 using Parser::XMLDocument::Refinements
  7. 1 ALGORITHMS = {}
  8. 1 class << self
  9. # @param element [REXML::Element, REXML::Document, Oga::XML::ELement, Oga::XML::Document, Nokogiri::XML::Element, Nokogiri::XML::Document]
  10. # @param word [String]
  11. # @return [Array<Result>]
  12. 1 def search_text(element, word)
  13. 33 new(element.respond_to?(:root) ? element.root : element).search_text(word)
  14. end
  15. end
  16. # @param word [String]
  17. 1 def initialize(element)
  18. 33 @element = element
  19. end
  20. 1 class Restricted < self
  21. # @param element [REXML::Element, Oga::XML::Element, Nokogiri::XML::Element]
  22. # @return [Array<Result>]
  23. 1 def search_text(word, element=nil)
  24. 137 results = []
  25. 137 elem_index = 0
  26. 137 (element || @element).children.each do |child|
  27. 313 if child.element?
  28. 132 child_step = Result::Step.new(:element, elem_index, {:name => child.name, :id => child.attribute_with_prefix('id')})
  29. 132 if child.name == 'img'
  30. 5 if child.attribute_with_prefix('alt').index(word)
  31. 2 results << Result.new([child_step], nil, nil)
  32. end
  33. else
  34. 127 search_text(word, child).each do |sub_result|
  35. 34 results << Result.new([child_step] + sub_result.parent_steps, sub_result.start_steps, sub_result.end_steps)
  36. end
  37. end
  38. 132 elem_index += 1
  39. 181 elsif child.text?
  40. 181 text_index = elem_index
  41. 181 char_index = 0
  42. 181 text_step = Result::Step.new(:text, text_index)
  43. 181 while char_index = child.text.index(word, char_index)
  44. 8 results << Result.new([text_step], [Result::Step.new(:character, char_index)], [Result::Step.new(:character, char_index + word.length)])
  45. 8 char_index += 1
  46. end
  47. end
  48. end
  49. 137 results
  50. end
  51. end
  52. 1 ALGORITHMS[:restricted] = Restricted
  53. 1 class Seamless < self
  54. 1 def initialize(element)
  55. 23 super
  56. 23 @indices = nil
  57. end
  58. 1 def search_text(word)
  59. 23 unless @indices
  60. 23 @indices, @content = build_indices(@element)
  61. end
  62. 23 visit(@indices, @content, word)
  63. end
  64. 1 def build_indices(element)
  65. 151 indices = {}
  66. 151 content = ''
  67. 151 elem_index = 0
  68. 151 element.children.each do |child|
  69. 317 if child.element?
  70. 133 child_step = [:element, elem_index, {:name => child.name, :id => child.attribute_with_prefix('id')}]
  71. 133 elem_index += 1
  72. 133 if child.name == 'img'
  73. 5 alt = child.attribute_with_prefix('alt')
  74. 5 next if alt.nil? || alt.empty?
  75. 5 indices[content.length] = [child_step]
  76. 5 content << alt
  77. else
  78. # TODO: Consider block level elements
  79. 128 content_length = content.length
  80. 128 sub_indices, sub_content = build_indices(child)
  81. # TODO: Pass content_length and child_step to build_indices and remove this block
  82. 128 sub_indices.each_pair do |sub_pos, child_steps|
  83. 625 indices[content_length + sub_pos] = [child_step] + child_steps
  84. end
  85. 128 content << sub_content
  86. end
  87. 184 elsif child.text? || child.cdata?
  88. 184 text_index = elem_index
  89. 184 text_step = [:text, text_index]
  90. 184 indices[content.length] = [text_step]
  91. 184 content << child.content
  92. end
  93. end
  94. 151 [indices, content]
  95. end
  96. 1 private
  97. 1 def visit(indices, content, word)
  98. 23 results = []
  99. 23 offsets = indices.keys
  100. 23 i = 0
  101. 23 while i = content.index(word, i)
  102. 14 offset = find_offset(offsets, i)
  103. 14 start_steps = to_result_steps(indices[offset])
  104. 14 last_step = start_steps.last
  105. 14 if last_step.info[:name] == 'img'
  106. 1 parent_steps = start_steps
  107. 1 start_steps = end_steps = nil
  108. else
  109. 13 word_length = word.length
  110. 13 start_char_step = Result::Step.new(:character, i - offset)
  111. 13 end_offset = find_offset(offsets, i + word_length, true)
  112. 13 end_steps = to_result_steps(indices[end_offset])
  113. 13 end_char_step = Result::Step.new(:character, i + word_length - end_offset)
  114. 13 parent_steps, start_steps, end_steps = Result.aggregate_step_intersection(start_steps, end_steps)
  115. 13 start_steps << start_char_step
  116. 13 end_steps << end_char_step
  117. end
  118. 14 results << Result.new(parent_steps, start_steps, end_steps)
  119. 14 i += 1
  120. end
  121. 23 results
  122. end
  123. # Find max offset greater than or equal to index
  124. # @param offsets [Array<Integer>] keys of indices
  125. # @param index [Integer] position of search word in content string
  126. # @todo: more efficient algorithm
  127. 1 def find_offset(offsets, index, for_end_position=false)
  128. 27 comparison_operator = for_end_position ? :< : :<=
  129. 27 l = offsets.length
  130. 27 offset_index = (0..l).bsearch {|i|
  131. 99 o = offsets[l - i]
  132. 99 next false unless o
  133. 88 o.send(comparison_operator, index)
  134. }
  135. 27 offsets[l - offset_index]
  136. end
  137. 1 def to_result_steps(steps)
  138. 121 steps.map {|step| Result::Step.new(*step)}
  139. end
  140. end
  141. 1 ALGORITHMS[:seamless] = Seamless
  142. end
  143. end
  144. end