All Files ( 94.65% covered at 121.75 hits/line )
43 files in total.
1459 relevant lines,
1381 lines covered and
78 lines missed.
(
94.65%
)
- 1
require 'epub/inspector'
- 1
require 'epub/ocf'
- 1
require 'epub/publication'
- 1
require 'epub/content_document'
- 1
require 'epub/book/features'
- 1
require 'epub'
- 1
module EPUB
- 1
class Book
- 1
include EPUB::Book::Features
end
end
- 1
require 'forwardable'
- 1
module EPUB
- 1
class Book
- 1
module Features
- 1
extend Forwardable
- 1
attr_reader :ocf
- 1
attr_accessor :epub_file
# When writing, sets +ocf.book+ to self.
# @param [OCF]
- 1
def ocf=(mod)
  # Remember the OCF object first, then point its back-reference at this book;
  # the method evaluates to the object that was passed in.
  (@ocf = mod).tap { |attached| attached.book = self }
end
# @return [Array<OCF::Container::Rootfile>]
- 1
def rootfiles
- 105
ocf.container.rootfiles
end
# @return [Array<Publication::Package>]
- 1
def packages
- 105
rootfiles.map(&:package)
end
- 1
alias renditions packages
# First +package+ in +packages+
# @return [Package|nil]
- 1
def default_rendition
- 21
packages.first
end
- 1
alias package default_rendition
# @!parse def_delegators :package, :metadata, :manifest, :spine, :guide, :bindings
- 1
def_delegators :package, *Publication::Package::CONTENT_MODELS
- 1
def_delegators :metadata, :title, :main_title, :subtitle, :short_title, :collection_title, :edition_title, :extended_title, :description, :date, :unique_identifier, :modified, :release_identifier, :package_identifier
- 1
def_delegators :manifest, :nav
- 1
def container_adapter
- 43
@adapter || OCF::PhysicalContainer.adapter
end
- 1
def container_adapter=(adapter)
- 2
@adapter = OCF::PhysicalContainer.find_adapter(adapter)
end
# Cover image defined in EPUB 3 or used in EPUB 2
# @return [EPUB::Publication::Package::Manifest::Item]
- 1
def cover_image
manifest.cover_image || metadata.cover_image
end
# @overload each_page_on_spine(&blk)
# iterate over items in order of spine when block given
# @yieldparam item [Publication::Package::Manifest::Item]
# @overload each_page_on_spine
# @return [Enumerator] which iterates over {Publication::Package::Manifest::Item}s in order of spine when block not given
- 1
def each_page_on_spine(&blk)
  spine_items = package.spine.items
  # Without a block, hand back an enumerator over the spine items.
  return spine_items.each unless block_given?

  spine_items.each(&blk)
end
- 1
def each_page_on_toc(&blk)
raise NotImplementedError
end
# @overload each_content(&blk)
# iterate all items over when block given
# @yieldparam item [Publication::Package::Manifest::Item]
# @overload each_content
# @return [Enumerator] which iterates over all {Publication::Package::Manifest::Item}s in EPUB package when block not given
- 1
def each_content(&blk)
  all_items = manifest.items
  # Block given: iterate directly; otherwise expose the items as an enumerator.
  block_given? ? all_items.each(&blk) : all_items.to_enum
end
- 1
def other_navigation
raise NotImplementedError
end
# @return [Array<Publication::Package::Manifest::Item>] All {Publication::Package::Manifest::Item}s in EPUB package
- 1
def resources
manifest.items
end
# Syntax sugar
# @return String
- 1
def rootfile_path
ocf.container.rootfile.full_path.to_s
end
end
end
end
- 1
# Library-wide constants: XML namespace URIs, media type lists and the
# structural-semantics type names used by navigation documents.
module EPUB
  # Maps the prefixes used in this library's XPath expressions to namespace URIs.
  NAMESPACES = {
    'xml' => 'http://www.w3.org/XML/1998/namespace',
    'dc' => 'http://purl.org/dc/elements/1.1/',
    'ocf' => 'urn:oasis:names:tc:opendocument:xmlns:container',
    'opf' => 'http://www.idpf.org/2007/opf',
    'xhtml' => 'http://www.w3.org/1999/xhtml',
    'epub' => 'http://www.idpf.org/2007/ops',
    'm' => 'http://www.w3.org/1998/Math/MathML',
    'svg' => 'http://www.w3.org/2000/svg',
    'smil' => 'http://www.w3.org/ns/SMIL',
    'metadata' => 'http://www.idpf.org/2013/metadata'
  }.freeze

  module MediaType
    class UnsupportedMediaType < StandardError; end

    EPUB = 'application/epub+zip'
    ROOTFILE = 'application/oebps-package+xml'
    IMAGE = %w[
      image/gif
      image/jpeg
      image/png
      image/svg+xml
    ].freeze
    APPLICATION = %w[
      application/xhtml+xml
      application/x-dtbncx+xml
      application/vnd.ms-opentype
      application/font-woff
      application/smil+xml
      application/pls+xml
    ].freeze
    AUDIO = %w[
      audio/mpeg
      audio/mp4
    ].freeze
    TEXT = %w[
      text/css
      text/javascript
    ].freeze
    # Concatenation of the four lists above, in that order.
    CORE = (IMAGE + APPLICATION + AUDIO + TEXT).freeze
  end

  # @see https://idpf.github.io/epub-vocabs/structure/
  module Type
    DOCUMENT_NAVIGATION = %w[
      toc
      landmarks
    ].freeze
    PAGINATION = %w[
      pagebreak
      page_list
    ].freeze
    # Every type name listed above.
    TYPES = (DOCUMENT_NAVIGATION + PAGINATION).freeze
  end
end
- 1
require 'epub/content_document/xhtml'
- 1
require 'epub/content_document/navigation'
- 1
require "epub/content_document/typable"
- 1
module EPUB
- 1
module ContentDocument
- 1
class Navigation < XHTML
- 1
include Typable
- 1
attr_accessor :navigations
- 1
def initialize
- 12
@navigations = []
- 12
@hidden = false
- 12
@parent = nil
- 12
super
end
- 1
def toc
- 2
navigations.find(&:toc?)
end
- 1
def page_list
navigations.find(&:page_list?)
end
- 1
def landmarks
- 1
navigations.find(&:landmarks?)
end
# Enumerator version of toc
- 1
def contents
- 1
enum_for(:each_content).to_a
end
# Enumerator version of page_list
# Usage: navigation.enum_for(:pages)
- 1
def pages
raise NotImplementedError
end
# @todo Enumerator version of landmarks
# iterator for #toc
- 1
def each_content
- 1
toc.traverse do |content, _|
- 7
yield content
end
end
# iterator for #page_list
- 1
def each_page
raise NotImplementedError
end
# iterator for #landmark
- 1
def each_landmark
raise NotImplementedError
end
- 1
def navigation
- 2
navigations.first
end
- 1
# Mixin for navigation nodes that carry a +hidden+ flag and a +parent+ link.
module Hidable
  attr_accessor :hidden, :parent

  # Whether this node is hidden.
  #
  # A +nil+ flag means "not specified here": the answer is then taken from the
  # parent, or is +false+ when there is no parent. Any other value is
  # interpreted by its own truthiness, so an explicit +false+ now reports
  # "not hidden" instead of being treated like +true+.
  # @return [true|false]
  def hidden?
    return (@parent ? @parent.hidden? : false) if @hidden.nil?

    !!@hidden
  end
end
- 1
class Item
- 1
include Hidable
- 1
include Typable
- 1
attr_accessor :items, :text,
:content_document, :item
- 1
attr_reader :href
- 1
def initialize
- 112
@items = ItemList.new
- 112
@items.parent = self
end
- 1
def href=(iri)
- 72
@href = iri.kind_of?(Addressable::URI) ? iri : Addressable::URI.parse(iri)
end
- 1
def traverse(depth=0, &block)
- 10
block.call self, depth
- 10
items.each do |item|
- 8
item.traverse depth + 1, &block
end
end
# For backward compatibility
- 1
def type
- 8
types.find {|t|
- 8
Type::TYPES.include? t
}
end
# For backward compatibility
- 1
def type=(t)
- 1
types << t
end
- 1
%w[toc page_list landmarks].each do |type|
- 3
define_method "#{type}?" do
- 4
types.include? type
end
end
end
# @todo Implement method to represent navigation structure
- 1
class Navigation < Item
- 1
module Type
- 1
TOC = 'toc'
- 1
PAGE_LIST = 'page_list'
- 1
LANDMARKS = 'landmarks'
end
- 1
alias navigations items
- 1
alias navigations= items=
- 1
alias heading text
- 1
alias heading= text=
end
- 1
class ItemList < Array
- 1
include Hidable
- 1
def <<(item)
- 45
super
- 45
item.parent = self
end
end
end
end
end
- 1
require "set"
- 1
module EPUB
- 1
module ContentDocument
- 1
# Mixin that gives an object a set of type names.
module Typable
  # The reader is written out by hand because it initializes lazily; declaring
  # attr_reader :types as well only produced a "method redefined" warning.
  # @return [Set] the types, created empty on first access
  def types
    @types ||= Set.new
  end

  # @param ts [Set, Enumerable, nil] a Set is stored as is; anything else is
  #   passed to Set.new
  def types=(ts)
    @types = ts.kind_of?(Set) ? ts : Set.new(ts)
  end
end
end
end
- 1
module EPUB
- 1
module ContentDocument
- 1
class XHTML
- 1
attr_accessor :item
# @param [Boolean] detect_encoding See {Publication::Package::Manifest::Item#read}
# @return [String] Returns the content string.
- 1
def read(detect_encoding: true)
- 10
item.read(detect_encoding: detect_encoding)
end
- 1
alias raw_document read
# @return [true|false] Whether referenced directly from spine or not.
- 1
def top_level?
- 2
!! item.itemref
end
# @return [String] Returns the value of title element.
# If none, returns empty string
- 1
def title
- 2
title_elem = rexml.get_elements('.//title').first
- 2
if title_elem
- 1
title_elem.text
else
- 1
warn 'title element not found'
- 1
''
end
end
# @return [REXML::Document] content as REXML::Document object
- 1
def rexml
  # Load REXML on first use, the same way #oga and #nokogiri load their
  # libraries, so this method does not depend on REXML being required elsewhere.
  require 'rexml/document'
  @rexml ||= REXML::Document.new(raw_document)
end
# @return [Oga::XML::Document] content as Oga::XML::Document object
- 1
def oga
require "oga"
@oga ||= Oga.parse_xml(raw_document)
end
# @return [Nokogiri::XML::Document] content as Nokogiri::XML::Document object
- 1
def nokogiri
- 3
require 'nokogiri'
- 3
@nokogiri ||= Nokogiri.XML(raw_document)
end
end
end
end
- 1
module EPUB
- 1
# Helpers for building compact #inspect strings.
module Inspector
  INSTANCE_VARIABLES_OPTION = {:exclude => []}.freeze
  SIMPLE_TEMPLATE = "#<%{class}:%{object_id}>".freeze

  # @return [String] class name and object id only
  def inspect_simply
    SIMPLE_TEMPLATE % {
      :class => self.class,
      :object_id => inspect_object_id
    }
  end

  # @return [String] +__id__+ shifted left by one bit, in hexadecimal
  def inspect_object_id
    (__id__ << 1).to_s(16)
  end

  # @param options [Hash] +:exclude+ lists instance variable names to leave out
  # @return [String] space-separated "@name=value.inspect" pairs
  def inspect_instance_variables(options={})
    options = INSTANCE_VARIABLES_OPTION.merge(options)
    exclude = options[:exclude]

    (instance_variables - exclude).map {|name|
      value = instance_variable_get(name)
      "#{name}=#{value.inspect}"
    }.join(' ')
  end

  # For models that respond to +package+: shows the package in its short
  # form instead of expanding it.
  module PublicationModel
    class << self
      # Including this module also includes Inspector.
      def included(mod)
        mod.__send__ :include, Inspector
      end
    end

    def inspect
      template % {
        :class => self.class,
        :package => (package && package.inspect_simply),
        :object_id => inspect_object_id,
        :attributes => inspect_instance_variables(exclude: [:@package])
      }
    end

    # Format string used by #inspect. Assembled from parts rather than by
    # appending to a string literal, so it also works with frozen literals.
    # @return [String]
    def template
      parts = ["#<%{class}:%{object_id}"]
      parts << " @package=%{package}" if package
      parts << " %{attributes}>"
      parts.join
    end
  end
end
end
- 1
require 'set'
- 1
module EPUB
- 1
class Metadata
- 1
include Inspector::PublicationModel
- 1
DC_ELEMS = [:identifiers, :titles, :languages] +
[:contributors, :coverages, :creators, :dates, :descriptions, :formats, :publishers,
:relations, :rights, :sources, :subjects, :types]
# Used for CFI
- 1
attr_reader :children
- 1
attr_accessor :package, :unique_identifier, :metas, :links,
- 16
*(DC_ELEMS.collect {|elem| "dc_#{elem}"})
- 1
DC_ELEMS.each do |elem|
- 15
alias_method elem, "dc_#{elem}"
- 15
alias_method "#{elem}=", "dc_#{elem}="
end
- 1
def initialize
- 171
(DC_ELEMS + [:metas, :links]).each do |elem|
- 2907
__send__ "#{elem}=", []
end
- 171
@children = []
end
- 1
def release_identifier
- 1
"#{unique_identifier}@#{modified}"
end
- 1
alias package_identifier release_identifier
- 1
# Picks the title to display, trying in order: the extended title, the titles
# that have a display_seq (sorted, one per line), the main title, and finally
# every title sorted and joined by newlines.
def title
  extended = extended_title
  return extended unless extended.empty?

  sequenced = titles.select(&:display_seq).sort.join("\n")
  return sequenced unless sequenced.empty?

  main = main_title
  main.empty? ? titles.sort.join("\n") : main
end
- 1
%w[main short collection edition extended].each do |type|
- 5
define_method "#{type}_title" do
- 42
titles.select {|title| title.title_type.to_s == type}.sort.join(' ')
end
end
- 1
def subtitle
titles.select {|title| title.title_type.to_s == 'subtitle'}.sort.join(' ')
end
- 1
def description
descriptions.join(' ')
end
- 1
def date
dates.first
end
- 1
def language
languages.first
end
- 1
def modified
- 4
metas.find {|meta|
- 7
meta.property == 'dcterms:modified' &&
meta.refiners.empty?
}
end
# Cover image used in EPUB 2
# @return [EPUB::Publication::Package::Manifest::Item]
- 1
def cover_image
cover_image_meta = metas.find {|meta| meta.name == "cover"}
return unless cover_image_meta
package.manifest[cover_image_meta.meta_content]
end
- 1
# @return [Hash] each DC_ELEMS name mapped to the value of the reader of that name
def to_h
  DC_ELEMS.each_with_object({}) { |elem, collected| collected[elem] = __send__(elem) }
end
- 1
def primary_metas
metas.select {|meta| meta.primary_expression?}
end
- 1
module Refinee
- 1
PROPERTIES = %w[alternate-script display-seq file-as group-position identifier-type meta-auth role title-type]
- 1
attr_writer :refiners
- 1
def refiners
- 848
@refiners ||= Set.new
end
- 1
PROPERTIES.each do |voc|
- 8
met = voc.gsub(/-/, '_')
- 8
attr_writer met
- 8
define_method met do
- 128
refiners.find {|refiner| refiner.property == voc}
end
end
end
- 1
class DCMES
- 1
include Refinee
- 1
attr_accessor :content, :id, :lang, :dir
- 1
def to_s
- 13
content.to_s
end
end
- 1
class Identifier < DCMES
# @note This is ad-hoc
# @todo Define and include OPF module for opf:scheme attribute
# @todo Define general way to handle with identifier-type refiners
- 1
attr_accessor :scheme
# @note This is ad-hoc
# @todo Define and include OPF module for opf:scheme attribute
# @todo Define general way to handle with identifier-type refiners
- 1
def isbn?
  # Three independent signals, checked in order: the scheme attribute, a
  # urn:isbn prefix on the content, or an identifier-type refiner using
  # onix:codelist5 with value 02 or 15.
  return true if scheme == 'ISBN'
  return true if content.to_s.downcase.start_with?('urn:isbn')

  refiners.any? do |refiner|
    refiner.property == 'identifier-type' &&
      refiner.scheme == 'onix:codelist5' &&
      %w[02 15].include?(refiner.content)
  end
end
end
- 1
class Title < DCMES
- 1
include Comparable
- 1
def <=>(other)
- 5
return 1 if other.display_seq.nil?
- 4
return -1 if display_seq.nil?
- 4
display_seq.to_s.to_i <=> other.display_seq.to_s.to_i
end
end
- 1
class Meta
- 1
include Refinee
- 1
attr_accessor :property, :id, :scheme, :content, :name, :meta_content
- 1
attr_reader :refines
- 1
def refines=(refinee)
- 740
refinee.refiners << self
- 740
@refines = refinee
end
- 1
def refines?
- 2
! refines.nil?
end
- 1
alias subexpression? refines?
- 1
def primary_expression?
- 1
! subexpression?
end
- 1
# @return [String] "#<Class:0xID @ivar=value ...>"; the opening delimiter is
#   "#<", matching the templates used by Inspector (it was "<#" before).
def inspect
  ivs = instance_variables.map {|iv|
    "#{iv}=#{instance_variable_get(iv).inspect}"
  }.join(' ')
  '#<%s:%#0x %s>' % [self.class, __id__, ivs]
end
- 1
def to_s
- 41
content.to_s
end
end
- 1
class Link
- 1
include Refinee
- 1
attr_accessor :href, :rel, :id, :media_type
- 1
attr_reader :refines
- 1
def refines=(refinee)
- 45
refinee.refiners << self
- 45
@refines = refinee
end
end
- 1
class UnsupportedModel
- 1
attr_accessor :raw_element
- 1
def initialize(raw_element)
@raw_element = raw_element
end
end
end
end
- 1
module EPUB
- 1
class OCF
- 1
MODULES = %w[container encryption manifest metadata rights signatures]
- 7
MODULES.each {|m| require "epub/ocf/#{m}"}
- 1
attr_accessor :book, *MODULES
end
end
- 1
module EPUB
- 1
class OCF
- 1
class Container
- 1
FILE = 'container.xml'
- 1
attr_reader :rootfiles
- 1
def initialize
- 34
@rootfiles = []
end
# syntax sugar
- 1
def rootfile
- 1
rootfiles.first
end
- 1
class Rootfile
- 1
attr_accessor :full_path, :media_type,
:package
# @param full_path [Addressable::URI|nil]
# @param media_type [String]
- 1
def initialize(full_path=nil, media_type=EPUB::MediaType::ROOTFILE)
- 100
@full_path, @media_type = full_path, media_type
end
end
end
end
end
- 1
module EPUB
- 1
class OCF
- 1
class Encryption
- 1
attr_accessor :content
end
end
end
- 1
module EPUB
- 1
class OCF
- 1
class Manifest
end
end
end
- 1
module EPUB
- 1
class OCF
- 1
class UnknownFormatMetadata
- 1
attr_accessor :content
end
end
end
- 1
require 'monitor'
- 1
require 'epub/ocf/physical_container/archive_zip'
- 1
require 'epub/ocf/physical_container/unpacked_directory'
- 1
require 'epub/ocf/physical_container/unpacked_uri'
- 1
module EPUB
- 1
class OCF
- 1
class PhysicalContainer
- 1
# Raised when a requested entry is missing from the container.
class NoEntry < StandardError
  # Builds a NoEntry carrying the message and backtrace of +error+.
  # @param error [Exception]
  # @return [NoEntry]
  def self.from_error(error)
    new(error.message).tap { |wrapped| wrapped.set_backtrace(error.backtrace) }
  end
end
- 1
@adapter = ArchiveZip
- 1
class << self
- 1
def find_adapter(adapter)
- 6
return adapter if adapter.instance_of? Class
- 3
if adapter == :Zipruby && ! const_defined?(adapter)
require 'epub/ocf/physical_container/zipruby'
end
- 3
const_get adapter
end
- 1
def adapter
- 71
raise NoMethodError, "undefined method `#{__method__}' for #{self}" unless self == PhysicalContainer
- 71
@adapter
end
- 1
def adapter=(adapter)
- 4
raise NoMethodError, "undefined method `#{__method__}' for #{self}" unless self == PhysicalContainer
- 4
@adapter = find_adapter(adapter)
end
- 1
def open(container_path)
- 56
_adapter.new(container_path).open do |container|
- 56
yield container
end
end
- 1
def read(container_path, path_name)
- 20
open(container_path) {|container|
- 20
container.read(path_name.to_s)
}
end
- 1
private
- 1
def _adapter
- 56
(self == PhysicalContainer) ? @adapter : self
end
end
- 1
def initialize(container_path)
- 71
@container_path = container_path
- 71
@monitor = Monitor.new
end
end
end
end
- 1
require 'archive/zip'
- 1
module EPUB
- 1
class OCF
- 1
class PhysicalContainer
- 1
class ArchiveZip < self
- 1
def initialize(container_path)
- 52
super
- 52
@entries = {}
- 52
@last_iterated_entry_index = 0
end
- 1
def open
- 56
Archive::Zip.open @container_path do |archive|
- 56
@monitor.synchronize do
- 56
@archive = archive
begin
- 56
yield self
ensure
- 56
@archive = nil
end
end
end
end
- 1
# Reads the entry named +path_name+.
#
# While an archive is open (+@archive+ is set by #open) the entries are walked
# in order. Every path seen on the way is remembered in +@entries+
# (path => index), so a later request for a known path only has to stop at
# that index. With no archive open, the container is opened just for this
# read and the call is repeated inside it.
# @param path_name [String] path of the entry to read
# @return whatever +entry.file_data.read+ returns for the matching entry
# @raise [NoEntry] when the walk ends without reaching +path_name+
def read(path_name)
- 256
if @archive
- 250
# Index recorded by an earlier walk, or nil if this path has not been seen.
target_index = @entries[path_name]
- 250
@archive.each.with_index do |entry, index|
- 3814
if target_index
- 1054
# Known position: ignore every entry until that index is reached.
if target_index == index
- 102
return entry.file_data.read
else
- 952
next
end
end
- 2760
# Entries below the last recorded index were already stored in @entries.
next if index < @last_iterated_entry_index
# We can force encoding UTF-8 because EPUB spec allows only UTF-8 filenames
- 836
entry_path = entry.zip_path.force_encoding('UTF-8')
- 836
@entries[entry_path] = index
- 836
@last_iterated_entry_index = index
- 836
if entry_path == path_name
- 44
return entry.file_data.read
end
end
- 104
# Reaching this point means no entry matched during the walk.
raise NoEntry, "Entry not found: #{path_name}"
else
- 12
open {|container| container.read(path_name)}
end
end
end
end
end
end
- 1
module EPUB
- 1
class OCF
- 1
class PhysicalContainer
- 1
class UnpackedDirectory < self
- 1
def open
- 8
yield self
end
- 1
def read(path_name)
- 24
::File.read(::File.join(@container_path, path_name))
rescue ::Errno::ENOENT => error
- 8
raise NoEntry.from_error(error)
end
end
end
end
end
- 1
require 'open-uri'
- 1
module EPUB
- 1
class OCF
- 1
class PhysicalContainer
- 1
class UnpackedURI < self
# EPUB URI: http://example.net/path/to/book/
# container.xml: http://example.net/path/to/book/META-INF/container.xml
# @param [URI, String] container_path URI of EPUB container's root directory.
# For example, <code>"http://example.net/path/to/book/"</code>, which
# should contain <code>"http://example.net/path/to/book/META-INF/container.xml"</code> as its container.xml file. Note that this should end with "/"(slash).
- 1
def initialize(container_path)
- 2
super(URI(container_path))
end
- 1
def open
yield self
end
- 1
def read(path_name)
- 1
(@container_path + path_name).read
rescue ::OpenURI::HTTPError => error
raise NoEntry.from_error(error)
end
end
end
end
end
- 1
require 'zipruby'
- 1
module EPUB
- 1
class OCF
- 1
class PhysicalContainer
- 1
class Zipruby < self
- 1
def open
- 5
Zip::Archive.open @container_path do |archive|
- 5
@monitor.synchronize do
begin
- 5
@archive = archive
- 5
yield self
rescue ::Zip::Error => error
raise NoEntry.from_error(error)
ensure
- 5
@archive = nil
end
end
end
end
- 1
# Reads the entry named +path_name+.
#
# Uses the archive that #open has put in +@archive+ when there is one;
# otherwise opens the container just for this read. The handle is
# deliberately NOT cleared here: #open resets +@archive+ in its own +ensure+,
# and clearing it after each read made every further read inside one +open+
# block reopen the archive.
# @param path_name [String] path of the entry to read
# @return the result of reading the entry
# @raise [NoEntry] when the zip library raises ::Zip::Error
def read(path_name)
  if @archive
    @archive.fopen(path_name) {|entry| entry.read}
  else
    open {|container| container.read(path_name)}
  end
rescue ::Zip::Error => error
  raise NoEntry.from_error(error)
end
end
end
end
end
- 1
module EPUB
- 1
class OCF
- 1
class Rights
end
end
end
- 1
module EPUB
- 1
class OCF
- 1
class Signatures
end
end
end
- 1
require 'epub'
- 1
require 'epub/constants'
- 1
require 'epub/book'
- 1
module EPUB
- 1
class Parser
- 1
class << self
# Parse an EPUB file
#
# @example
# EPUB::Parser.parse('path/to/book.epub') # => EPUB::Book object
#
# @example
# class MyBook
# include EPUB::Book::Features
# end
# book = MyBook.new
# parsed_book = EPUB::Parser.parse('path/to/book.epub', book: book) # => #<MyBook:0x000000019760e8 @epub_file=..>
# parsed_book.equal? book # => true
#
# @example
# book = EPUB::Parser.parse('path/to/book.epub', class: MyBook) # => #<MyBook:0x000000019b0568 @epub_file=...>
# book.instance_of? MyBook # => true
#
# @param [String] filepath
# @param [Hash] options the type of return is specified by this argument.
# If no options, returns {EPUB::Book} object.
# For details of options, see below.
# @option options [EPUB] :book instance of class which includes {EPUB} module
# @option options [Class] :class class which includes {EPUB} module
# @option options [EPUB::OCF::PhysicalContainer, Symbol] :container_adapter OCF physical container adapter to use when parsing EPUB container
# When class passed, it is used. When symbol passed, it is considered as subclass name of {EPUB::OCF::PhysicalContainer}.
# If omitted, {EPUB::OCF::PhysicalContainer.adapter} is used.
# @return [EPUB] object which is an instance of class including {EPUB} module.
# When option :book passed, returns the same object whose attributes about EPUB are set.
# When option :class passed, returns the instance of the class.
# Otherwise returns {EPUB::Book} object.
- 1
def parse(filepath, container_adapter: nil, book: nil, initialize_with: nil, **options)
- 22
new(filepath, container_adapter: container_adapter, book: book, initialize_with: initialize_with, **options).parse
end
end
- 1
# Prepares a parser for the EPUB at +filepath+ and the book object to fill.
#
# @param filepath [String] path of the EPUB (file or directory), or its URI
#   when the UnpackedURI adapter is in use
# @param container_adapter [Class, Symbol, nil] adapter to set on the book;
#   applied last, so it takes precedence over the adapter chosen from the path
# @param book [Object, nil] existing book object to populate
# @param initialize_with [Object, nil] argument for +options[:class].new+
# @param options [Hash] remaining options; +:class+ is read by #create_book
# @raise [RuntimeError] when +filepath+ is not a URI and does not exist
def initialize(filepath, container_adapter: nil, book: nil, initialize_with: nil, **options)
  if filepath.to_s.encoding == Encoding::ASCII_8BIT
    # On Windows and macOS, encoding of file name is set by Ruby,
    # but on UNIX, always is ASCII-8BIT
    # See https://docs.ruby-lang.org/ja/2.7.0/class/IO.html
    filepath = filepath.to_s.dup
    require "nkf"
    filepath.force_encoding NKF.guess(filepath)
  end

  path_is_uri = (container_adapter == EPUB::OCF::PhysicalContainer::UnpackedURI ||
                 container_adapter == :UnpackedURI ||
                 EPUB::OCF::PhysicalContainer.adapter == EPUB::OCF::PhysicalContainer::UnpackedURI)

  raise "File #{filepath} not found" if !path_is_uri && !File.exist?(filepath)

  @filepath = path_is_uri ? filepath : File.realpath(filepath)
  @book = create_book(book: book, initialize_with: initialize_with, **options)
  if path_is_uri
    @book.container_adapter = :UnpackedURI
  elsif File.directory? @filepath
    @book.container_adapter = :UnpackedDirectory
  end
  @book.epub_file = @filepath

  # +container_adapter+ is a named keyword, so it never shows up in
  # +options+; reading options[:container_adapter] meant an explicitly
  # requested adapter was silently ignored.
  @book.container_adapter = container_adapter if container_adapter
end
- 1
def parse
- 27
@book.container_adapter.open @filepath do |container|
- 27
@book.ocf = OCF.parse(container)
- 27
@book.ocf.container.rootfiles.each {|rootfile|
- 81
package = Publication.parse(container, rootfile.full_path.to_s)
- 81
rootfile.package = package
- 81
@book.packages << package
- 81
package.book = @book
}
end
- 27
@book
end
- 1
private
- 1
# Chooses the book object the parser will populate.
#
# Written as guard clauses rather than a bare +case+, whose first branch was
# separated from the keyword by a stray line in this listing.
# @param book [Object, nil] returned unchanged when given
# @param initialize_with [Object, nil] passed to +params[:class].new+ when given
# @param params [Hash] +:class+ selects the class to instantiate
# @return [Object] +book+, a new instance of +params[:class]+, or a new Book
def create_book(book: nil, initialize_with: nil, **params)
  return book if book

  book_class = params[:class]
  return Book.new unless book_class

  initialize_with ? book_class.new(initialize_with) : book_class.new
end
end
end
- 1
require 'epub/parser/version'
- 1
require 'epub/parser/xml_document'
- 1
require 'epub/parser/ocf'
- 1
require 'epub/parser/publication'
- 1
require 'epub/parser/content_document'
- 1
require 'epub/content_document'
- 1
require 'epub/constants'
- 1
require 'epub/parser/xml_document'
- 1
module EPUB
- 1
class Parser
- 1
class ContentDocument
- 1
using XMLDocument::Refinements
# @param [EPUB::Publication::Package::Manifest::Item] item
- 1
def initialize(item)
- 19
@item = item
end
- 1
def parse
- 17
content_document = case @item.media_type
when 'application/xhtml+xml'
- 17
if @item.nav?
- 11
EPUB::ContentDocument::Navigation.new
else
- 6
EPUB::ContentDocument::XHTML.new
end
when 'image/svg+xml'
EPUB::ContentDocument::SVG.new
else
nil
end
- 17
return content_document if content_document.nil?
- 17
content_document.item = @item
- 17
document = XMLDocument.new(@item.read)
# parse_content_document(document)
- 17
if @item.nav?
- 11
content_document.navigations = parse_navigations(document)
end
- 17
content_document
end
# @param [XMLDocument, REXML::Document, Oga::XML::Document, Nokogiri::HTML::Document] document HTML document or element including nav
# @return [Array<EPUB::ContentDocument::Navigation::Navigation>] navs array of Navigation object
- 1
def parse_navigations(document)
- 34
document.each_element_by_xpath('/xhtml:html/xhtml:body//xhtml:nav', EPUB::NAMESPACES).collect {|elem| parse_navigation elem}
end
# @param [REXML::Element, Oga::XML::Element, Nokogiri::XML::Element] element nav element
# @return [EPUB::ContentDocument::Navigation::Navigation] nav Navigation object
- 1
def parse_navigation(element)
  nav = EPUB::ContentDocument::Navigation::Navigation.new
  nav.text = find_heading(element)
  hidden = element.attribute_with_prefix('hidden')
  # Only presence of the attribute matters: nil when absent, true otherwise.
  nav.hidden = hidden.nil? ? nil : true
  nav.types = element.attribute_with_prefix('type', 'epub')&.split(/\s+/)
  element.each_element_by_xpath('./xhtml:ol/xhtml:li', EPUB::NAMESPACES).each do |elem|
    item = parse_navigation_item(elem)
    # parse_navigation_item returns nil for an li without a or span; appending
    # nil would fail when the list assigns the item's parent.
    nav.items << item if item
  end
  nav
end
# @param [REXML::Element, Oga::XML::Element, Nokogiri::XML::Element] element li element
- 1
# Builds a navigation Item from an li element, recursing into a nested ol.
# @return [EPUB::ContentDocument::Navigation::Item, nil] nil when the li has
#   neither an a nor a span child
def parse_navigation_item(element)
  item = EPUB::ContentDocument::Navigation::Item.new
  a_or_span = element.each_element_by_xpath('./xhtml:a[1]|xhtml:span[1]', EPUB::NAMESPACES).first
  return nil if a_or_span.nil?

  item.text = a_or_span.content
  item.types = a_or_span.attribute_with_prefix('type', 'epub')&.split(/\s+/)
  if a_or_span.name == 'a'
    if item.text.empty?
      # A link without text: take a label from the first embedded element.
      embedded_content = a_or_span.each_element_by_xpath('./xhtml:audio[1]|xhtml:canvas[1]|xhtml:embed[1]|xhtml:iframe[1]|xhtml:img[1]|xhtml:math[1]|xhtml:object[1]|xhtml:svg[1]|xhtml:video[1]', EPUB::NAMESPACES).first
      unless embedded_content.nil?
        case embedded_content.name
        when 'audio', 'canvas', 'embed', 'iframe'
          item.text = embedded_content.attribute_with_prefix('name') || embedded_content.attribute_with_prefix('srcdoc')
        when 'img'
          item.text = embedded_content.attribute_with_prefix('alt')
        when 'math', 'object'
          item.text = embedded_content.attribute_with_prefix('name')
        end
        # svg and video supply no label here; the title fallback below applies.
      end
      item.text = a_or_span.attribute_with_prefix('title').to_s if item.text.nil? || item.text.empty?
    end
    item.href = a_or_span.attribute_with_prefix('href')
    item.item = @item.find_item_by_relative_iri(item.href)
  end
  # Append to the list the Item already owns instead of replacing it with a
  # plain Array, so each child gets its parent assigned; children that could
  # not be parsed (nil) are left out.
  element.each_element_by_xpath('./xhtml:ol[1]/xhtml:li', EPUB::NAMESPACES).each do |li|
    child = parse_navigation_item(li)
    item.items << child if child
  end
  item
end
- 1
private
# @param [REXML::Element, Oga::XML::Element, Nokogiri::XML::Element] element nav element
# @return [String] heading heading text
- 1
def find_heading(element)
  heading = element.each_element_by_xpath('./xhtml:h1|xhtml:h2|xhtml:h3|xhtml:h4|xhtml:h5|xhtml:h6|xhtml:hgroup', EPUB::NAMESPACES).first
  return nil if heading.nil?
  return heading.content unless heading.name == 'hgroup'

  # Inside an hgroup, use the first h1, else the first h2, and so on. Each
  # level is looked up on its own: the collections returned by
  # each_element_by_xpath are truthy even when empty, so chaining them with
  # || never moved past h1 and failed when the hgroup had none.
  %w[h1 h2 h3 h4 h5 h6].each do |rank|
    ranked = heading.each_element_by_xpath(".//xhtml:#{rank}", EPUB::NAMESPACES).first
    return ranked.content if ranked
  end
  # hgroup without any h1-h6
  nil
end
end
end
end
- 1
module EPUB
- 1
class Parser
- 1
module Metadata
- 1
using XMLDocument::Refinements
- 1
# Builds a metadata object from the children of a metadata element.
#
# Each child becomes a model (Identifier, Title, DCMES, Meta, Link or
# UnsupportedModel), is appended to +metadata.children+, and is also stored
# in the matching collection. Elements with a +refines+ attribute of the form
# "#id" are linked to the model with that id once all children are read.
# @param elem element whose child elements are the metadata entries
# @param unique_identifier_id [String, nil] id of the dc:identifier to record
#   as +metadata.unique_identifier+
# @param default_namespace [String] key into EPUB::NAMESPACES naming the
#   namespace in which +meta+ and +link+ elements are recognized
# @return [EPUB::Publication::Package::Metadata]
def parse_metadata(elem, unique_identifier_id, default_namespace)
  metadata = EPUB::Publication::Package::Metadata.new
  # id => {refinee: model carrying that id, refiners: [models refining it]}
  id_map = {}

  default_namespace_uri = EPUB::NAMESPACES[default_namespace]
  elem.each_element do |child|
    elem_name = child.name

    model =
      case child.namespace_uri
      when EPUB::NAMESPACES['dc']
        case elem_name
        when 'identifier'
          identifier = build_model(child, :Identifier, ['id'])
          metadata.identifiers << identifier
          identifier.scheme = child.attribute_with_prefix('scheme', 'opf')
          identifier
        when 'title'
          title = build_model(child, :Title)
          metadata.titles << title
          title
        when 'language'
          language = build_model(child, :DCMES, ['id'])
          metadata.languages << language
          language
        when 'contributor', 'coverage', 'creator', 'date', 'description', 'format', 'publisher', 'relation', 'source', 'subject', 'rights', 'type'
          # The collection is the plural of the element name; "rights" already is.
          attr = elem_name == 'rights' ? elem_name : elem_name + 's'
          dcmes = build_model(child)
          metadata.__send__(attr) << dcmes
          dcmes
        else
          build_unsupported_model(child)
        end
      when default_namespace_uri
        case elem_name
        when 'meta'
          meta = build_model(child, :Meta, %w[property id scheme content name])
          metadata.metas << meta
          meta
        when 'link'
          link = build_model(child, :Link, %w[id media-type])
          metadata.links << link
          link.href = child.attribute_with_prefix('href')
          # A missing rel attribute gives an empty set instead of failing on nil.
          link.rel = Set.new(child.attribute_with_prefix('rel').to_s.split(/\s+/))
          link
        else
          build_unsupported_model(child)
        end
      else
        build_unsupported_model(child)
      end

    metadata.children << model

    if model.kind_of?(EPUB::Metadata::Identifier) &&
       model.id == unique_identifier_id
      metadata.unique_identifier = model
    end

    if model.respond_to?(:id) && model.id
      id_map[model.id] = {refinee: model}
    end

    refines = child.attribute_with_prefix('refines')
    if refines && refines.start_with?('#')
      id = refines[1..-1]
      id_map[id] ||= {}
      id_map[id][:refiners] ||= []
      id_map[id][:refiners] << model
    end
  end

  # Connect refiners to their targets; ids with only one side are skipped.
  id_map.values.each do |hsh|
    next unless hsh[:refiners]
    next unless hsh[:refinee]
    hsh[:refiners].each {|meta| meta.refines = hsh[:refinee]}
  end

  metadata
end
- 1
def build_model(elem, klass=:DCMES, attributes=%w[id lang dir])
- 2208
model = EPUB::Metadata.const_get(klass).new
- 2208
attributes.each do |attr|
- 8412
writer_name = (attr == "content") ? "meta_content=" : "#{attr.gsub('-', '_')}="
- 8412
namespace = (attr == "lang") ? "xml" : nil
- 8412
model.__send__ writer_name, elem.attribute_with_prefix(attr, namespace)
end
- 2208
model.content = elem.content unless klass == :Link
- 2208
model.content.strip! if klass == :Identifier
- 2208
model
end
- 1
def build_unsupported_model(elem)
EPUB::Metadata::UnsupportedModel.new(elem)
end
end
end
end
- 1
require 'epub/constants'
- 1
require 'epub/ocf'
- 1
require 'epub/ocf/physical_container'
- 1
require 'epub/parser/metadata'
- 1
require "epub/parser/xml_document"
- 1
module EPUB
- 1
class Parser
- 1
class OCF
- 1
using XMLDocument::Refinements
- 1
include Metadata
- 1
DIRECTORY = 'META-INF'
- 1
class << self
- 1
def parse(container)
- 27
new(container).parse
end
end
- 1
def initialize(container)
- 36
@container = container
- 36
@ocf = EPUB::OCF.new
end
- 1
def parse
- 28
EPUB::OCF::MODULES.each do |m|
begin
- 168
data = @container.read(File.join(DIRECTORY, "#{m}.xml"))
- 56
@ocf.__send__ "#{m}=", __send__("parse_#{m}", data)
rescue EPUB::OCF::PhysicalContainer::NoEntry
end
end
- 28
@ocf
end
- 1
def parse_container(xml)
- 33
container = EPUB::OCF::Container.new
- 33
doc = XMLDocument.new(xml)
- 33
doc.each_element_by_xpath "/ocf:container/ocf:rootfiles/ocf:rootfile", EPUB::NAMESPACES do |elem|
- 99
rootfile = EPUB::OCF::Container::Rootfile.new
- 99
rootfile.full_path = Addressable::URI.parse(elem.attribute_with_prefix('full-path'))
- 99
rootfile.media_type = elem.attribute_with_prefix('media-type')
- 99
container.rootfiles << rootfile
end
- 33
container
end
- 1
def parse_encryption(content)
- 1
encryption = EPUB::OCF::Encryption.new
- 1
encryption.content = content
- 1
encryption
end
- 1
def parse_manifest(content)
warn "Not implemented: #{self.class}##{__method__}" if $VERBOSE
end
- 1
def parse_metadata(content)
- 30
doc = XMLDocument.new(content)
- 30
unless multiple_rendition_metadata?(doc)
- 1
warn "Not implemented: #{self.class}##{__method__}" if $VERBOSE
- 1
metadata = EPUB::OCF::UnknownFormatMetadata.new
- 1
metadata.content = content
- 1
return metadata
end
- 29
super(doc.root, doc.root.attribute_with_prefix('unique-identifier'), 'metadata')
end
- 1
def parse_rights(content)
warn "Not implemented: #{self.class}##{__method__}" if $VERBOSE
end
- 1
def parse_signatures(content)
warn "Not implemented: #{self.class}##{__method__}" if $VERBOSE
end
- 1
private
- 1
def multiple_rendition_metadata?(doc)
- 30
doc.root &&
doc.root.name == 'metadata' &&
doc.root.namespaces['xmlns'] == EPUB::NAMESPACES['metadata']
end
end
end
end
- 1
require 'strscan'
- 1
require 'epub/publication'
- 1
require 'epub/constants'
- 1
require 'epub/parser/metadata'
- 1
module EPUB
- 1
class Parser
- 1
class Publication
- 1
using XMLDocument::Refinements
- 1
include Metadata
- 1
class << self
- 1
def parse(container, file)
- 81
opf = container.read(Addressable::URI.unencode(file))
- 81
new(opf).parse
end
end
- 1
def initialize(opf)
- 125
@doc = XMLDocument.new(opf)
end
- 1
def parse
- 90
package = parse_package(@doc)
- 90
(EPUB::Publication::Package::CONTENT_MODELS - [:bindings]).each do |model|
- 360
package.__send__ "#{model}=", __send__("parse_#{model}", @doc)
end
- 90
package.bindings = parse_bindings(@doc, package.manifest)
- 90
package
end
- 1
def parse_package(doc)
- 125
package = EPUB::Publication::Package.new
- 125
elem = doc.root
- 125
%w[version xml:lang dir id].each do |attr|
- 500
package.__send__ "#{attr.gsub(/\:/, '_')}=", elem.attribute_with_prefix(attr)
end
- 125
package.prefix = parse_prefix(elem.attribute_with_prefix('prefix'))
- 125
EPUB::Publication.__send__ :include, EPUB::Publication::FixedLayout if package.prefix.key? EPUB::Publication::FixedLayout::PREFIX_KEY
- 125
package
end
- 1
def parse_metadata(doc)
- 98
super(doc.each_element_by_xpath('/opf:package/opf:metadata', EPUB::NAMESPACES).first, doc.root.attribute_with_prefix('unique-identifier'), 'opf')
end
- 1
# Builds the manifest model from the +/opf:package/opf:manifest+ element of +doc+.
#
# @param doc [#each_element_by_xpath] parsed package document
# @return [EPUB::Publication::Package::Manifest] manifest holding one Item per
#   +opf:item+ element, with each item's +fallback+ resolved to the Item
#   registered under the referenced id (+nil+ when no such item exists)
def parse_manifest(doc)
  manifest = EPUB::Publication::Package::Manifest.new
  elem = doc.each_element_by_xpath('/opf:package/opf:manifest', EPUB::NAMESPACES).first
  manifest.id = elem.attribute_with_prefix('id')

  # Fallback resolution is deferred until every item is registered because a
  # fallback target may appear after the item that refers to it.
  # The pairs are recorded per referring item: several items may name the same
  # fallback target, and keying by the target id would keep only the last one.
  pending_fallbacks = []
  elem.each_element_by_xpath('./opf:item', EPUB::NAMESPACES).each do |e|
    item = EPUB::Publication::Package::Manifest::Item.new
    %w[id media-type media-overlay].each do |attr|
      item.__send__ "#{attr.gsub(/-/, '_')}=", e.attribute_with_prefix(attr)
    end
    item.href = e.attribute_with_prefix('href')
    fallback = e.attribute_with_prefix('fallback')
    pending_fallbacks << [item, fallback] if fallback
    properties = e.attribute_with_prefix('properties')
    item.properties = properties.split(' ') if properties
    manifest << item
  end

  pending_fallbacks.each do |item, fallback_id|
    item.fallback = manifest[fallback_id]
  end

  manifest
end
- 1
def parse_spine(doc)
- 90
spine = EPUB::Publication::Package::Spine.new
- 90
elem = doc.each_element_by_xpath('/opf:package/opf:spine', EPUB::NAMESPACES).first
- 90
%w[id toc page-progression-direction].each do |attr|
- 270
spine.__send__ "#{attr.gsub(/-/, '_')}=", elem.attribute_with_prefix(attr)
end
- 90
elem.each_element_by_xpath('./opf:itemref', EPUB::NAMESPACES).each do |e|
- 288
itemref = EPUB::Publication::Package::Spine::Itemref.new
- 288
%w[idref id].each do |attr|
- 576
itemref.__send__ "#{attr}=", e.attribute_with_prefix(attr)
end
- 288
itemref.linear = (e.attribute_with_prefix('linear') != 'no')
- 288
properties = e.attribute_with_prefix('properties')
- 288
itemref.properties = properties.split(' ') if properties
- 288
spine << itemref
end
- 90
spine
end
- 1
def parse_guide(doc)
- 93
guide = EPUB::Publication::Package::Guide.new
- 93
doc.each_element_by_xpath '/opf:package/opf:guide/opf:reference', EPUB::NAMESPACES do |ref|
- 39
reference = EPUB::Publication::Package::Guide::Reference.new
- 39
%w[type title].each do |attr|
- 78
reference.__send__ "#{attr}=", ref.attribute_with_prefix(attr)
end
- 39
reference.href = ref.attribute_with_prefix('href')
- 39
guide << reference
end
- 93
guide
end
- 1
def parse_bindings(doc, handler_map)
- 97
bindings = EPUB::Publication::Package::Bindings.new
- 97
doc.each_element_by_xpath '/opf:package/opf:bindings/opf:mediaType', EPUB::NAMESPACES do |elem|
- 43
media_type = EPUB::Publication::Package::Bindings::MediaType.new
- 43
media_type.media_type = elem.attribute_with_prefix('media-type')
- 43
media_type.handler = handler_map[elem.attribute_with_prefix('handler')]
- 43
bindings << media_type
end
- 97
bindings
end
- 1
# Converts the text of a +prefix+ attribute into a Hash mapping each prefix
# name to its IRI.
#
# @param str [String, nil] sequence of "name: IRI" pairs separated by whitespace
# @return [Hash{String => String}] empty when +str+ is nil or empty; a prefix
#   that is not followed by an IRI is reported with +warn+ and left out
def parse_prefix(str)
  return {} if str.nil? || str.empty?

  prefixes = {}
  scanner = StringScanner.new(str)
  scanner.skip(/\s*/)
  loop do
    prefix = scanner.scan(/[^\:\s]+/)
    break unless prefix

    # Consume the colon and/or whitespace between the name and its IRI.
    scanner.skip(/[\:\s]+/)
    iri = scanner.scan(/[^\s]+/)
    if iri.nil? || iri.empty?
      warn "no IRI detected for prefix `#{prefix}`"
    else
      prefixes[prefix] = iri
    end
    scanner.skip(/\s*/)
  end
  prefixes
end
end
end
end
- 1
module EPUB
- 1
class Parser
- 1
class XMLDocument
- 1
class << self
- 1
attr_accessor :backend
- 1
def new(xml)
- 259
case backend
when :Oga
- 65
Oga.parse_xml(xml)
when :Nokogiri
- 186
Nokogiri.XML(xml)
else
- 8
REXML::Document.new(xml)
end
end
end
end
end
end
- 1
%i[Nokogiri Oga REXML].each do |backend|
begin
- 3
require "epub/parser/xml_document/refinements/#{backend.downcase}"
- 3
EPUB::Parser::XMLDocument.backend ||= backend
rescue LoadError
end
end
- 1
require "nokogiri"
- 1
module EPUB
- 1
class Parser
- 1
class XMLDocument
- 1
module Refinements
- 1
module Nokogiri
- 1
refine ::Nokogiri::XML::Node do
- 1
def each_element_by_xpath(xpath, namespaces = nil, &block)
- 637
xpath(xpath, namespaces).each &block
end
- 1
def attribute_with_prefix(name, prefix = nil)
- 15669
attribute_with_ns(name, EPUB::NAMESPACES[prefix])&.value
end
- 1
def each_element(xpath = nil, &block)
- 95
element_children.each(&block)
end
- 1
alias elements element_children
- 1
# @return [String, nil] href of this node's namespace, or +nil+ when the node
#   has no namespace (the Oga refinement of this method is nil-safe in the same way)
def namespace_uri
  namespace&.href
end
end
end
- 1
include Nokogiri
end
end
end
end
- 1
require "oga"
- 1
module EPUB
- 1
class Parser
- 1
class XMLDocument
- 1
module Refinements
- 1
module Oga
- 1
[::Oga::XML::Document, ::Oga::XML::Node].each do |klass|
- 2
refine klass do
[
- 2
[:document, ::Oga::XML::Document],
[:element, ::Oga::XML::Element],
[:text, ::Oga::XML::Text]
].each do |(type, klass)|
- 6
define_method "#{type}?" do
- 1044
kind_of? klass
end
end
- 2
def root
# Couldn't use find(&:element?) for Rubies under 2.6
- 215
root_node.children.find {|child| child.element?}
end
- 2
def elements
# Couldn't use find(&:element?) for Rubies under 2.6
- 548
children.select {|child| child.element?}
end
# Need for Rubies under 2.6
- 2
def respond_to?(name, include_all = false)
- 15
[:root, :elements].include?(name) || super
end
- 2
def each_element_by_xpath(xpath, namespaces = nil, &block)
- 286
xpath(xpath, namespaces: namespaces).each &block
end
end
end
- 1
refine ::Oga::XML::Element do
- 1
def attribute_with_prefix(name, prefix = nil)
- 5047
name = prefix ? "#{prefix}:#{name}" : name
- 5047
get(name)
end
- 1
def each_element(xpath = nil, &block)
- 27
each_node do |node|
- 1529
throw :skip_children unless node.kind_of?(::Oga::XML::Element)
- 516
block.call node
end
end
- 1
def namespace_uri
- 516
namespace&.uri
end
- 1
alias original_namespaces namespaces
- 1
def namespaces
- 5
original_namespaces.each_with_object({}) {|(name, namespace), nss|
- 10
nss[name] = namespace.uri
}
end
- 1
alias content text
end
- 1
refine ::Oga::XML::Text do
- 1
alias content text
end
end
- 1
include Oga
end
end
end
end
- 1
require "rexml/document"
- 1
module EPUB
- 1
class Parser
- 1
class XMLDocument
- 1
module Refinements
- 1
module REXML
- 1
[::REXML::Element, ::REXML::Text].each do |klass|
- 2
refine klass do
- 2
%i[document element text].each do |type|
- 6
define_method "#{type}?" do
node_type == type
end
end
end
end
- 1
refine ::REXML::Element do
- 1
def each_element_by_xpath(xpath, namespaces = nil, &block)
- 54
::REXML::XPath.each self, xpath, namespaces, &block
end
- 1
def attribute_with_prefix(name, prefix = nil)
- 859
attribute(name, EPUB::NAMESPACES[prefix])&.value
end
- 1
alias namespace_uri namespace
- 1
def content
- 98
each_child.inject("") {|text, node|
- 94
case node.node_type
when :document, :element
- 2
text << node.content
when :text
- 92
text << node.value
end
}
end
end
- 1
refine ::REXML::Text do
- 1
alias content value
end
end
- 1
include REXML
end
end
end
end
- 1
require 'epub/publication/package'
- 1
require 'epub/publication/fixed_layout'
- 1
module EPUB
- 1
module Publication
- 1
module FixedLayout
- 1
PREFIX_KEY = 'rendition'.freeze
- 1
PREFIX_VALUE = 'http://www.idpf.org/vocab/rendition/#'.freeze
RENDITION_PROPERTIES = {
- 1
'layout' => ['reflowable'.freeze, 'pre-paginated'.freeze].freeze,
'orientation' => ['auto'.freeze, 'landscape'.freeze, 'portrait'.freeze].freeze,
'spread' => ['auto'.freeze, 'none'.freeze, 'landscape'.freeze, 'portrait'.freeze, 'both'.freeze].freeze
}.freeze
- 1
class UnsupportedRenditionValue < StandardError; end
- 1
class << self
- 1
def included(package_class)
[
- 2
[Package, PackageMixin],
[Package::Metadata, MetadataMixin],
[Package::Spine::Itemref, ItemrefMixin],
[Package::Manifest::Item, ItemMixin],
[ContentDocument::XHTML, ContentDocumentMixin],
].each do |(base, mixin)|
- 10
base.__send__ :include, mixin
end
end
end
- 1
module Rendition
# @note Call after defining #rendition_xxx and #rendition_xxx=
- 1
def def_rendition_methods
- 4
RENDITION_PROPERTIES.each_key do |property|
- 12
alias_method property, "rendition_#{property}"
- 12
alias_method "#{property}=", "rendition_#{property}="
end
- 4
def_rendition_layout_methods
end
- 1
# Defines, on the receiver, helper methods for every value of the +layout+
# rendition property. For the value +pre-paginated+ these are
# +pre_paginated=+, +make_pre_paginated+, +pre_paginated!+ and +pre_paginated?+.
# The generated methods delegate to +rendition_layout+ / +rendition_layout=+.
def def_rendition_layout_methods
  property = 'layout'
  layouts = RENDITION_PROPERTIES[property]
  layouts.each do |value|
    method_name_base = value.gsub('-', '_')

    # Boolean-style writer: a truthy argument selects +value+, a falsy one
    # selects the first other layout. The candidates come from the captured
    # +layouts+ list; the writer runs in the instance's context, where no
    # +values+ name is available.
    writer_name = "#{method_name_base}="
    define_method writer_name do |new_value|
      new_prop = new_value ? value : layouts.find {|l| l != value}
      __send__ "rendition_#{property}=", new_prop
    end

    maker_name = "make_#{method_name_base}"
    define_method maker_name do
      __send__ "rendition_#{property}=", value
    end

    destructive_method_name = "#{method_name_base}!"
    alias_method destructive_method_name, maker_name

    predicate_name = "#{method_name_base}?"
    define_method predicate_name do
      __send__("rendition_#{property}") == value
    end
  end
end
end
- 1
module PackageMixin
# @return [true, false]
- 1
def using_fixed_layout
- 4
prefix.has_key? PREFIX_KEY and
prefix[PREFIX_KEY] == PREFIX_VALUE
end
- 1
alias using_fixed_layout? using_fixed_layout
# @param using_fixed_layout [true, false]
- 1
def using_fixed_layout=(using_fixed_layout)
- 2
if using_fixed_layout
- 1
prefix[PREFIX_KEY] = PREFIX_VALUE
else
- 1
prefix.delete PREFIX_KEY
end
end
end
- 1
module MetadataMixin
- 1
extend Rendition
- 1
RENDITION_PROPERTIES.each_pair do |property, values|
- 3
define_method "rendition_#{property}" do
- 45
meta = metas.find {|m| m.property == "rendition:#{property}"}
- 27
meta ? meta.content : values.first
end
- 3
define_method "rendition_#{property}=" do |new_value|
- 14
raise UnsupportedRenditionValue, new_value unless values.include? new_value
- 13
prefixed_property = "rendition:#{property}"
- 13
values_to_be_deleted = values - [new_value]
- 21
metas.delete_if {|meta| meta.property == prefixed_property && values_to_be_deleted.include?(meta.content)}
- 14
unless metas.any? {|meta| meta.property == prefixed_property && meta.content == new_value}
- 12
meta = Package::Metadata::Meta.new
- 12
meta.property = prefixed_property
- 12
meta.content = new_value
- 12
metas << meta
end
- 13
new_value
end
end
- 1
def_rendition_methods
end
- 1
module ItemrefMixin
- 1
extend Rendition
- 1
PAGE_SPREAD_PROPERTY = 'center'
- 1
PAGE_SPREAD_PREFIX = 'rendition:page-spread-'
- 1
class << self
# @todo Define using Module#prepend after Ruby 2.0 will become popular
- 1
def included(base)
- 2
return if base.instance_methods.include? :page_spread_without_fixed_layout
- 1
base.__send__ :alias_method, :page_spread_without_fixed_layout, :page_spread
- 1
base.__send__ :alias_method, :page_spread_writer_without_fixed_layout, :page_spread=
- 1
prefixed_page_spread_property = "#{PAGE_SPREAD_PREFIX}#{PAGE_SPREAD_PROPERTY}"
- 1
base.__send__ :define_method, :page_spread do
- 3
property = page_spread_without_fixed_layout
- 3
return property if property
- 2
properties.include?(prefixed_page_spread_property) ? PAGE_SPREAD_PROPERTY : nil
end
- 1
base.__send__ :define_method, :page_spread= do |new_value|
- 6
if new_value == PAGE_SPREAD_PROPERTY
- 2
page_spread_writer_without_fixed_layout nil
- 2
properties << prefixed_page_spread_property
else
- 4
page_spread_writer_without_fixed_layout new_value
end
- 6
new_value
end
end
end
- 1
RENDITION_PROPERTIES.each do |property, values|
- 3
rendition_property_prefix = "rendition:#{property}-"
- 3
reader_name = "rendition_#{property}"
- 3
define_method reader_name do
- 27
prop_value = properties.find {|prop| prop.start_with? rendition_property_prefix}
- 16
prop_value ? prop_value.gsub(/\A#{Regexp.escape(rendition_property_prefix)}/, '') :
spine.package.metadata.__send__(reader_name)
end
- 3
writer_name = "#{reader_name}="
- 3
define_method writer_name do |new_value|
- 12
if new_value.nil?
- 2
properties.delete_if {|prop| prop.start_with? rendition_property_prefix}
- 1
return new_value
end
- 11
raise UnsupportedRenditionValue, new_value unless values.include? new_value
- 27
values_to_be_deleted = (values - [new_value]).map {|value| "#{rendition_property_prefix}#{value}"}
- 14
properties.delete_if {|prop| values_to_be_deleted.include? prop}
- 11
new_property = "#{rendition_property_prefix}#{new_value}"
- 11
properties << new_property unless properties.include? new_property
- 11
new_value
end
end
- 1
def_rendition_methods
end
- 1
module ItemMixin
- 1
extend Rendition
- 1
RENDITION_PROPERTIES.each_key do |property|
- 3
define_method "rendition_#{property}" do
- 2
itemref.__send__ property
end
- 3
writer_name = "rendition_#{property}="
- 3
define_method writer_name do |value|
- 2
itemref.__send__ writer_name, value
end
end
- 1
def_rendition_methods
end
- 1
module ContentDocumentMixin
- 1
extend Rendition
- 1
RENDITION_PROPERTIES.each_key do |property|
- 3
reader_name = "rendition_#{property}"
- 3
define_method reader_name do
- 1
item.__send__ reader_name
end
- 3
writer_name = "rendition_#{property}="
- 3
define_method writer_name do |value|
- 1
item.__send__ writer_name, value
end
end
- 1
def_rendition_methods
end
end
end
end
- 1
module EPUB
- 1
module Publication
- 1
class Package
- 1
include Inspector
- 1
CONTENT_MODELS = [:metadata, :manifest, :spine, :guide, :bindings]
RESERVED_VOCABULARY_PREFIXES = {
- 1
'' => 'http://idpf.org/epub/vocab/package/#',
'dcterms' => 'http://purl.org/dc/terms/',
'marc' => 'http://id.loc.gov/vocabulary/',
'media' => 'http://www.idpf.org/epub/vocab/overlays/#',
'onix' => 'http://www.editeur.org/ONIX/book/codelists/current.html#',
'xsd' => 'http://www.w3.org/2001/XMLSchema#'
}
- 1
class << self
- 1
def define_content_model(model_name)
- 5
define_method "#{model_name}=" do |model|
- 524
current_model = __send__(model_name)
- 524
current_model.package = nil if current_model
- 524
model.package = self
- 524
instance_variable_set "@#{model_name}", model
end
end
end
- 1
attr_accessor :book,
:version, :prefix, :xml_lang, :dir, :id
- 1
attr_reader *CONTENT_MODELS
- 1
alias lang xml_lang
- 1
alias lang= xml_lang=
- 1
CONTENT_MODELS.each do |model|
- 5
define_content_model model
end
- 1
def initialize
- 162
@prefix = {}
end
# @return [EPUB::Metadata::Identifier] Unique Identifier
- 1
def unique_identifier
- 2
@metadata.unique_identifier
end
# Corresponding {Rootfile}
# @return [OCF::Container::Rootfile]
- 1
def rootfile
- 86
@book.ocf.container.rootfiles.find {|rf| rf.package == self}
end
# Full path in EPUB archive
# @return [Addressable::URI]
- 1
def full_path
  # No corresponding rootfile means there is no path to report.
  return nil unless rootfile

  rootfile.full_path
end
- 1
def inspect
- 4
"#<%{class}:%{object_id} %{attributes} %{models}>" % {
:class => self.class,
:object_id => inspect_object_id,
- 20
:attributes => inspect_instance_variables(exclude: CONTENT_MODELS.map {|model| :"@#{model}"}),
:models => inspect_models
}
end
- 1
def inspect_models
- 4
CONTENT_MODELS.map {|name|
- 20
model = __send__(name)
- 20
representation = model.nil? ? model.inspect : model.inspect_simply
- 20
"@#{name}=#{representation}"
}.join(' ')
end
end
end
end
- 1
EPUB::Publication::Package::CONTENT_MODELS.each do |f|
- 5
require_relative "package/#{f}"
end
- 1
module EPUB
- 1
module Publication
- 1
class Package
- 1
class Bindings
- 1
include Inspector::PublicationModel
- 1
attr_accessor :package
- 1
def initialize
- 97
@media_types = {}
end
- 1
def <<(media_type)
- 43
@media_types[media_type.media_type] = media_type
end
- 1
# Looks up the entry registered for +media_type+.
#
# @param media_type [String]
# @return [MediaType, nil] first entry whose key equals +media_type+, or +nil+
def [](media_type)
  @media_types.each_pair do |registered_type, entry|
    return entry if registered_type == media_type
  end
  nil
end
- 1
def media_types
- 5
@media_types.values
end
- 1
class MediaType
- 1
attr_accessor :media_type, :handler
end
end
end
end
end
- 1
module EPUB
- 1
module Publication
- 1
class Package
- 1
class Guide
- 1
include Inspector::PublicationModel
- 1
attr_accessor :package, :references
- 1
def initialize
- 93
Reference::TYPES.each do |type|
- 1581
variable_name = '@' + type.gsub('-', '_')
- 1581
instance_variable_set variable_name, nil
end
- 93
@references = []
end
- 1
def <<(reference)
- 39
reference.guide = self
- 39
references << reference
end
- 1
class Reference
- 1
TYPES = %w[cover title-page toc index glossary acknowledgements bibliography colophon copyright-page dedication epigraph foreword loi lot notes preface text]
- 1
attr_accessor :guide,
:type, :title
- 1
attr_reader :href
- 1
def href=(iri)
- 39
@href = iri.kind_of?(Addressable::URI) ? iri : Addressable::URI.parse(iri)
end
- 1
def item
- 1
return @item if @item
- 1
request_uri = href.request_uri
- 1
@item = @guide.package.manifest.items.find {|item|
- 10
item.href.request_uri == request_uri
}
end
end
- 1
Reference::TYPES.each do |type|
- 17
method_name = type.gsub('-', '_')
- 17
define_method method_name do
- 3
var = instance_variable_get "@#{method_name}"
- 3
return var if var
- 4
var = references.find {|ref| ref.type == type}
- 2
instance_variable_set "@#{method_name}", var
end
end
end
end
end
end
- 1
require 'set'
- 1
require 'addressable/uri'
- 1
require 'rchardet'
- 1
require 'epub/constants'
- 1
require 'epub/parser/content_document'
- 1
module EPUB
- 1
module Publication
- 1
class Package
- 1
class Manifest
- 1
include Inspector::PublicationModel
- 1
attr_accessor :package,
:id
- 1
def initialize
- 140
@items = {}
end
# @param item [Item]
# @return [Manifest] self
- 1
def <<(item)
- 1291
item.manifest = self
- 1291
@items[item.id] = item
- 1291
self
end
- 1
def each_nav
- 3
if block_given?
- 1
each_item do |item|
- 4
yield item if item.nav?
end
else
- 2
each_item.lazy.select(&:nav?)
end
end
# @return [Array<Item>] item which includes "nav" as one of +properties+. It represents this item is a navigation of book.
- 1
def navs
- 9
items.select(&:nav?)
end
# @return [Item, nil] the first item of #navs
- 1
def nav
- 8
navs.first
end
# @return [Item, nil] item which includes "cover-image" as one of +properties+. It represents this item is cover image.
- 1
def cover_image
- 1
items.find(&:cover_image?)
end
# @overload each_item
# @yield [item]
# @yieldparam [Item]
# @overload each_item
# @return [Enumerator]
- 1
def each_item
  # Without a block, hand back an Enumerator over the registered items.
  return @items.each_value unless block_given?

  @items.each_value {|item| yield item}
end
- 1
def items
- 88
@items.values
end
# @param item_id [String]
# @return [Item, nil]
- 1
def [](item_id)
- 222
@items[item_id]
end
- 1
class Item
- 1
DUMMY_ROOT_IRI = Addressable::URI.parse('http://example.net/').freeze
- 1
include Inspector
# @!attribute [rw] manifest
# @return [Manifest] Returns the value of manifest
# @!attribute [rw] id
# @return [String] Returns the value of id
# @!attribute [rw] href
# @return [Addressable::URI] Returns the value of href,
# which is relative IRI from rootfile(OPF file)
# @!attribute [rw] media_type
# @return [String] Returns the value of media_type
# @!attribute [rw] properties
# @return [Set<String>] Returns the value of properties
# @!attribute [rw] media_overlay
# @return [String] Returns the value of media_overlay
# @!attribute [rw] fallback
# @return [Item] Returns the value of attribute fallback
- 1
attr_accessor :manifest,
:id, :media_type, :fallback, :media_overlay
- 1
attr_reader :properties, :href
- 1
def initialize
- 1298
@properties = Set.new
- 1298
@full_path = nil
end
- 1
def properties=(props)
- 281
@properties = props.kind_of?(Set) ? props : Set.new(props)
end
- 1
def href=(iri)
- 1211
@href = iri.kind_of?(Addressable::URI) ? iri : Addressable::URI.parse(iri)
end
# @todo Handle circular fallback chain
- 1
def fallback_chain
- 2
@fallback_chain ||= traverse_fallback_chain([])
end
# full path in archive
# @return [Addressable::URI]
- 1
def full_path
- 18
return @full_path if @full_path
- 18
path = DUMMY_ROOT_IRI + manifest.package.full_path + href
- 18
path.scheme = nil
- 18
path.host = nil
- 18
path.path = path.path[1..-1]
- 18
@full_path = path
end
# full path in archive
# @return [String]
- 1
def entry_name
- 16
Addressable::URI.unencode(full_path)
end
# Read content from EPUB archive
#
# @param detect_encoding [Boolean] Whether #read tries auto-detection of character encoding. The default value is +false+.
# @return [String] Content with encoding:
# US-ASCII when the content is not in text format, such as images.
# UTF-8 when the content is in text format and +detect_encoding+ is +false+.
# auto-detected encoding when the content is in text format and +detect_encoding+ is +true+.
- 1
def read(detect_encoding: false)
- 15
raw_content = manifest.package.book.container_adapter.read(manifest.package.book.epub_file, entry_name)
- 15
unless media_type.start_with?('text/') or
media_type.end_with?('xml') or
['application/json', 'application/javascript', 'application/ecmascript', 'application/xml-dtd'].include?(media_type)
return raw_content
end
- 15
if detect_encoding
# CharDet.detect doesn't raise Encoding::CompatibilityError
# that is caused when trying compare CharDet's internal
# ASCII-8BIT RegExp with a String with other encoding
# because Zip::File#read returns a String with encoding ASCII-8BIT.
# So, no need to rescue the error here.
- 3
encoding = CharDet.detect(raw_content)['encoding']
- 3
if encoding
- 3
raw_content.force_encoding(encoding)
else
warn "No encoding detected for #{entry_name}. Set to ASCII-8BIT" if $DEBUG || $VERBOSE
raw_content
end
else
- 12
raw_content.force_encoding("UTF-8");
end
end
- 1
def xhtml?
- 2
media_type == 'application/xhtml+xml'
end
- 1
def nav?
- 146
properties.include? 'nav'
end
- 1
def cover_image?
- 4
properties.include? 'cover-image'
end
# @todo Handle circular fallback chain
- 1
def use_fallback_chain(options = {})
- 7
supported = EPUB::MediaType::CORE
- 7
if ad = options[:supported]
- 1
supported = supported | (ad.respond_to?(:to_ary) ? ad : [ad])
end
- 7
if del = options[:unsupported]
- 3
supported = supported - (del.respond_to?(:to_ary) ? del : [del])
end
- 7
return yield self if supported.include? media_type
- 4
if (bindings = manifest.package.bindings) && (binding_media_type = bindings[media_type])
- 1
return yield binding_media_type.handler
end
- 6
return fallback.use_fallback_chain(options) {|fb| yield fb} if fallback
raise EPUB::MediaType::UnsupportedMediaType
end
- 1
def content_document
- 23
return nil unless %w[application/xhtml+xml image/svg+xml].include? media_type
- 16
@content_document ||= Parser::ContentDocument.new(self).parse
end
# @return [Package::Spine::Itemref]
# @return nil when no Itemref refers this Item
- 1
def itemref
- 14
manifest.package.spine.itemrefs.find {|itemref| itemref.idref == id}
end
# @param iri [Addressable::URI] relative iri
# @return [Item]
# @return [nil] when item not found
# @raise ArgumentError when +iri+ is not relative
# @raise ArgumentError when +iri+ starts with "/"(slash)
# @note Algorithm stolen from Rack::Utils#clean_path_info
- 1
def find_item_by_relative_iri(iri)
- 75
raise ArgumentError, "Not relative: #{iri.inspect}" unless iri.relative?
- 75
raise ArgumentError, "Start with slash: #{iri.inspect}" if iri.path.start_with? Addressable::URI::SLASH
- 74
target_href = href + iri
- 74
target_href.fragment = nil
- 74
segments = target_href.to_s.split(Addressable::URI::SLASH)
- 74
clean_segments = []
- 74
segments.each do |segment|
- 84
next if segment.empty? || segment == '.'
- 84
segment == '..' ? clean_segments.pop : clean_segments << segment
end
- 74
target_iri = Addressable::URI.parse(clean_segments.join(Addressable::URI::SLASH))
- 313
manifest.items.find { |item| item.href == target_iri}
end
- 1
def inspect
- 1
"#<%{class}:%{object_id} %{manifest} %{attributes}>" % {
:class => self.class,
:object_id => inspect_object_id,
:manifest => "@manifest=#{@manifest.inspect_simply}",
:attributes => inspect_instance_variables(exclude: [:@manifest])
}
end
- 1
protected
- 1
# Appends this item and then, recursively, its fallbacks to +chain+.
#
# Traversal stops as soon as it reaches an item that is already in +chain+,
# so a circular fallback declaration produces a finite chain instead of
# recursing until the stack overflows.
#
# @param chain [Array<Item>] items collected so far; mutated in place
# @return [Array<Item>] +chain+
def traverse_fallback_chain(chain)
  # Identity comparison: the same Item object seen twice means a cycle.
  return chain if chain.any? {|visited| visited.equal?(self)}

  chain << self
  return chain unless fallback

  fallback.traverse_fallback_chain(chain)
end
end
end
end
end
end
- 1
require 'epub/metadata'
- 1
module EPUB
- 1
module Publication
- 1
class Package
- 1
Metadata = EPUB::Metadata
end
end
end
- 1
require 'set'
- 1
module EPUB
- 1
module Publication
- 1
class Package
- 1
class Spine
- 1
include Inspector::PublicationModel
- 1
attr_accessor :package,
:id, :toc, :page_progression_direction
- 1
attr_reader :itemrefs
- 1
def initialize
- 106
@itemrefs = []
end
# @return self
- 1
def <<(itemref)
- 304
itemref.spine = self
- 304
@itemrefs << itemref
- 304
self
end
# @yield [itemref]
# @yieldparam [Itemref] itemref
# @yieldreturn [Object] returns the last value of block
# @return [Object, Enumerator]
# returns the last value of block when block given, Enumerator when not
- 1
def each_itemref
  # Without a block, return an Enumerator that replays this method.
  return enum_for(:each_itemref) unless block_given?

  itemrefs.each {|itemref| yield itemref}
end
# @return [Array<Manifest::Item>] {Manifest::Item}s
# referred by each of {#itemrefs}
- 1
def items
  # One entry per itemref, in itemref order.
  itemrefs.map(&:item)
end
- 1
class Itemref
- 1
PAGE_SPREAD_PROPERTIES = ['left'.freeze, 'right'.freeze].freeze
- 1
PAGE_SPREAD_PREFIX = 'page-spread-'.freeze
- 1
attr_accessor :spine,
:idref, :linear, :id
- 1
attr_reader :properties
- 1
def initialize
- 313
@properties = Set.new
end
- 1
def properties=(props)
- 4
@properties = props.kind_of?(Set) ? props : Set.new(props)
end
# @return [true|false]
- 1
def linear?
- 6
!! linear
end
# @return [Package::Manifest::Item] item referred by this object
- 1
def item
- 77
@item ||= @spine.package.manifest[idref]
end
- 1
def item=(item)
- 1
self.idref = item.id
- 1
item
end
- 1
def ==(other)
- 3
[:spine, :idref, :id].all? {|meth|
- 9
self.__send__(meth) == other.__send__(meth)
} and
- 3
(linear? == other.linear?) and
- 3
(properties == other.properties)
end
# @return ["left", "right", nil]
- 1
def page_spread
  # The first property carrying the page-spread prefix wins; its suffix is the value.
  properties.each do |prop|
    next unless prop.start_with?(PAGE_SPREAD_PREFIX)

    return prop[PAGE_SPREAD_PREFIX.length..-1]
  end
  nil
end
# @param new_value ["left", "right", nil]
- 1
def page_spread=(new_value)
  if new_value.nil?
    # Clearing: drop every property that carries the page-spread prefix.
    properties.delete_if {|existing| existing.start_with?(PAGE_SPREAD_PREFIX)}
  else
    unless PAGE_SPREAD_PROPERTIES.include?(new_value)
      raise "Unsupported page-spread property: #{new_value}"
    end

    # Remove the competing page-spread values, then make sure the requested one is present.
    PAGE_SPREAD_PROPERTIES.each do |candidate|
      next if candidate == new_value

      properties.delete("#{PAGE_SPREAD_PREFIX}#{candidate}")
    end
    wanted = "#{PAGE_SPREAD_PREFIX}#{new_value}"
    properties << wanted unless properties.include?(wanted)
  end
  new_value
end
end
end
end
end
end
- 1
require 'epub/searcher/result'
- 1
require 'epub/searcher/publication'
- 1
require 'epub/searcher/xhtml'
- 1
module EPUB
- 1
module Searcher
- 1
class << self
- 1
# Searches +word+ in +epub.package+.
#
# @param epub [#package] object whose +package+ is searched
# @param word [String] text to look for
# @param options [Hash] keyword options forwarded to {Publication.search_text}
# @return [Array<Result>]
def search_text(epub, word, **options)
  # Forward as keywords: Publication.search_text declares only keyword
  # options, so a positional Hash raises ArgumentError on Ruby 3.
  Publication.search_text(epub.package, word, **options)
end
- 1
def search_element(epub, css: nil, xpath: nil, namespaces: {})
Publication.search_element(epub.package, css: css, xpath: xpath, namespaces: namespaces)
end
- 1
def search_by_cfi(epub, cfi)
Publication.search_by_cfi(epub.package, cfi)
end
end
end
end
- 1
require 'epub/publication'
- 1
require "epub/parser/xml_document"
- 1
module EPUB
- 1
module Searcher
- 1
class Publication
- 1
using Parser::XMLDocument::Refinements
- 1
class << self
- 1
def search_text(package, word, **options)
- 3
new(package).search_text(word, **options)
end
- 1
def search_element(package, css: nil, xpath: nil, namespaces: {})
- 3
new(package).search_element(css: css, xpath: xpath, namespaces: namespaces)
end
- 1
def search_by_cfi(package, cfi)
new(package).search_by_cfi(cfi)
end
end
- 1
def initialize(package)
- 6
@package = package
end
# @return [Array<Result>]
- 1
def search_text(word, algorithm: :seamless)
- 3
results = []
- 3
spine = @package.spine
- 3
spine_step = Result::Step.new(:element, 2, {:name => 'spine', :id => spine.id})
- 3
spine.each_itemref.with_index do |itemref, index|
- 15
itemref_step = Result::Step.new(:itemref, index, {:id => itemref.id})
- 15
XHTML::ALGORITHMS[algorithm].search_text(Parser::XMLDocument.new(itemref.item.read), word).each do |sub_result|
- 6
results << Result.new([spine_step, itemref_step] + sub_result.parent_steps, sub_result.start_steps, sub_result.end_steps)
end
end
- 3
results
end
# @todo: Refactoring
# @return [Array<Hash>] An array of search results. Each result is composed of:
# * +:element+: [REXML::Element, Oga::XML::Element, Nokogiri::XML::Element] Found element
# * +:itemref+: [EPUB::Publication::Package::Spine::Itemref] Itemref that element's document belongs to
# * +:location+: [EPUB::CFI::Location] CFI that indicates the element
# * +:package+: [EPUB::Publication::Package] Package that the element belongs to
- 1
def search_element(css: nil, xpath: nil, namespaces: {})
- 3
raise ArgumentError, 'Both css and xpath are nil' if css.nil? && xpath.nil?
- 3
namespaces = EPUB::NAMESPACES.merge(namespaces)
- 3
results = []
- 3
spine_step = EPUB::CFI::Step.new((EPUB::Publication::Package::CONTENT_MODELS.index(:spine) + 1) * 2)
- 3
@package.spine.each_itemref.with_index do |itemref, index|
- 15
assertion = itemref.id ? EPUB::CFI::IDAssertion.new(itemref.id) : nil
- 15
itemref_step = EPUB::CFI::Step.new((index + 1) * 2, assertion)
- 15
path_to_itemref = EPUB::CFI::Path.new([spine_step, itemref_step])
- 15
content_document = itemref.item.content_document
- 15
next unless content_document
- 9
elems = if xpath
- 6
doc = Parser::XMLDocument.new(content_document.read)
- 6
doc.each_element_by_xpath(xpath, namespaces)
else
begin
- 3
doc = content_document.nokogiri
rescue LoadError
raise "#{self.class}##{__method__} with `css` argument requires Nokogiri gem for now. Install Nokogiri and then try again."
end
- 3
doc.css(css)
end
- 9
elems.each do |elem|
- 22
path = find_path(elem)
- 22
results << {
location: EPUB::CFI::Location.new([path_to_itemref, path]),
package: @package,
itemref: itemref,
element: elem
}
end
end
- 3
results
end
# @note Currently can handle only location CFI without offset
# @todo Use XHTML module
# @todo Handle CFI with offset
# @todo Handle range CFI
# @param [EPUB::CFI] cfi
# @return [Array] Path in EPUB Rendition
- 1
def search_by_cfi(cfi)
path_in_package = cfi.paths.first
spine = @package.spine
model = [@package.metadata, @package.manifest, spine, @package.guide, @package.bindings].compact[path_in_package.steps.first.value / 2 - 1]
raise NotImplementedError, "Currently, #{__method__} supports spine only(#{cfi})" unless model == spine
raise ArgumentError, "Cannot identify <itemref>'s child" if path_in_package.steps.length > 2
step_to_itemref = path_in_package.steps[1]
itemref = spine.itemrefs[step_to_itemref.value / 2 - 1]
doc = itemref.item.content_document.nokogiri
path_in_doc = cfi.paths[1]
current_node = doc.root
path_in_doc.steps.each do |step|
if step.element?
current_node = current_node.elements.to_a[step.value / 2 - 1]
else
element_index = (step.value - 1) / 2 - 1
if element_index == -1
current_node = current_node.children.first
else
prev = current_node.elements.to_a[element_index]
break unless prev
current_node = prev.next_sibling
break unless current_node
end
end
end
raise NotImplementedError, "Currently, #{__method__} doesn't support deeper DOM tree such as including <iframe>" if cfi.paths[2]
[itemref, current_node]
end
- 1
private
- 1
def find_path(elem)
- 22
steps = []
- 22
until elem.parent.document?
- 148
index = elem.parent.elements.to_a.index(elem)
- 148
id_attr = elem.attribute_with_prefix("id")
- 148
assertion = id_attr ? EPUB::CFI::IDAssertion.new(id_attr) : nil
- 148
steps.unshift EPUB::CFI::Step.new((index + 1) * 2, assertion)
- 148
elem = elem.parent
end
- 22
EPUB::CFI::Path.new(steps)
end
end
end
end
- 1
require 'epub/cfi'
- 1
module EPUB
- 1
module Searcher
- 1
# A search hit expressed as CFI-like steps: steps shared by the start and the end
# of the hit, followed by the steps specific to each end.
class Result
  class << self
    # Splits two step sequences into their common leading part and the two remainders.
    #
    # @example
    #   Result.aggregate_step_intersection([a, b, c], [a, b, d]) # => [[a, b], [c], [d]]
    # @example
    #   Result.aggregate_step_intersection([a, b, c], [a, d, c]) # => [[a], [b, c], [d, c]]
    #   # Note that c here is not included in the first element of returned value.
    # @example
    #   Result.aggregate_step_intersection([a], [a, b]) # => [[a], [], [b]]
    # @param steps1 [Array<Step>, Array<Array>]
    # @param steps2 [Array<Step>, Array<Array>]
    # @return [Array<Array<Array>>] Three arrays:
    #   1. "intersection" of +steps1+ and +steps2+. "intersection" here is not the term of mathematics
    #   2. remaining steps of +steps1+
    #   3. remaining steps of +steps2+
    def aggregate_step_intersection(steps1, steps2)
      # Compare by position instead of steps1.zip(steps2): zip stops at the length
      # of steps1 and would silently drop the tail of a longer steps2.
      shorter_length = [steps1.length, steps2.length].min
      common_length = 0
      common_length += 1 while common_length < shorter_length && steps1[common_length] == steps2[common_length]

      # take/drop return fresh arrays, so callers may append to them safely
      [steps1.take(common_length), steps1.drop(common_length), steps2.drop(common_length)]
    end
  end

  attr_reader :parent_steps, :start_steps, :end_steps

  # @param parent_steps [Array<Step>] common steps between start and end
  # @param start_steps [Array<Step>] steps to start from +parent_steps+
  # @param end_steps [Array<Step>] steps to end from +parent_steps+
  def initialize(parent_steps, start_steps, end_steps)
    @parent_steps, @start_steps, @end_steps = parent_steps, start_steps, end_steps
  end

  # Joins the CFI fragments of each non-nil step list with commas and hands the
  # string to +EPUB::CFI()+.
  #
  # @return whatever +EPUB::CFI()+ returns for the assembled string
  def to_cfi
    str = [@parent_steps, @start_steps, @end_steps].collect {|steps|
      steps ? steps.collect(&:to_cfi).join : nil
    }.compact.join(',')

    EPUB::CFI(str)
  end

  # Two results are equal when their full start paths and full end paths match,
  # regardless of where each is split between parent steps and start/end steps.
  # nil start/end steps count as empty.
  #
  # @param other [Result]
  # @return [Boolean]
  def ==(other)
    @parent_steps + @start_steps.to_a == other.parent_steps + other.start_steps.to_a &&
      @parent_steps + @end_steps.to_a == other.parent_steps + other.end_steps.to_a
  end

  # One step of a result path.
  class Step
    attr_reader :type, :index, :info

    # @param type [Symbol] one of :element, :text, :character, :itemref
    # @param index [Integer] zero-based position; for :text it is the number of
    #   element siblings that precede the text
    # @param info [Hash] optional details; +:id+ is rendered as an ID assertion
    def initialize(type, index, info={})
      @type, @index, @info = type, index, info
    end

    # @param other [Step]
    # @return [Boolean] true when type, index and info are all equal
    def ==(other)
      type == other.type &&
        index == other.index &&
        info == other.info
    end

    # @return [String, nil] CFI fragment for this step; nil for an unknown type
    def to_cfi
      case type
      when :element
        # Elements take even CFI indices: 2, 4, 6...
        '/%d%s' % [(index + 1) * 2, id_assertion]
      when :text
        # Text takes odd CFI indices; text after +index+ elements sits at index * 2 + 1
        '/%d' % [index * 2 + 1]
      when :character
        ':%d' % [index]
      when :itemref
        '/%d%s!' % [(index + 1) * 2, id_assertion]
      end
    end

    private

    # @return [String, nil] "[id]" when an id is known, nil otherwise
    def id_assertion
      info[:id] ? "[#{info[:id]}]" : nil
    end
  end
end
end
end
- 1
require 'epub'
- 1
require 'epub/parser/xml_document'
- 1
module EPUB
- 1
module Searcher
- 1
class XHTML
- 1
using Parser::XMLDocument::Refinements
- 1
ALGORITHMS = {}
- 1
class << self
# Convenience entry point: builds a searcher and runs it in one call.
# When +element+ responds to +root+ (a document), its root is searched.
#
# @param element [REXML::Element, REXML::Document, Oga::XML::Element, Oga::XML::Document, Nokogiri::XML::Element, Nokogiri::XML::Document]
# @param word [String]
# @return [Array<Result>]
def search_text(element, word)
  target = element.respond_to?(:root) ? element.root : element
  new(target).search_text(word)
end
end
# Remembers the element that later searches start from.
# @param element [REXML::Element, Oga::XML::Element, Nokogiri::XML::Element] element whose descendants are searched
- 1
def initialize(element)
- 33
@element = element
end
- 1
class Restricted < self
# Searches +word+ in each text node and each <img> alt attribute below an element.
# Text nodes are examined one by one, so a hit never spans two nodes.
#
# @param word [String] text to look for
# @param element [REXML::Element, Oga::XML::Element, Nokogiri::XML::Element, nil]
#   element to search in; the element given to the constructor when nil
# @return [Array<Result>]
def search_text(word, element=nil)
  results = []
  # Number of element children seen so far; it also positions the text that follows them
  elem_index = 0
  (element || @element).children.each do |child|
    if child.element?
      child_step = Result::Step.new(:element, elem_index, {:name => child.name, :id => child.attribute_with_prefix('id')})
      if child.name == 'img'
        # An <img> without alt has nothing to match; skip it instead of calling #index on nil
        alt = child.attribute_with_prefix('alt')
        results << Result.new([child_step], nil, nil) if alt && alt.index(word)
      else
        # Prefix every hit found deeper in the tree with the step to this child
        search_text(word, child).each do |sub_result|
          results << Result.new([child_step] + sub_result.parent_steps, sub_result.start_steps, sub_result.end_steps)
        end
      end
      elem_index += 1
    elsif child.text?
      text_step = Result::Step.new(:text, elem_index)
      char_index = 0
      while (char_index = child.text.index(word, char_index))
        results << Result.new([text_step], [Result::Step.new(:character, char_index)], [Result::Step.new(:character, char_index + word.length)])
        # Advance by one character only, so overlapping occurrences are reported too
        char_index += 1
      end
    end
  end

  results
end
end
- 1
ALGORITHMS[:restricted] = Restricted
- 1
class Seamless < self
- 1
# @param element element to search in; handed to the superclass constructor
def initialize(element)
- 23
super
- 23
# Left unset here; search_text builds the index on first use
@indices = nil
end
- 1
# Searches +word+ in the concatenated text of the element, so that a hit may
# span several nodes. The index of the element is built on the first call and
# reused afterwards.
#
# @param word [String]
# @return [Array<Result>]
def search_text(word)
  @indices, @content = build_indices(@element) unless @indices
  visit(@indices, @content, word)
end
- 1
# Flattens the subtree of +element+ into one string and records where each
# piece of it came from.
#
# Text and CDATA nodes contribute their content, <img> elements their non-empty
# alt attribute, and other elements the flattened content of their own subtree.
#
# @param element node whose children are walked
# @return [Array] pair of
#   1. Hash mapping a position in the flattened string to the steps (plain
#      arrays of type, index and optional info) leading to the node whose
#      content starts there
#   2. the flattened String
def build_indices(element)
  positions = {}
  text = ''
  # Element children seen so far; also used as the index of the text following them
  element_count = 0

  element.children.each do |node|
    if node.element?
      step = [:element, element_count, {:name => node.name, :id => node.attribute_with_prefix('id')}]
      element_count += 1

      if node.name == 'img'
        alt_text = node.attribute_with_prefix('alt')
        unless alt_text.nil? || alt_text.empty?
          positions[text.length] = [step]
          text << alt_text
        end
      else
        # TODO: Consider block level elements
        base = text.length
        nested_positions, nested_text = build_indices(node)
        # TODO: Pass base and step to build_indices and remove this block
        nested_positions.each_pair do |nested_pos, nested_steps|
          positions[base + nested_pos] = [step] + nested_steps
        end
        text << nested_text
      end
    elsif node.text? || node.cdata?
      positions[text.length] = [[:text, element_count]]
      text << node.content
    end
  end

  [positions, text]
end
- 1
private
- 1
# Finds every occurrence of +word+ in the flattened +content+ and converts each
# into a Result, using +indices+ to map string positions back to nodes.
#
# @param indices [Hash] position in +content+ => steps of the node starting there
# @param content [String] flattened text
# @param word [String] text to look for
# @return [Array<Result>] empty when +word+ is empty
def visit(indices, content, word)
  results = []
  # An empty word ends where it starts, so no offset lies strictly before its
  # end position and the lookup below would fail; there is nothing to report.
  return results if word.empty?

  offsets = indices.keys
  word_length = word.length
  i = 0
  while (i = content.index(word, i))
    offset = find_offset(offsets, i)
    start_steps = to_result_steps(indices[offset])
    if start_steps.last.info[:name] == 'img'
      # Hit inside an alt attribute: the <img> itself is the result, without character offsets
      parent_steps = start_steps
      start_steps = end_steps = nil
    else
      end_position = i + word_length
      end_offset = find_offset(offsets, end_position, true)
      end_steps = to_result_steps(indices[end_offset])
      parent_steps, start_steps, end_steps = Result.aggregate_step_intersection(start_steps, end_steps)
      # Character offsets are relative to the node each end falls in
      start_steps << Result::Step.new(:character, i - offset)
      end_steps << Result::Step.new(:character, end_position - end_offset)
    end
    results << Result.new(parent_steps, start_steps, end_steps)
    # Advance by one character only, so overlapping occurrences are reported too
    i += 1
  end

  results
end
# Finds the largest offset that is less than or equal to +index+, or strictly
# less than +index+ when +for_end_position+ is true.
#
# The offsets are probed from the last one backwards with a binary search, so
# they are expected to be in ascending order.
#
# @param offsets [Array<Integer>] keys of indices
# @param index [Integer] position of search word in content string
# @param for_end_position [Boolean] whether +index+ is the position just after the word
# @return [Integer]
# @todo: more efficient algorithm
def find_offset(offsets, index, for_end_position=false)
  matcher = for_end_position ? :< : :<=
  size = offsets.length
  # +back+ counts from the end of offsets; back == 0 is past the last entry and never matches
  distance = (0..size).bsearch do |back|
    candidate = offsets[size - back]
    candidate ? candidate.send(matcher, index) : false
  end

  offsets[size - distance]
end
- 1
# Turns the plain step arrays kept in the index into Result::Step objects.
#
# @param steps [Array<Array>] each entry holds the arguments of Result::Step.new
# @return [Array<Result::Step>]
def to_result_steps(steps)
  steps.collect do |step_args|
    Result::Step.new(*step_args)
  end
end
end
- 1
ALGORITHMS[:seamless] = Seamless
end
end
end