kanbanworkflowstimelinescrumrubyroadmapproject-planningproject-managementopenprojectangularissue-trackerifcgantt-chartganttbug-trackerboardsbcf
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
177 lines
4.0 KiB
177 lines
4.0 KiB
module CodeRay
  module Scanners

    # HTML Scanner
    #
    # $Id$
    #
    # Tokenizes HTML/XML-like markup with a small state machine. The states
    # used by #scan_tokens are:
    #
    # * +:initial+                - outside of any tag
    # * +:attribute+              - inside a tag, expecting an attribute name
    #                               or the end of the tag
    # * +:attribute_equal+        - after an attribute name, expecting +=+
    # * +:attribute_value+        - after +=+, expecting a value
    # * +:attribute_value_string+ - inside a quoted attribute value
    class HTML < Scanner

      include Streamable
      register_for :html

      ATTR_NAME = /[\w.:-]+/
      ATTR_VALUE_UNQUOTED = ATTR_NAME
      TAG_END = /\/?>/
      HEX = /[0-9a-fA-F]/
      # Named (&amp;), decimal (&#38;) and hexadecimal (&#x26;) entities.
      ENTITY = /
        &
        (?:
          \w+
          |
          \#
          (?:
            \d+
            |
            x#{HEX}+
          )
        )
        ;
      /ox

      # Maps the opening quote character of an attribute value to the
      # pattern matching a run of plain content inside that string.
      PLAIN_STRING_CONTENT = {
        "'" => /[^&'>\n]+/,
        '"' => /[^&">\n]+/,
      }

      # Resets the scanner, returning the state machine to the same state
      # that #setup establishes (both the state and the remembered string
      # content pattern are cleared).
      def reset
        super
        @state = :initial
        @plain_string_content = nil
      end

      private

      # Initializes the state that is carried between #scan_tokens calls
      # when the +:keep_state+ option is used.
      def setup
        @state = :initial
        @plain_string_content = nil
      end

      # Scans the remaining input and appends the tokens to +tokens+.
      #
      # tokens::  collection receiving <tt>[text, kind]</tt> pairs (and
      #           <tt>[:open, :string]</tt> / <tt>[:close, :string]</tt>
      #           group markers) via <tt><<</tt>.
      # options:: only <tt>options[:keep_state]</tt> is read here; when
      #           truthy, the final state is stored so that a later call
      #           continues where this one stopped.
      #
      # Returns +tokens+.
      def scan_tokens(tokens, options)
        state = @state
        plain_string_content = @plain_string_content

        until eos?
          kind = nil
          match = nil

          # Whitespace is tokenized as :space in every state, including
          # inside quoted attribute values.
          if scan(/\s+/m)
            kind = :space

          else
            case state

            when :initial
              if scan(/<!--.*?-->/m)
                kind = :comment
              elsif scan(/<!DOCTYPE.*?>/m)
                kind = :preprocessor
              elsif scan(/<\?xml.*?\?>/m)
                kind = :preprocessor
              elsif scan(/<\?.*?\?>|<%.*?%>/m)
                kind = :comment
              elsif scan(/<\/[-\w_.:]*>/m)
                kind = :tag
              elsif match = scan(/<[-\w_.:]+>?/m)
                kind = :tag
                # A tag that was not closed immediately may carry attributes.
                state = :attribute unless match[-1] == ?>
              elsif scan(/[^<>&]+/)
                kind = :plain
              elsif scan(ENTITY)
                kind = :entity
              elsif scan(/[<>&]/)
                kind = :error
              else
                raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
              end

            when :attribute
              if scan(TAG_END)
                kind = :tag
                state = :initial
              elsif scan(ATTR_NAME)
                kind = :attribute_name
                state = :attribute_equal
              else
                kind = :error
                getch
              end

            when :attribute_equal
              if scan(/=/)
                kind = :operator
                state = :attribute_value
              elsif scan(ATTR_NAME)
                # Attribute without a value followed by another attribute.
                kind = :attribute_name
              elsif scan(TAG_END)
                kind = :tag
                state = :initial
              elsif scan(/./)
                kind = :error
                state = :attribute
              end

            when :attribute_value
              if scan(ATTR_VALUE_UNQUOTED)
                kind = :attribute_value
                state = :attribute
              elsif match = scan(/["']/)
                tokens << [:open, :string]
                state = :attribute_value_string
                plain_string_content = PLAIN_STRING_CONTENT[match]
                kind = :delimiter
              elsif scan(TAG_END)
                kind = :tag
                state = :initial
              else
                kind = :error
                getch
              end

            when :attribute_value_string
              if scan(plain_string_content)
                kind = :content
              elsif scan(/['"]/)
                # The closing delimiter must be emitted before the group is
                # closed, so both tokens are pushed here and the generic
                # push at the end of the loop is skipped.
                tokens << [matched, :delimiter]
                tokens << [:close, :string]
                state = :attribute
                next
              elsif scan(ENTITY)
                kind = :entity
              elsif scan(/&/)
                kind = :content
              elsif scan(/[\n>]/)
                # Unterminated string: close the group and recover.
                tokens << [:close, :string]
                kind = :error
                state = :initial
              end

            else
              raise_inspect 'Unknown state: %p' % [state], tokens

            end

          end

          match ||= matched
          if $DEBUG && !kind
            raise_inspect 'Error token %p in line %d' %
              [[match, kind], line], tokens, state
          end
          raise_inspect 'Empty token', tokens unless match

          tokens << [match, kind]
        end

        if options[:keep_state]
          @state = state
          @plain_string_content = plain_string_content
        elsif state == :attribute_value_string
          # Input ended inside a quoted attribute value and no later call
          # will continue it: close the string group so that every
          # [:open, :string] has a matching [:close, :string].
          tokens << [:close, :string]
        end

        tokens
      end

    end

  end
end