# frozen_string_literal: true
module Puppet::Pops
module Parser
# This module is an integral part of the Lexer.
# It defines the string slurping behavior - finding the string and non string parts in interpolated
# strings, translating escape sequences in strings to their single character equivalence.
#
# PERFORMANCE NOTE: The various kinds of slurping could be made even more generic, but requires
# additional parameter passing and evaluation of conditional logic.
# TODO: More detailed performance analysis of excessive character escaping and interpolation.
#
module SlurpSupport
include LexerSupport
SLURP_SQ_PATTERN = /(?:[^\\]|^)(?:[\\]{2})*[']/
SLURP_DQ_PATTERN = /(?:[^\\]|^)(?:[\\]{2})*(["]|[$]\{?)/
SLURP_UQ_PATTERN = /(?:[^\\]|^)(?:[\\]{2})*([$]\{?|\z)/
# unquoted, no escapes
SLURP_UQNE_PATTERN = /(\$\{?|\z)/m
SLURP_ALL_PATTERN = /.*(\z)/m
SQ_ESCAPES = %w{ \\ ' }
DQ_ESCAPES = %w{ \\ $ ' " r n t s u}+["\r\n", "\n"]
UQ_ESCAPES = %w{ \\ $ r n t s u}+["\r\n", "\n"]
def slurp_sqstring
# skip the leading '
@scanner.pos += 1
str = slurp(@scanner, SLURP_SQ_PATTERN, SQ_ESCAPES, :ignore_invalid_escapes)
lex_error(Issues::UNCLOSED_QUOTE, :after => "\"'\"", :followed_by => followed_by) unless str
str[0..-2] # strip closing "'" from result
end
def slurp_dqstring
scn = @scanner
last = scn.matched
str = slurp(scn, SLURP_DQ_PATTERN, DQ_ESCAPES, false)
unless str
lex_error(Issues::UNCLOSED_QUOTE, :after => format_quote(last), :followed_by => followed_by)
end
# Terminator may be a single char '"', '$', or two characters '${' group match 1 (scn[1]) from the last slurp holds this
terminator = scn[1]
[str[0..(-1 - terminator.length)], terminator]
end
# Copy from old lexer - can do much better
def slurp_uqstring
scn = @scanner
str = slurp(scn, @lexing_context[:uq_slurp_pattern], @lexing_context[:escapes], :ignore_invalid_escapes)
# Terminator may be a single char '$', two characters '${', or empty string '' at the end of intput.
# Group match 1 holds this.
# The exceptional case is found by looking at the subgroup 1 of the most recent match made by the scanner (i.e. @scanner[1]).
# This is the last match made by the slurp method (having called scan_until on the scanner).
# If there is a terminating character is must be stripped and returned separately.
#
terminator = scn[1]
[str[0..(-1 - terminator.length)], terminator]
end
# Slurps a string from the given scanner until the given pattern and then replaces any escaped
# characters given by escapes into their control-character equivalent or in case of line breaks, replaces the
# pattern \r?\n with an empty string.
# The returned string contains the terminating character. Returns nil if the scanner can not scan until the given
# pattern.
#
def slurp(scanner, pattern, escapes, ignore_invalid_escapes)
str = scanner.scan_until(pattern) || return
return str unless str.include?('\\')
return str.gsub!(/\\(\\|')/m, '\1') || str if escapes.equal?(SQ_ESCAPES)
# Process unicode escapes first as they require getting 4 hex digits
# If later a \u is found it is warned not to be a unicode escape
if escapes.include?('u')
# gsub must be repeated to cater for adjacent escapes
while(str.gsub!(/((?:[^\\]|^)(?:[\\]{2})*)\\u(?:([\da-fA-F]{4})|\{([\da-fA-F]{1,6})\})/m) { $1 + [($2 || $3).hex].pack("U") })
# empty block. Everything happens in the gsub block
end
end
begin
str.gsub!(/\\([^\r\n]|(?:\r?\n))/m) {
ch = $1
if escapes.include? ch
case ch
when 'r' ; "\r"
when 'n' ; "\n"
when 't' ; "\t"
when 's' ; ' '
when 'u'
lex_warning(Issues::ILLEGAL_UNICODE_ESCAPE)
"\\u"
when "\n" ; ''
when "\r\n"; ''
else ch
end
else
lex_warning(Issues::UNRECOGNIZED_ESCAPE, :ch => ch) unless ignore_invalid_escapes
"\\#{ch}"
end
}
rescue ArgumentError => e
# A invalid byte sequence may be the result of faulty input as well, but that could not possibly
# have reached this far... Unfortunately there is no more specific error and a match on message is
# required to differentiate from other internal problems.
if e.message =~ /invalid byte sequence/
lex_error(Issues::ILLEGAL_UNICODE_ESCAPE)
else
raise e
end
end
str
end
end
end
end
Copyright 2K16 - 2K18 Indonesian Hacker Rulez