emitter.py 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137
  1. # Emitter expects events obeying the following grammar:
  2. # stream ::= STREAM-START document* STREAM-END
  3. # document ::= DOCUMENT-START node DOCUMENT-END
  4. # node ::= SCALAR | sequence | mapping
  5. # sequence ::= SEQUENCE-START node* SEQUENCE-END
  6. # mapping ::= MAPPING-START (node node)* MAPPING-END
  7. __all__ = ['Emitter', 'EmitterError']
  8. from .error import YAMLError
  9. from .events import *
  class EmitterError(YAMLError):
      """Error raised by the emitter for unexpected events or invalid values."""
  class ScalarAnalysis:
      """Plain record describing a scalar and the styles it may be written in."""

      def __init__(self, scalar, empty, multiline,
              allow_flow_plain, allow_block_plain,
              allow_single_quoted, allow_double_quoted,
              allow_block):
          # The analysed text and its coarse shape.
          self.scalar, self.empty, self.multiline = scalar, empty, multiline
          # Plain style, inside and outside flow collections.
          self.allow_flow_plain, self.allow_block_plain = (
              allow_flow_plain, allow_block_plain)
          # Quoted styles.
          self.allow_single_quoted, self.allow_double_quoted = (
              allow_single_quoted, allow_double_quoted)
          # Block (literal / folded) styles.
          self.allow_block = allow_block
  25. class Emitter:
  26. DEFAULT_TAG_PREFIXES = {
  27. '!' : '!',
  28. 'tag:yaml.org,2002:' : '!!',
  29. }
  30. def __init__(self, stream, canonical=None, indent=None, width=None,
  31. allow_unicode=None, line_break=None):
  32. # The stream should have the methods `write` and possibly `flush`.
  33. self.stream = stream
  34. # Encoding can be overriden by STREAM-START.
  35. self.encoding = None
  36. # Emitter is a state machine with a stack of states to handle nested
  37. # structures.
  38. self.states = []
  39. self.state = self.expect_stream_start
  40. # Current event and the event queue.
  41. self.events = []
  42. self.event = None
  43. # The current indentation level and the stack of previous indents.
  44. self.indents = []
  45. self.indent = None
  46. # Flow level.
  47. self.flow_level = 0
  48. # Contexts.
  49. self.root_context = False
  50. self.sequence_context = False
  51. self.mapping_context = False
  52. self.simple_key_context = False
  53. # Characteristics of the last emitted character:
  54. # - current position.
  55. # - is it a whitespace?
  56. # - is it an indention character
  57. # (indentation space, '-', '?', or ':')?
  58. self.line = 0
  59. self.column = 0
  60. self.whitespace = True
  61. self.indention = True
  62. # Whether the document requires an explicit document indicator
  63. self.open_ended = False
  64. # Formatting details.
  65. self.canonical = canonical
  66. self.allow_unicode = allow_unicode
  67. self.best_indent = 2
  68. if indent and 1 < indent < 10:
  69. self.best_indent = indent
  70. self.best_width = 80
  71. if width and width > self.best_indent*2:
  72. self.best_width = width
  73. self.best_line_break = '\n'
  74. if line_break in ['\r', '\n', '\r\n']:
  75. self.best_line_break = line_break
  76. # Tag prefixes.
  77. self.tag_prefixes = None
  78. # Prepared anchor and tag.
  79. self.prepared_anchor = None
  80. self.prepared_tag = None
  81. # Scalar analysis and style.
  82. self.analysis = None
  83. self.style = None
  84. def dispose(self):
  85. # Reset the state attributes (to clear self-references)
  86. self.states = []
  87. self.state = None
  def emit(self, event):
      """Queue `event`, then run handlers while enough events are buffered."""
      self.events.append(event)
      while True:
          if self.need_more_events():
              break
          # Take the oldest queued event and hand it to the current state.
          self.event = self.events.pop(0)
          self.state()
          self.event = None
  94. # In some cases, we wait for a few next events before emitting.
  95. def need_more_events(self):
  96. if not self.events:
  97. return True
  98. event = self.events[0]
  99. if isinstance(event, DocumentStartEvent):
  100. return self.need_events(1)
  101. elif isinstance(event, SequenceStartEvent):
  102. return self.need_events(2)
  103. elif isinstance(event, MappingStartEvent):
  104. return self.need_events(3)
  105. else:
  106. return False
  107. def need_events(self, count):
  108. level = 0
  109. for event in self.events[1:]:
  110. if isinstance(event, (DocumentStartEvent, CollectionStartEvent)):
  111. level += 1
  112. elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)):
  113. level -= 1
  114. elif isinstance(event, StreamEndEvent):
  115. level = -1
  116. if level < 0:
  117. return False
  118. return (len(self.events) < count+1)
  119. def increase_indent(self, flow=False, indentless=False):
  120. self.indents.append(self.indent)
  121. if self.indent is None:
  122. if flow:
  123. self.indent = self.best_indent
  124. else:
  125. self.indent = 0
  126. elif not indentless:
  127. self.indent += self.best_indent
  128. # States.
  129. # Stream handlers.
  def expect_stream_start(self):
      """Handle STREAM-START: maybe adopt its encoding, then expect a document."""
      event = self.event
      if not isinstance(event, StreamStartEvent):
          raise EmitterError("expected StreamStartEvent, but got %s"
                  % self.event)
      # The event's encoding is used only when the stream has no `encoding`
      # attribute of its own.
      if event.encoding and not hasattr(self.stream, 'encoding'):
          self.encoding = event.encoding
      self.write_stream_start()
      self.state = self.expect_first_document_start
  def expect_nothing(self):
      """Reject any event; this is the state installed after STREAM-END."""
      message = "expected nothing, but got %s" % self.event
      raise EmitterError(message)
  141. # Document handlers.
  def expect_first_document_start(self):
      """Handle the first DOCUMENT-START (or STREAM-END) of the stream."""
      result = self.expect_document_start(first=True)
      return result
  def expect_document_start(self, first=False):
      """Handle DOCUMENT-START or STREAM-END.

      For a document: writes the %YAML / %TAG directives the event asks
      for, rebuilds `tag_prefixes`, writes '---' unless the document may be
      implicit, and moves on to the root node.  For STREAM-END: finishes the
      stream.  Any other event raises EmitterError.
      """
      if isinstance(self.event, DocumentStartEvent):
          # Directives after an open-ended document need a '...' first.
          if (self.event.version or self.event.tags) and self.open_ended:
              self.write_indicator('...', True)
              self.write_indent()
          if self.event.version:
              version_text = self.prepare_version(self.event.version)
              self.write_version_directive(version_text)
          # Start from the defaults, then add this document's handles
          # (the mapping is prefix -> handle).
          self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
          if self.event.tags:
              handles = sorted(self.event.tags.keys())
              for handle in handles:
                  prefix = self.event.tags[handle]
                  self.tag_prefixes[prefix] = handle
                  handle_text = self.prepare_tag_handle(handle)
                  prefix_text = self.prepare_tag_prefix(prefix)
                  self.write_tag_directive(handle_text, prefix_text)
          # '---' may be omitted only for the first document, and only when
          # nothing (explicit flag, canonical mode, directives, an empty
          # document) requires it.
          implicit = (first and not self.event.explicit and not self.canonical
                  and not self.event.version and not self.event.tags
                  and not self.check_empty_document())
          if not implicit:
              self.write_indent()
              self.write_indicator('---', True)
              if self.canonical:
                  self.write_indent()
          self.state = self.expect_document_root
      elif isinstance(self.event, StreamEndEvent):
          if self.open_ended:
              self.write_indicator('...', True)
              self.write_indent()
          self.write_stream_end()
          self.state = self.expect_nothing
      else:
          raise EmitterError("expected DocumentStartEvent, but got %s"
                  % self.event)
  def expect_document_end(self):
      """Handle DOCUMENT-END: close the document, flush, expect the next one."""
      event = self.event
      if not isinstance(event, DocumentEndEvent):
          raise EmitterError("expected DocumentEndEvent, but got %s"
                  % self.event)
      self.write_indent()
      if event.explicit:
          # Explicit end marker on a line of its own.
          self.write_indicator('...', True)
          self.write_indent()
      self.flush_stream()
      self.state = self.expect_document_start
  def expect_document_root(self):
      """Emit the root node; `expect_document_end` is queued to follow it."""
      after_root = self.expect_document_end
      self.states.append(after_root)
      self.expect_node(root=True)
  193. # Node handlers.
  def expect_node(self, root=False, sequence=False, mapping=False,
          simple_key=False):
      """Record the node's context and dispatch on the kind of the event.

      Aliases go to `expect_alias`; scalars and collections first emit
      their anchor and tag.  Collections use flow style when already inside
      a flow collection, in canonical mode, when the event requests it, or
      when they are empty.  Other events raise EmitterError.
      """
      self.root_context = root
      self.sequence_context = sequence
      self.mapping_context = mapping
      self.simple_key_context = simple_key

      event = self.event
      if isinstance(event, AliasEvent):
          self.expect_alias()
          return
      if not isinstance(event, (ScalarEvent, CollectionStartEvent)):
          raise EmitterError("expected NodeEvent, but got %s" % self.event)

      self.process_anchor('&')
      self.process_tag()
      if isinstance(event, ScalarEvent):
          self.expect_scalar()
      elif isinstance(event, SequenceStartEvent):
          use_flow = (self.flow_level or self.canonical or event.flow_style
                  or self.check_empty_sequence())
          if use_flow:
              self.expect_flow_sequence()
          else:
              self.expect_block_sequence()
      elif isinstance(event, MappingStartEvent):
          use_flow = (self.flow_level or self.canonical or event.flow_style
                  or self.check_empty_mapping())
          if use_flow:
              self.expect_flow_mapping()
          else:
              self.expect_block_mapping()
  221. def expect_alias(self):
  222. if self.event.anchor is None:
  223. raise EmitterError("anchor is not specified for alias")
  224. self.process_anchor('*')
  225. self.state = self.states.pop()
  def expect_scalar(self):
      """Write the scalar at one extra indent level, then resume the parent."""
      self.increase_indent(flow=True)
      self.process_scalar()
      # Undo the temporary indent before handing control back.
      restored_indent = self.indents.pop()
      self.indent = restored_indent
      self.state = self.states.pop()
  231. # Flow sequence handlers.
  232. def expect_flow_sequence(self):
  233. self.write_indicator('[', True, whitespace=True)
  234. self.flow_level += 1
  235. self.increase_indent(flow=True)
  236. self.state = self.expect_first_flow_sequence_item
  def expect_first_flow_sequence_item(self):
      """Handle the first item of a flow sequence, or close an empty one."""
      if isinstance(self.event, SequenceEndEvent):
          # Empty sequence: close it immediately.
          self.indent = self.indents.pop()
          self.flow_level -= 1
          self.write_indicator(']', False)
          self.state = self.states.pop()
          return
      needs_break = self.canonical or self.column > self.best_width
      if needs_break:
          self.write_indent()
      self.states.append(self.expect_flow_sequence_item)
      self.expect_node(sequence=True)
  def expect_flow_sequence_item(self):
      """Handle a later flow-sequence item, or close the sequence with ']'."""
      if isinstance(self.event, SequenceEndEvent):
          self.indent = self.indents.pop()
          self.flow_level -= 1
          if self.canonical:
              # Canonical output ends with a trailing comma and a fresh line.
              self.write_indicator(',', False)
              self.write_indent()
          self.write_indicator(']', False)
          self.state = self.states.pop()
          return
      self.write_indicator(',', False)
      needs_break = self.canonical or self.column > self.best_width
      if needs_break:
          self.write_indent()
      self.states.append(self.expect_flow_sequence_item)
      self.expect_node(sequence=True)
  263. # Flow mapping handlers.
  264. def expect_flow_mapping(self):
  265. self.write_indicator('{', True, whitespace=True)
  266. self.flow_level += 1
  267. self.increase_indent(flow=True)
  268. self.state = self.expect_first_flow_mapping_key
  def expect_first_flow_mapping_key(self):
      """Handle the first key of a flow mapping, or close an empty one."""
      if isinstance(self.event, MappingEndEvent):
          self.indent = self.indents.pop()
          self.flow_level -= 1
          self.write_indicator('}', False)
          self.state = self.states.pop()
          return
      if self.canonical or self.column > self.best_width:
          self.write_indent()
      if self.canonical or not self.check_simple_key():
          # Complex key: introduce it with an explicit '?'.
          self.write_indicator('?', True)
          self.states.append(self.expect_flow_mapping_value)
          self.expect_node(mapping=True)
      else:
          self.states.append(self.expect_flow_mapping_simple_value)
          self.expect_node(mapping=True, simple_key=True)
  def expect_flow_mapping_key(self):
      """Handle a later flow-mapping key, or close the mapping with '}'."""
      if isinstance(self.event, MappingEndEvent):
          self.indent = self.indents.pop()
          self.flow_level -= 1
          if self.canonical:
              # Canonical output ends with a trailing comma and a fresh line.
              self.write_indicator(',', False)
              self.write_indent()
          self.write_indicator('}', False)
          self.state = self.states.pop()
          return
      self.write_indicator(',', False)
      if self.canonical or self.column > self.best_width:
          self.write_indent()
      if self.canonical or not self.check_simple_key():
          # Complex key: introduce it with an explicit '?'.
          self.write_indicator('?', True)
          self.states.append(self.expect_flow_mapping_value)
          self.expect_node(mapping=True)
      else:
          self.states.append(self.expect_flow_mapping_simple_value)
          self.expect_node(mapping=True, simple_key=True)
  def expect_flow_mapping_simple_value(self):
      """Write ':' directly after a simple key and emit the value node."""
      self.write_indicator(':', False)
      next_key_handler = self.expect_flow_mapping_key
      self.states.append(next_key_handler)
      self.expect_node(mapping=True)
  def expect_flow_mapping_value(self):
      """Write ':' after a complex key (on a new line if needed) and the value."""
      needs_break = self.canonical or self.column > self.best_width
      if needs_break:
          self.write_indent()
      self.write_indicator(':', True)
      next_key_handler = self.expect_flow_mapping_key
      self.states.append(next_key_handler)
      self.expect_node(mapping=True)
  315. # Block sequence handlers.
  316. def expect_block_sequence(self):
  317. indentless = (self.mapping_context and not self.indention)
  318. self.increase_indent(flow=False, indentless=indentless)
  319. self.state = self.expect_first_block_sequence_item
  def expect_first_block_sequence_item(self):
      """Handle the first item of a block sequence."""
      result = self.expect_block_sequence_item(first=True)
      return result
  def expect_block_sequence_item(self, first=False):
      """Write '- ' and the next item, or end the sequence on SEQUENCE-END.

      The end event is not checked when `first` is true.
      """
      finished = not first and isinstance(self.event, SequenceEndEvent)
      if finished:
          self.indent = self.indents.pop()
          self.state = self.states.pop()
          return
      self.write_indent()
      self.write_indicator('-', True, indention=True)
      self.states.append(self.expect_block_sequence_item)
      self.expect_node(sequence=True)
  331. # Block mapping handlers.
  332. def expect_block_mapping(self):
  333. self.increase_indent(flow=False)
  334. self.state = self.expect_first_block_mapping_key
  def expect_first_block_mapping_key(self):
      """Handle the first key of a block mapping."""
      result = self.expect_block_mapping_key(first=True)
      return result
  def expect_block_mapping_key(self, first=False):
      """Write the next block-mapping key, or end the mapping on MAPPING-END.

      The end event is not checked when `first` is true.  Keys that fail
      `check_simple_key` are written in explicit '? key' form.
      """
      finished = not first and isinstance(self.event, MappingEndEvent)
      if finished:
          self.indent = self.indents.pop()
          self.state = self.states.pop()
          return
      self.write_indent()
      if not self.check_simple_key():
          self.write_indicator('?', True, indention=True)
          self.states.append(self.expect_block_mapping_value)
          self.expect_node(mapping=True)
      else:
          self.states.append(self.expect_block_mapping_simple_value)
          self.expect_node(mapping=True, simple_key=True)
  def expect_block_mapping_simple_value(self):
      """Write ':' directly after a simple key and emit the value node."""
      self.write_indicator(':', False)
      next_key_handler = self.expect_block_mapping_key
      self.states.append(next_key_handler)
      self.expect_node(mapping=True)
  def expect_block_mapping_value(self):
      """Write ':' on its own indented line after a complex key, then the value."""
      self.write_indent()
      self.write_indicator(':', True, indention=True)
      next_key_handler = self.expect_block_mapping_key
      self.states.append(next_key_handler)
      self.expect_node(mapping=True)
  359. # Checkers.
  def check_empty_sequence(self):
      """Truthy when the current SEQUENCE-START is directly followed by its end."""
      queued = self.events
      # Kept as an `and` chain so the falsy result (False or the empty queue)
      # is exactly what callers have always received.
      return (isinstance(self.event, SequenceStartEvent) and queued
              and isinstance(queued[0], SequenceEndEvent))
  def check_empty_mapping(self):
      """Truthy when the current MAPPING-START is directly followed by its end."""
      queued = self.events
      # Kept as an `and` chain so the falsy result (False or the empty queue)
      # is exactly what callers have always received.
      return (isinstance(self.event, MappingStartEvent) and queued
              and isinstance(queued[0], MappingEndEvent))
  def check_empty_document(self):
      """Truthy when the document's root is an untagged, unanchored, implicit ''."""
      have_lookahead = (isinstance(self.event, DocumentStartEvent)
              and bool(self.events))
      if not have_lookahead:
          return False
      root = self.events[0]
      return (isinstance(root, ScalarEvent) and root.anchor is None
              and root.tag is None and root.implicit and root.value == '')
  def check_simple_key(self):
      """Decide whether the current event can be written as a simple key.

      Caches the prepared anchor, prepared tag and scalar analysis on self
      as a side effect.  The combined length of anchor, tag and scalar text
      must stay below 128, and the event must be an alias, a non-empty
      single-line scalar, or an empty collection.
      """
      length = 0
      if isinstance(self.event, NodeEvent) and self.event.anchor is not None:
          if self.prepared_anchor is None:
              self.prepared_anchor = self.prepare_anchor(self.event.anchor)
          length += len(self.prepared_anchor)
      if isinstance(self.event, (ScalarEvent, CollectionStartEvent))  \
              and self.event.tag is not None:
          if self.prepared_tag is None:
              self.prepared_tag = self.prepare_tag(self.event.tag)
          length += len(self.prepared_tag)
      if isinstance(self.event, ScalarEvent):
          if self.analysis is None:
              self.analysis = self.analyze_scalar(self.event.value)
          length += len(self.analysis.scalar)
      # The result is used for its truth value; it is not always a bool.
      return (length < 128 and (isinstance(self.event, AliasEvent)
          or (isinstance(self.event, ScalarEvent)
                  and not self.analysis.empty and not self.analysis.multiline)
          or self.check_empty_sequence() or self.check_empty_mapping()))
  391. # Anchor, Tag, and Scalar processors.
  392. def process_anchor(self, indicator):
  393. if self.event.anchor is None:
  394. self.prepared_anchor = None
  395. return
  396. if self.prepared_anchor is None:
  397. self.prepared_anchor = self.prepare_anchor(self.event.anchor)
  398. if self.prepared_anchor:
  399. self.write_indicator(indicator+self.prepared_anchor, True)
  400. self.prepared_anchor = None
  def process_tag(self):
      """Write the event's tag unless it may be left implicit.

      For scalars the style is chosen first (and cached in `self.style`),
      because `implicit[0]` applies to plain style and `implicit[1]` to the
      other styles.  Raises EmitterError when a tag is required but the
      event has none.  The cached `prepared_tag` is cleared on every path
      that returns normally.
      """
      tag = self.event.tag
      if isinstance(self.event, ScalarEvent):
          if self.style is None:
              self.style = self.choose_scalar_style()
          # The tag can be omitted when the chosen style resolves implicitly,
          # unless canonical mode wants an available tag written out.
          if ((not self.canonical or tag is None) and
              ((self.style == '' and self.event.implicit[0])
                      or (self.style != '' and self.event.implicit[1]))):
              self.prepared_tag = None
              return
          # A plain-implicit scalar that ended up non-plain is marked with
          # the non-specific tag '!'.
          if self.event.implicit[0] and tag is None:
              tag = '!'
              self.prepared_tag = None
      else:
          if (not self.canonical or tag is None) and self.event.implicit:
              self.prepared_tag = None
              return
      if tag is None:
          raise EmitterError("tag is not specified")
      if self.prepared_tag is None:
          self.prepared_tag = self.prepare_tag(tag)
      if self.prepared_tag:
          self.write_indicator(self.prepared_tag, True)
      self.prepared_tag = None
  def choose_scalar_style(self):
      """Pick the style for the current scalar event.

      Returns '' (plain), a single quote, a double quote, '|' or '>'.  The
      event's requested style is honoured only when the scalar analysis and
      the current context allow it; double-quoted is the final fallback.
      Caches the analysis in `self.analysis` as a side effect.
      """
      if self.analysis is None:
          self.analysis = self.analyze_scalar(self.event.value)
      # Double quotes on request, and always in canonical mode.
      if self.event.style == '"' or self.canonical:
          return '"'
      # Plain: no style requested, plain-implicit, not an empty or multiline
      # simple key, and plain is allowed for the current (flow/block) level.
      if not self.event.style and self.event.implicit[0]:
          if (not (self.simple_key_context and
                  (self.analysis.empty or self.analysis.multiline))
              and (self.flow_level and self.analysis.allow_flow_plain
                  or (not self.flow_level and self.analysis.allow_block_plain))):
              return ''
      # Block styles: only outside flow collections and simple keys.
      if self.event.style and self.event.style in '|>':
          if (not self.flow_level and not self.simple_key_context
                  and self.analysis.allow_block):
              return self.event.style
      # Single quotes: requested or as the default, but never for a
      # multiline simple key.
      if not self.event.style or self.event.style == '\'':
          if (self.analysis.allow_single_quoted and
                  not (self.simple_key_context and self.analysis.multiline)):
              return '\''
      return '"'
  def process_scalar(self):
      """Write the current scalar in its chosen style, then reset the caches."""
      if self.analysis is None:
          self.analysis = self.analyze_scalar(self.event.value)
      if self.style is None:
          self.style = self.choose_scalar_style()
      # Simple keys are never split across lines.
      split = not self.simple_key_context
      style = self.style
      text = self.analysis.scalar
      if style == '"':
          self.write_double_quoted(text, split)
      elif style == '\'':
          self.write_single_quoted(text, split)
      elif style == '>':
          self.write_folded(text)
      elif style == '|':
          self.write_literal(text)
      else:
          self.write_plain(text, split)
      # The analysis and style belong to this event only.
      self.analysis = None
      self.style = None
  466. # Analyzers.
  def prepare_version(self, version):
      """Format a (major, minor) pair as 'major.minor'.

      Raises EmitterError when the major version is not 1.
      """
      major, minor = version
      if major == 1:
          return '%d.%d' % (major, minor)
      raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
  def prepare_tag_handle(self, handle):
      """Validate a tag handle and return it unchanged.

      A handle must be non-empty, start and end with '!', and contain only
      ASCII letters, digits, '-' or '_' in between; otherwise EmitterError
      is raised.
      """
      if not handle:
          raise EmitterError("tag handle must not be empty")
      well_delimited = handle[0] == '!' and handle[-1] == '!'
      if not well_delimited:
          raise EmitterError("tag handle must start and end with '!': %r" % handle)
      for ch in handle[1:-1]:
          if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z':
              continue
          if ch in '-_':
              continue
          raise EmitterError("invalid character %r in the tag handle: %r"
                  % (ch, handle))
      return handle
  def prepare_tag_prefix(self, prefix):
      """Return `prefix` with disallowed characters percent-encoded.

      ASCII letters, digits and the characters -;/?!:@&=+$,_.~*'()[] are
      copied through; every other character is replaced by the %XX escapes
      of its UTF-8 bytes.  A leading '!' is always kept.  Raises
      EmitterError for an empty prefix.
      """
      if not prefix:
          raise EmitterError("tag prefix must not be empty")
      chunks = []
      start = end = 0
      if prefix[0] == '!':
          end = 1
      while end < len(prefix):
          ch = prefix[end]
          if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z'    \
                  or ch in '-;/?!:@&=+$,_.~*\'()[]':
              end += 1
          else:
              # Flush the run of safe characters collected so far.
              if start < end:
                  chunks.append(prefix[start:end])
              start = end = end+1
              # Iterating over bytes yields ints, so each byte is formatted
              # directly; calling ord() on it would raise TypeError.
              for byte in ch.encode('utf-8'):
                  chunks.append('%%%02X' % byte)
      if start < end:
          chunks.append(prefix[start:end])
      return ''.join(chunks)
  505. def prepare_tag(self, tag):
  506. if not tag:
  507. raise EmitterError("tag must not be empty")
  508. if tag == '!':
  509. return tag
  510. handle = None
  511. suffix = tag
  512. prefixes = sorted(self.tag_prefixes.keys())
  513. for prefix in prefixes:
  514. if tag.startswith(prefix) \
  515. and (prefix == '!' or len(prefix) < len(tag)):
  516. handle = self.tag_prefixes[prefix]
  517. suffix = tag[len(prefix):]
  518. chunks = []
  519. start = end = 0
  520. while end < len(suffix):
  521. ch = suffix[end]
  522. if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
  523. or ch in '-;/?:@&=+$,_.~*\'()[]' \
  524. or (ch == '!' and handle != '!'):
  525. end += 1
  526. else:
  527. if start < end:
  528. chunks.append(suffix[start:end])
  529. start = end = end+1
  530. data = ch.encode('utf-8')
  531. for ch in data:
  532. chunks.append('%%%02X' % ord(ch))
  533. if start < end:
  534. chunks.append(suffix[start:end])
  535. suffix_text = ''.join(chunks)
  536. if handle:
  537. return '%s%s' % (handle, suffix_text)
  538. else:
  539. return '!<%s>' % suffix_text
  def prepare_anchor(self, anchor):
      """Validate an anchor name and return it unchanged.

      Only ASCII letters, digits, '-' and '_' are accepted; an empty anchor
      or any other character raises EmitterError.
      """
      if not anchor:
          raise EmitterError("anchor must not be empty")
      for ch in anchor:
          if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z':
              continue
          if ch in '-_':
              continue
          raise EmitterError("invalid character %r in the anchor: %r"
                  % (ch, anchor))
      return anchor
  def analyze_scalar(self, scalar):
      """Scan `scalar` once and report which styles can represent it.

      Returns a ScalarAnalysis holding the text, whether it is empty or
      contains line breaks, and one flag per style (flow plain, block plain,
      single-quoted, double-quoted, block).  Reads `self.allow_unicode` to
      decide whether printable non-ASCII characters count as special.
      """
      # Empty scalar is a special case.
      if not scalar:
          return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
                  allow_flow_plain=False, allow_block_plain=True,
                  allow_single_quoted=True, allow_double_quoted=True,
                  allow_block=False)

      # Indicators and special characters.
      block_indicators = False
      flow_indicators = False
      line_breaks = False
      special_characters = False

      # Important whitespace combinations.
      leading_space = False
      leading_break = False
      trailing_space = False
      trailing_break = False
      break_space = False
      space_break = False

      # Check document indicators.
      if scalar.startswith('---') or scalar.startswith('...'):
          block_indicators = True
          flow_indicators = True

      # First character or preceded by a whitespace.
      preceded_by_whitespace = True

      # Last character or followed by a whitespace.
      followed_by_whitespace = (len(scalar) == 1 or
              scalar[1] in '\0 \t\r\n\x85\u2028\u2029')

      # The previous character is a space.
      previous_space = False

      # The previous character is a break.
      previous_break = False

      index = 0
      while index < len(scalar):
          ch = scalar[index]

          # Check for indicators.
          if index == 0:
              # Leading indicators are special characters.
              if ch in '#,[]{}&*!|>\'\"%@`':
                  flow_indicators = True
                  block_indicators = True
              if ch in '?:':
                  flow_indicators = True
                  if followed_by_whitespace:
                      block_indicators = True
              if ch == '-' and followed_by_whitespace:
                  flow_indicators = True
                  block_indicators = True
          else:
              # Some indicators cannot appear within a scalar as well.
              if ch in ',?[]{}':
                  flow_indicators = True
              if ch == ':':
                  flow_indicators = True
                  if followed_by_whitespace:
                      block_indicators = True
              if ch == '#' and preceded_by_whitespace:
                  flow_indicators = True
                  block_indicators = True

          # Check for line breaks, special, and unicode characters.
          if ch in '\n\x85\u2028\u2029':
              line_breaks = True
          if not (ch == '\n' or '\x20' <= ch <= '\x7E'):
              if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF'
                      or '\uE000' <= ch <= '\uFFFD') and ch != '\uFEFF':
                  # Printable non-ASCII: special only when unicode output
                  # is not allowed.
                  if not self.allow_unicode:
                      special_characters = True
              else:
                  special_characters = True

          # Detect important whitespace combinations.
          if ch == ' ':
              if index == 0:
                  leading_space = True
              if index == len(scalar)-1:
                  trailing_space = True
              if previous_break:
                  break_space = True
              previous_space = True
              previous_break = False
          elif ch in '\n\x85\u2028\u2029':
              if index == 0:
                  leading_break = True
              if index == len(scalar)-1:
                  trailing_break = True
              if previous_space:
                  space_break = True
              previous_space = False
              previous_break = True
          else:
              previous_space = False
              previous_break = False

          # Prepare for the next character: `index` now points at it, so
          # "followed by" looks one position further.
          index += 1
          preceded_by_whitespace = (ch in '\0 \t\r\n\x85\u2028\u2029')
          followed_by_whitespace = (index+1 >= len(scalar) or
                  scalar[index+1] in '\0 \t\r\n\x85\u2028\u2029')

      # Let's decide what styles are allowed.
      allow_flow_plain = True
      allow_block_plain = True
      allow_single_quoted = True
      allow_double_quoted = True
      allow_block = True

      # Leading and trailing whitespaces are bad for plain scalars.
      if (leading_space or leading_break
              or trailing_space or trailing_break):
          allow_flow_plain = allow_block_plain = False

      # We do not permit trailing spaces for block scalars.
      if trailing_space:
          allow_block = False

      # Spaces at the beginning of a new line are only acceptable for block
      # scalars.
      if break_space:
          allow_flow_plain = allow_block_plain = allow_single_quoted = False

      # Spaces followed by breaks, as well as special character are only
      # allowed for double quoted scalars.
      if space_break or special_characters:
          allow_flow_plain = allow_block_plain =  \
          allow_single_quoted = allow_block = False

      # Although the plain scalar writer supports breaks, we never emit
      # multiline plain scalars.
      if line_breaks:
          allow_flow_plain = allow_block_plain = False

      # Flow indicators are forbidden for flow plain scalars.
      if flow_indicators:
          allow_flow_plain = False

      # Block indicators are forbidden for block plain scalars.
      if block_indicators:
          allow_block_plain = False

      return ScalarAnalysis(scalar=scalar,
              empty=False, multiline=line_breaks,
              allow_flow_plain=allow_flow_plain,
              allow_block_plain=allow_block_plain,
              allow_single_quoted=allow_single_quoted,
              allow_double_quoted=allow_double_quoted,
              allow_block=allow_block)
  685. # Writers.
  686. def flush_stream(self):
  687. if hasattr(self.stream, 'flush'):
  688. self.stream.flush()
  689. def write_stream_start(self):
  690. # Write BOM if needed.
  691. if self.encoding and self.encoding.startswith('utf-16'):
  692. self.stream.write('\uFEFF'.encode(self.encoding))
  693. def write_stream_end(self):
  694. self.flush_stream()
  695. def write_indicator(self, indicator, need_whitespace,
  696. whitespace=False, indention=False):
  697. if self.whitespace or not need_whitespace:
  698. data = indicator
  699. else:
  700. data = ' '+indicator
  701. self.whitespace = whitespace
  702. self.indention = self.indention and indention
  703. self.column += len(data)
  704. self.open_ended = False
  705. if self.encoding:
  706. data = data.encode(self.encoding)
  707. self.stream.write(data)
  708. def write_indent(self):
  709. indent = self.indent or 0
  710. if not self.indention or self.column > indent \
  711. or (self.column == indent and not self.whitespace):
  712. self.write_line_break()
  713. if self.column < indent:
  714. self.whitespace = True
  715. data = ' '*(indent-self.column)
  716. self.column = indent
  717. if self.encoding:
  718. data = data.encode(self.encoding)
  719. self.stream.write(data)
  720. def write_line_break(self, data=None):
  721. if data is None:
  722. data = self.best_line_break
  723. self.whitespace = True
  724. self.indention = True
  725. self.line += 1
  726. self.column = 0
  727. if self.encoding:
  728. data = data.encode(self.encoding)
  729. self.stream.write(data)
  730. def write_version_directive(self, version_text):
  731. data = '%%YAML %s' % version_text
  732. if self.encoding:
  733. data = data.encode(self.encoding)
  734. self.stream.write(data)
  735. self.write_line_break()
  736. def write_tag_directive(self, handle_text, prefix_text):
  737. data = '%%TAG %s %s' % (handle_text, prefix_text)
  738. if self.encoding:
  739. data = data.encode(self.encoding)
  740. self.stream.write(data)
  741. self.write_line_break()
  742. # Scalar streams.
  743. def write_single_quoted(self, text, split=True):
  744. self.write_indicator('\'', True)
  745. spaces = False
  746. breaks = False
  747. start = end = 0
  748. while end <= len(text):
  749. ch = None
  750. if end < len(text):
  751. ch = text[end]
  752. if spaces:
  753. if ch is None or ch != ' ':
  754. if start+1 == end and self.column > self.best_width and split \
  755. and start != 0 and end != len(text):
  756. self.write_indent()
  757. else:
  758. data = text[start:end]
  759. self.column += len(data)
  760. if self.encoding:
  761. data = data.encode(self.encoding)
  762. self.stream.write(data)
  763. start = end
  764. elif breaks:
  765. if ch is None or ch not in '\n\x85\u2028\u2029':
  766. if text[start] == '\n':
  767. self.write_line_break()
  768. for br in text[start:end]:
  769. if br == '\n':
  770. self.write_line_break()
  771. else:
  772. self.write_line_break(br)
  773. self.write_indent()
  774. start = end
  775. else:
  776. if ch is None or ch in ' \n\x85\u2028\u2029' or ch == '\'':
  777. if start < end:
  778. data = text[start:end]
  779. self.column += len(data)
  780. if self.encoding:
  781. data = data.encode(self.encoding)
  782. self.stream.write(data)
  783. start = end
  784. if ch == '\'':
  785. data = '\'\''
  786. self.column += 2
  787. if self.encoding:
  788. data = data.encode(self.encoding)
  789. self.stream.write(data)
  790. start = end + 1
  791. if ch is not None:
  792. spaces = (ch == ' ')
  793. breaks = (ch in '\n\x85\u2028\u2029')
  794. end += 1
  795. self.write_indicator('\'', False)
  796. ESCAPE_REPLACEMENTS = {
  797. '\0': '0',
  798. '\x07': 'a',
  799. '\x08': 'b',
  800. '\x09': 't',
  801. '\x0A': 'n',
  802. '\x0B': 'v',
  803. '\x0C': 'f',
  804. '\x0D': 'r',
  805. '\x1B': 'e',
  806. '\"': '\"',
  807. '\\': '\\',
  808. '\x85': 'N',
  809. '\xA0': '_',
  810. '\u2028': 'L',
  811. '\u2029': 'P',
  812. }
  813. def write_double_quoted(self, text, split=True):
  814. self.write_indicator('"', True)
  815. start = end = 0
  816. while end <= len(text):
  817. ch = None
  818. if end < len(text):
  819. ch = text[end]
  820. if ch is None or ch in '"\\\x85\u2028\u2029\uFEFF' \
  821. or not ('\x20' <= ch <= '\x7E'
  822. or (self.allow_unicode
  823. and ('\xA0' <= ch <= '\uD7FF'
  824. or '\uE000' <= ch <= '\uFFFD'))):
  825. if start < end:
  826. data = text[start:end]
  827. self.column += len(data)
  828. if self.encoding:
  829. data = data.encode(self.encoding)
  830. self.stream.write(data)
  831. start = end
  832. if ch is not None:
  833. if ch in self.ESCAPE_REPLACEMENTS:
  834. data = '\\'+self.ESCAPE_REPLACEMENTS[ch]
  835. elif ch <= '\xFF':
  836. data = '\\x%02X' % ord(ch)
  837. elif ch <= '\uFFFF':
  838. data = '\\u%04X' % ord(ch)
  839. else:
  840. data = '\\U%08X' % ord(ch)
  841. self.column += len(data)
  842. if self.encoding:
  843. data = data.encode(self.encoding)
  844. self.stream.write(data)
  845. start = end+1
  846. if 0 < end < len(text)-1 and (ch == ' ' or start >= end) \
  847. and self.column+(end-start) > self.best_width and split:
  848. data = text[start:end]+'\\'
  849. if start < end:
  850. start = end
  851. self.column += len(data)
  852. if self.encoding:
  853. data = data.encode(self.encoding)
  854. self.stream.write(data)
  855. self.write_indent()
  856. self.whitespace = False
  857. self.indention = False
  858. if text[start] == ' ':
  859. data = '\\'
  860. self.column += len(data)
  861. if self.encoding:
  862. data = data.encode(self.encoding)
  863. self.stream.write(data)
  864. end += 1
  865. self.write_indicator('"', False)
  866. def determine_block_hints(self, text):
  867. hints = ''
  868. if text:
  869. if text[0] in ' \n\x85\u2028\u2029':
  870. hints += str(self.best_indent)
  871. if text[-1] not in '\n\x85\u2028\u2029':
  872. hints += '-'
  873. elif len(text) == 1 or text[-2] in '\n\x85\u2028\u2029':
  874. hints += '+'
  875. return hints
  876. def write_folded(self, text):
  877. hints = self.determine_block_hints(text)
  878. self.write_indicator('>'+hints, True)
  879. if hints[-1:] == '+':
  880. self.open_ended = True
  881. self.write_line_break()
  882. leading_space = True
  883. spaces = False
  884. breaks = True
  885. start = end = 0
  886. while end <= len(text):
  887. ch = None
  888. if end < len(text):
  889. ch = text[end]
  890. if breaks:
  891. if ch is None or ch not in '\n\x85\u2028\u2029':
  892. if not leading_space and ch is not None and ch != ' ' \
  893. and text[start] == '\n':
  894. self.write_line_break()
  895. leading_space = (ch == ' ')
  896. for br in text[start:end]:
  897. if br == '\n':
  898. self.write_line_break()
  899. else:
  900. self.write_line_break(br)
  901. if ch is not None:
  902. self.write_indent()
  903. start = end
  904. elif spaces:
  905. if ch != ' ':
  906. if start+1 == end and self.column > self.best_width:
  907. self.write_indent()
  908. else:
  909. data = text[start:end]
  910. self.column += len(data)
  911. if self.encoding:
  912. data = data.encode(self.encoding)
  913. self.stream.write(data)
  914. start = end
  915. else:
  916. if ch is None or ch in ' \n\x85\u2028\u2029':
  917. data = text[start:end]
  918. self.column += len(data)
  919. if self.encoding:
  920. data = data.encode(self.encoding)
  921. self.stream.write(data)
  922. if ch is None:
  923. self.write_line_break()
  924. start = end
  925. if ch is not None:
  926. breaks = (ch in '\n\x85\u2028\u2029')
  927. spaces = (ch == ' ')
  928. end += 1
  929. def write_literal(self, text):
  930. hints = self.determine_block_hints(text)
  931. self.write_indicator('|'+hints, True)
  932. if hints[-1:] == '+':
  933. self.open_ended = True
  934. self.write_line_break()
  935. breaks = True
  936. start = end = 0
  937. while end <= len(text):
  938. ch = None
  939. if end < len(text):
  940. ch = text[end]
  941. if breaks:
  942. if ch is None or ch not in '\n\x85\u2028\u2029':
  943. for br in text[start:end]:
  944. if br == '\n':
  945. self.write_line_break()
  946. else:
  947. self.write_line_break(br)
  948. if ch is not None:
  949. self.write_indent()
  950. start = end
  951. else:
  952. if ch is None or ch in '\n\x85\u2028\u2029':
  953. data = text[start:end]
  954. if self.encoding:
  955. data = data.encode(self.encoding)
  956. self.stream.write(data)
  957. if ch is None:
  958. self.write_line_break()
  959. start = end
  960. if ch is not None:
  961. breaks = (ch in '\n\x85\u2028\u2029')
  962. end += 1
  963. def write_plain(self, text, split=True):
  964. if self.root_context:
  965. self.open_ended = True
  966. if not text:
  967. return
  968. if not self.whitespace:
  969. data = ' '
  970. self.column += len(data)
  971. if self.encoding:
  972. data = data.encode(self.encoding)
  973. self.stream.write(data)
  974. self.whitespace = False
  975. self.indention = False
  976. spaces = False
  977. breaks = False
  978. start = end = 0
  979. while end <= len(text):
  980. ch = None
  981. if end < len(text):
  982. ch = text[end]
  983. if spaces:
  984. if ch != ' ':
  985. if start+1 == end and self.column > self.best_width and split:
  986. self.write_indent()
  987. self.whitespace = False
  988. self.indention = False
  989. else:
  990. data = text[start:end]
  991. self.column += len(data)
  992. if self.encoding:
  993. data = data.encode(self.encoding)
  994. self.stream.write(data)
  995. start = end
  996. elif breaks:
  997. if ch not in '\n\x85\u2028\u2029':
  998. if text[start] == '\n':
  999. self.write_line_break()
  1000. for br in text[start:end]:
  1001. if br == '\n':
  1002. self.write_line_break()
  1003. else:
  1004. self.write_line_break(br)
  1005. self.write_indent()
  1006. self.whitespace = False
  1007. self.indention = False
  1008. start = end
  1009. else:
  1010. if ch is None or ch in ' \n\x85\u2028\u2029':
  1011. data = text[start:end]
  1012. self.column += len(data)
  1013. if self.encoding:
  1014. data = data.encode(self.encoding)
  1015. self.stream.write(data)
  1016. start = end
  1017. if ch is not None:
  1018. spaces = (ch == ' ')
  1019. breaks = (ch in '\n\x85\u2028\u2029')
  1020. end += 1