lib: refactor tags parser: be more efficient, save SourcePos for later
This commit is contained in:
		
							parent
							
								
									4c259ff273
								
							
						
					
					
						commit
						ec85271a0b
					
				| @ -918,6 +918,9 @@ commentStartingWithp cs = do | |||||||
| -- >>> commentTags "\na b:, \nd:e, f" | -- >>> commentTags "\na b:, \nd:e, f" | ||||||
| -- [("b",""),("d","e")] | -- [("b",""),("d","e")] | ||||||
| -- | -- | ||||||
|  | -- >>> commentTags ":value" | ||||||
|  | -- [] | ||||||
|  | -- | ||||||
| commentTags :: Text -> [Tag] | commentTags :: Text -> [Tag] | ||||||
| commentTags s = | commentTags s = | ||||||
|   case runTextParser tagsp s of |   case runTextParser tagsp s of | ||||||
| @ -926,42 +929,42 @@ commentTags s = | |||||||
| 
 | 
 | ||||||
| -- | Parse all tags found in a string. | -- | Parse all tags found in a string. | ||||||
| tagsp :: SimpleTextParser [Tag] | tagsp :: SimpleTextParser [Tag] | ||||||
| tagsp = -- do | tagsp = do | ||||||
|   -- pdbg 0 $ "tagsp" |   -- pdbg 0 $ "tagsp" | ||||||
|   many (try (nontagp >> tagp)) |  | ||||||
| 
 | 
 | ||||||
| -- | Parse everything up till the first tag. |   -- If we parse in a single pass, we cannot know whether some text | ||||||
| -- |   -- belongs to a tag label until we have reached a colon (in which case | ||||||
| -- >>> rtp nontagp "\na b:, \nd:e, f" |   -- it does) or whitespace (in which case it does not). Therefore, we | ||||||
| -- Right "\na " |   -- hold on to the text until we reach such a break point, and then | ||||||
| nontagp :: SimpleTextParser String |   -- decide what to do. | ||||||
| nontagp = -- do |  | ||||||
|   -- pdbg 0 "nontagp" |  | ||||||
|   -- anyChar `manyTill` (lookAhead (try (tagorbracketeddatetagsp Nothing >> return ()) <|> eof)) |  | ||||||
|   anyChar `manyTill` lookAhead (try (void tagp) <|> eof) |  | ||||||
|   -- XXX costly ? |  | ||||||
| 
 | 
 | ||||||
| -- | Tags begin with a colon-suffixed tag name (a word beginning with |   potentialTagName <- tillNextBreak | ||||||
| -- a letter) and are followed by a tag value (any text up to a comma |   atSpaceChar <|> atColon potentialTagName <|> atEof | ||||||
| -- or newline, whitespace-stripped). |  | ||||||
| -- |  | ||||||
| -- >>> rtp tagp "a:b b , c AuxDate: 4/2" |  | ||||||
| -- Right ("a","b b") |  | ||||||
| -- |  | ||||||
| tagp :: SimpleTextParser Tag |  | ||||||
| tagp = do |  | ||||||
|   -- pdbg 0 "tagp" |  | ||||||
|   n <- tagnamep |  | ||||||
|   v <- tagvaluep |  | ||||||
|   return (n,v) |  | ||||||
| 
 | 
 | ||||||
| -- | |   where | ||||||
| -- >>> rtp tagnamep "a:" | 
 | ||||||
| -- Right "a" |     break :: SimpleTextParser () | ||||||
| tagnamep :: SimpleTextParser Text |     break = void spaceChar <|> void (char ':') <|> eof | ||||||
| tagnamep = -- do | 
 | ||||||
|   -- pdbg 0 "tagnamep" |     tillNextBreak :: SimpleTextParser Text | ||||||
|   T.pack <$> some (noneOf (": \t\n" :: [Char])) <* char ':' |     tillNextBreak = T.pack <$> anyChar `manyTill` lookAhead break | ||||||
|  | 
 | ||||||
|  |     tagValue :: SimpleTextParser Text | ||||||
|  |     tagValue = | ||||||
|  |       T.strip . T.pack <$> anyChar `manyTill` (void (char ',') <|> eolof) | ||||||
|  | 
 | ||||||
|  |     atSpaceChar :: SimpleTextParser [Tag] | ||||||
|  |     atSpaceChar = skipSome spaceChar *> tagsp | ||||||
|  | 
 | ||||||
|  |     atColon :: Text -> SimpleTextParser [Tag] | ||||||
|  |     atColon tagName = do | ||||||
|  |       char ':' | ||||||
|  |       if T.null tagName | ||||||
|  |         then tagsp | ||||||
|  |         else (:) <$> fmap (tagName,) tagValue <*> tagsp | ||||||
|  | 
 | ||||||
|  |     atEof :: SimpleTextParser [Tag] | ||||||
|  |     atEof = eof *> pure [] | ||||||
| 
 | 
 | ||||||
| tagvaluep :: TextParser m Text | tagvaluep :: TextParser m Text | ||||||
| tagvaluep = do | tagvaluep = do | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user