lib: refactor tags parser: be more efficient, save SourcePos for later
This commit is contained in:
parent
4c259ff273
commit
ec85271a0b
@ -918,6 +918,9 @@ commentStartingWithp cs = do
|
|||||||
-- >>> commentTags "\na b:, \nd:e, f"
|
-- >>> commentTags "\na b:, \nd:e, f"
|
||||||
-- [("b",""),("d","e")]
|
-- [("b",""),("d","e")]
|
||||||
--
|
--
|
||||||
|
-- >>> commentTags ":value"
|
||||||
|
-- []
|
||||||
|
--
|
||||||
commentTags :: Text -> [Tag]
|
commentTags :: Text -> [Tag]
|
||||||
commentTags s =
|
commentTags s =
|
||||||
case runTextParser tagsp s of
|
case runTextParser tagsp s of
|
||||||
@ -926,42 +929,42 @@ commentTags s =
|
|||||||
|
|
||||||
-- | Parse all tags found in a string.
|
-- | Parse all tags found in a string.
|
||||||
tagsp :: SimpleTextParser [Tag]
|
tagsp :: SimpleTextParser [Tag]
|
||||||
tagsp = -- do
|
tagsp = do
|
||||||
-- pdbg 0 $ "tagsp"
|
-- pdbg 0 $ "tagsp"
|
||||||
many (try (nontagp >> tagp))
|
|
||||||
|
|
||||||
-- | Parse everything up till the first tag.
|
-- If we parse in a single pass, we cannot know whether some text
|
||||||
--
|
-- belongs to a tag label until we have reached a colon (in which case
|
||||||
-- >>> rtp nontagp "\na b:, \nd:e, f"
|
-- it does) or whitespace (in which case it does not). Therefore, we
|
||||||
-- Right "\na "
|
-- hold on to the text until we reach such a break point, and then
|
||||||
nontagp :: SimpleTextParser String
|
-- decide what to do.
|
||||||
nontagp = -- do
|
|
||||||
-- pdbg 0 "nontagp"
|
|
||||||
-- anyChar `manyTill` (lookAhead (try (tagorbracketeddatetagsp Nothing >> return ()) <|> eof))
|
|
||||||
anyChar `manyTill` lookAhead (try (void tagp) <|> eof)
|
|
||||||
-- XXX costly ?
|
|
||||||
|
|
||||||
-- | Tags begin with a colon-suffixed tag name (a word beginning with
|
potentialTagName <- tillNextBreak
|
||||||
-- a letter) and are followed by a tag value (any text up to a comma
|
atSpaceChar <|> atColon potentialTagName <|> atEof
|
||||||
-- or newline, whitespace-stripped).
|
|
||||||
--
|
|
||||||
-- >>> rtp tagp "a:b b , c AuxDate: 4/2"
|
|
||||||
-- Right ("a","b b")
|
|
||||||
--
|
|
||||||
tagp :: SimpleTextParser Tag
|
|
||||||
tagp = do
|
|
||||||
-- pdbg 0 "tagp"
|
|
||||||
n <- tagnamep
|
|
||||||
v <- tagvaluep
|
|
||||||
return (n,v)
|
|
||||||
|
|
||||||
-- |
|
where
|
||||||
-- >>> rtp tagnamep "a:"
|
|
||||||
-- Right "a"
|
break :: SimpleTextParser ()
|
||||||
tagnamep :: SimpleTextParser Text
|
break = void spaceChar <|> void (char ':') <|> eof
|
||||||
tagnamep = -- do
|
|
||||||
-- pdbg 0 "tagnamep"
|
tillNextBreak :: SimpleTextParser Text
|
||||||
T.pack <$> some (noneOf (": \t\n" :: [Char])) <* char ':'
|
tillNextBreak = T.pack <$> anyChar `manyTill` lookAhead break
|
||||||
|
|
||||||
|
tagValue :: SimpleTextParser Text
|
||||||
|
tagValue =
|
||||||
|
T.strip . T.pack <$> anyChar `manyTill` (void (char ',') <|> eolof)
|
||||||
|
|
||||||
|
atSpaceChar :: SimpleTextParser [Tag]
|
||||||
|
atSpaceChar = skipSome spaceChar *> tagsp
|
||||||
|
|
||||||
|
atColon :: Text -> SimpleTextParser [Tag]
|
||||||
|
atColon tagName = do
|
||||||
|
char ':'
|
||||||
|
if T.null tagName
|
||||||
|
then tagsp
|
||||||
|
else (:) <$> fmap (tagName,) tagValue <*> tagsp
|
||||||
|
|
||||||
|
atEof :: SimpleTextParser [Tag]
|
||||||
|
atEof = eof *> pure []
|
||||||
|
|
||||||
tagvaluep :: TextParser m Text
|
tagvaluep :: TextParser m Text
|
||||||
tagvaluep = do
|
tagvaluep = do
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user