tools: add sample journals with wide characters
generatejournal now has --chinese and --mixed options, which are used to generate some additional small sample journals.
This commit is contained in:
		
							parent
							
								
									939f7184c9
								
							
						
					
					
						commit
						e6d8a9d1bc
					
				
							
								
								
									
										21
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										21
									
								
								Makefile
									
									
									
									
									
								
							| @ -794,7 +794,17 @@ ghci-web: \ | |||||||
| 		$(call def-help,ghci-web, start a GHCI REPL and load the hledger-lib, hledger and hledger-web packages) | 		$(call def-help,ghci-web, start a GHCI REPL and load the hledger-lib, hledger and hledger-web packages) | ||||||
| 	stack exec $(GHCI) -- $(BUILDFLAGS) hledger-web/app/main.hs | 	stack exec $(GHCI) -- $(BUILDFLAGS) hledger-web/app/main.hs | ||||||
| 
 | 
 | ||||||
| samplejournals: data/sample.journal data/100x100x10.journal data/1000x1000x10.journal data/1000x10000x10.journal data/10000x1000x10.journal data/10000x10000x10.journal data/100000x1000x10.journal \ | samplejournals: \ | ||||||
|  | 	data/sample.journal \
 | ||||||
|  | 	data/100x100x10.journal \
 | ||||||
|  | 	data/1000x1000x10.journal \
 | ||||||
|  | 	data/1000x10000x10.journal \
 | ||||||
|  | 	data/10000x1000x10.journal \
 | ||||||
|  | 	data/10000x10000x10.journal \
 | ||||||
|  | 	data/100000x1000x10.journal \
 | ||||||
|  | 	data/ascii.journal \
 | ||||||
|  | 	data/chinese.journal \
 | ||||||
|  | 	data/mixed.journal \
 | ||||||
| 	$(call def-help,samplejournals, regenerate standard sample journals in data/ ) | 	$(call def-help,samplejournals, regenerate standard sample journals in data/ ) | ||||||
| 
 | 
 | ||||||
| data/sample.journal: | data/sample.journal: | ||||||
| @ -818,6 +828,15 @@ data/10000x10000x10.journal: tools/generatejournal | |||||||
| data/100000x1000x10.journal: tools/generatejournal | data/100000x1000x10.journal: tools/generatejournal | ||||||
| 	tools/generatejournal 100000 1000 10 >$@ | 	tools/generatejournal 100000 1000 10 >$@ | ||||||
| 
 | 
 | ||||||
|  | data/ascii.journal: tools/generatejournal | ||||||
|  | 	tools/generatejournal 3 5 5 >$@ | ||||||
|  | 
 | ||||||
|  | data/chinese.journal: tools/generatejournal | ||||||
|  | 	tools/generatejournal 3 5 5 --chinese >$@ | ||||||
|  | 
 | ||||||
|  | data/mixed.journal: tools/generatejournal | ||||||
|  | 	tools/generatejournal 3 5 5 --mixed >$@ | ||||||
|  | 
 | ||||||
| ###############################################################################
 | ###############################################################################
 | ||||||
| $(call def-help-subsection,DOCUMENTATION:) | $(call def-help-subsection,DOCUMENTATION:) | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -477,7 +477,7 @@ Finally, for quick, fine-grained performance measurements when troubleshooting o | |||||||
| 
 | 
 | ||||||
| ### Generate sample journal files | ### Generate sample journal files | ||||||
| 
 | 
 | ||||||
| Synthetic data files like `data/100x100x10.journal` useful for benchmarks and testing. | Synthetic data files like `data/100x100x10.journal` are useful for benchmarks and testing. | ||||||
| The numbers describe the number of transactions, number of accounts, and maximum account depth respectively. | The numbers describe the number of transactions, number of accounts, and maximum account depth respectively. | ||||||
| They are generated by [`tools/generatejournal.hs`](https://github.com/simonmichael/hledger/blob/master/tools/generatejournal.hs). | They are generated by [`tools/generatejournal.hs`](https://github.com/simonmichael/hledger/blob/master/tools/generatejournal.hs). | ||||||
| They should be built as needed; if not, you can use the `make samplejournals` rule: | They should be built as needed; if not, you can use the `make samplejournals` rule: | ||||||
| @ -493,6 +493,9 @@ tools/generatejournal 1000 10000 10 >data/1000x10000x10.journal | |||||||
| tools/generatejournal 10000 1000 10 >data/10000x1000x10.journal | tools/generatejournal 10000 1000 10 >data/10000x1000x10.journal | ||||||
| tools/generatejournal 10000 10000 10 >data/10000x10000x10.journal | tools/generatejournal 10000 10000 10 >data/10000x10000x10.journal | ||||||
| tools/generatejournal 100000 1000 10 >data/100000x1000x10.journal | tools/generatejournal 100000 1000 10 >data/100000x1000x10.journal | ||||||
|  | tools/generatejournal 3 5 5 >data/ascii.journal | ||||||
|  | tools/generatejournal 3 5 5 --chinese >data/chinese.journal | ||||||
|  | tools/generatejournal 3 5 5 --mixed >data/mixed.journal | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| ### Run developer tests | ### Run developer tests | ||||||
|  | |||||||
| @ -1,30 +1,34 @@ | |||||||
| #!/usr/bin/env runhaskell | #!/usr/bin/env runhaskell | ||||||
| {- | {- | ||||||
| generateledger.hs NUMTXNS NUMACCTS ACCTDEPTH | generatejournal.hs NUMTXNS NUMACCTS ACCTDEPTH [--chinese|--mixed] | ||||||
| 
 |  | ||||||
| Outputs a dummy ledger file with the specified number of transactions, |  | ||||||
| number of accounts, and account tree depth. Useful for |  | ||||||
| testing/profiling/benchmarking. |  | ||||||
| 
 | 
 | ||||||
|  | Outputs a dummy journal file with the specified number of | ||||||
|  | transactions, number of accounts, and account tree depth. By default | ||||||
|  | it uses only ASCII characters; with --chinese it uses wide Chinese | ||||||
|  | characters, and with --mixed it uses both.  These files are used for | ||||||
|  | testing, benchmarking, profiling, etc. | ||||||
| -} | -} | ||||||
| 
 | 
 | ||||||
| module Main | module Main | ||||||
| where | where | ||||||
| import System.Environment | import Data.Char | ||||||
| import Control.Monad | import Data.List | ||||||
| import Data.Time.LocalTime |  | ||||||
| import Data.Time.Calendar | import Data.Time.Calendar | ||||||
| import Text.Printf | import Data.Time.LocalTime | ||||||
| import Numeric | import Numeric | ||||||
|  | import System.Environment | ||||||
|  | import Text.Printf | ||||||
|  | -- import Hledger.Utils.Debug | ||||||
| 
 | 
 | ||||||
| main = do | main = do | ||||||
|   args <- getArgs |   rawargs <- getArgs | ||||||
|  |   let (opts,args) = partition (isPrefixOf "-") rawargs | ||||||
|   let [numtxns, numaccts, acctdepth] = map read args :: [Int] |   let [numtxns, numaccts, acctdepth] = map read args :: [Int] | ||||||
|   today <- getCurrentDay |   today <- getCurrentDay | ||||||
|   let (year,_,_) = toGregorian today |   let (year,_,_) = toGregorian today | ||||||
|   let d = fromGregorian (year-1) 1 1 |   let d = fromGregorian (year-1) 1 1 | ||||||
|   let dates = iterate (addDays 1) d |   let dates = iterate (addDays 1) d | ||||||
|   let accts = pair $ cycle $ take numaccts $ uniqueacctnames acctdepth |   let accts = pair $ cycle $ take numaccts $ uniqueAccountNames opts acctdepth | ||||||
|   mapM_ (\(n,d,(a,b)) -> putStr $ showtxn n d a b) $ take numtxns $ zip3 [1..] dates accts |   mapM_ (\(n,d,(a,b)) -> putStr $ showtxn n d a b) $ take numtxns $ zip3 [1..] dates accts | ||||||
|   return () |   return () | ||||||
| 
 | 
 | ||||||
| @ -35,17 +39,41 @@ showtxn txnno date acct1 acct2 = | |||||||
|       d = show date |       d = show date | ||||||
|       amt = 1::Int |       amt = 1::Int | ||||||
| 
 | 
 | ||||||
| uniqueacctnames :: Int -> [String] | uniqueAccountNames :: [String] -> Int -> [String] | ||||||
| uniqueacctnames depth = uniqueacctnames' depth uniquenames | uniqueAccountNames opts depth = | ||||||
|     where uniquenames = map hex [1..] where hex = flip showHex "" |   mkacctnames uniquenames | ||||||
|  |   where | ||||||
|  |     mkacctnames names = mkacctnamestodepth some ++ mkacctnames rest | ||||||
|  |       where | ||||||
|  |         (some, rest) = splitAt depth names | ||||||
|  |         -- mkacctnamestodepth ["a", "b", "c"] = ["a","a:b","a:b:c"] | ||||||
|  |         mkacctnamestodepth :: [String] -> [String] | ||||||
|  |         mkacctnamestodepth [] = [] | ||||||
|  |         mkacctnamestodepth (a:as) = a : map ((a++":")++) (mkacctnamestodepth as) | ||||||
|  |     uniquenames | ||||||
|  |       | "--mixed" `elem` opts   = concat $ zipWith (\a b -> [a,b]) uniqueNamesHex uniqueNamesWide | ||||||
|  |       | "--chinese" `elem` opts = uniqueNamesWide | ||||||
|  |       | otherwise               = uniqueNamesHex | ||||||
| 
 | 
 | ||||||
| uniqueacctnames' depth uniquenames = group some ++ uniqueacctnames' depth rest | uniqueNamesHex = map hex [1..] where hex = flip showHex "" | ||||||
|     where (some, rest) = splitAt depth uniquenames | 
 | ||||||
|  | uniqueNamesWide = concat [sequences n wideChars | n <- [1..]] | ||||||
|  | 
 | ||||||
|  | -- Get the sequences of specified size starting at each element of a list, | ||||||
|  | -- cycling it if needed to fill the trailing (short) sequences. If the list's elements | ||||||
|  | -- are unique, then the sequences will be too. | ||||||
|  | sequences :: Show a => Int -> [a] -> [[a]] | ||||||
|  | sequences n l = go l | ||||||
|  |   where | ||||||
|  |     go [] = [] | ||||||
|  |     go l' = s : go (tail l') | ||||||
|  |       where | ||||||
|  |         s' = take n l' | ||||||
|  |         s | length s' == n = s' | ||||||
|  |           | otherwise      = take n (l' ++ cycle l) | ||||||
|  | 
 | ||||||
|  | wideChars = map chr [0x3400..0x4db0] | ||||||
| 
 | 
 | ||||||
| -- group ["a", "b", "c"] = ["a","a:b","a:b:c"] |  | ||||||
| group :: [String] -> [String] |  | ||||||
| group [] = [] |  | ||||||
| group (a:as) = a : map ((a++":")++) (group as) |  | ||||||
| 
 | 
 | ||||||
| pair :: [a] -> [(a,a)] | pair :: [a] -> [(a,a)] | ||||||
| pair [] = [] | pair [] = [] | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user