tools: add sample journals with wide characters
generatejournal now has --chinese and --mixed options, which are used to generate some additional small sample journals.
This commit is contained in:
		
							parent
							
								
									939f7184c9
								
							
						
					
					
						commit
						e6d8a9d1bc
					
				
							
								
								
									
										21
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										21
									
								
								Makefile
									
									
									
									
									
								
							| @ -794,7 +794,17 @@ ghci-web: \ | ||||
| 		$(call def-help,ghci-web, start a GHCI REPL and load the hledger-lib, hledger and hledger-web packages) | ||||
| 	stack exec $(GHCI) -- $(BUILDFLAGS) hledger-web/app/main.hs | ||||
| 
 | ||||
| samplejournals: data/sample.journal data/100x100x10.journal data/1000x1000x10.journal data/1000x10000x10.journal data/10000x1000x10.journal data/10000x10000x10.journal data/100000x1000x10.journal \ | ||||
| samplejournals: \ | ||||
| 	data/sample.journal \
 | ||||
| 	data/100x100x10.journal \
 | ||||
| 	data/1000x1000x10.journal \
 | ||||
| 	data/1000x10000x10.journal \
 | ||||
| 	data/10000x1000x10.journal \
 | ||||
| 	data/10000x10000x10.journal \
 | ||||
| 	data/100000x1000x10.journal \
 | ||||
| 	data/ascii.journal \
 | ||||
| 	data/chinese.journal \
 | ||||
| 	data/mixed.journal \
 | ||||
| 	$(call def-help,samplejournals, regenerate standard sample journals in data/ ) | ||||
| 
 | ||||
| data/sample.journal: | ||||
| @ -818,6 +828,15 @@ data/10000x10000x10.journal: tools/generatejournal | ||||
| data/100000x1000x10.journal: tools/generatejournal | ||||
| 	tools/generatejournal 100000 1000 10 >$@ | ||||
| 
 | ||||
| data/ascii.journal: tools/generatejournal | ||||
| 	tools/generatejournal 3 5 5 >$@ | ||||
| 
 | ||||
| data/chinese.journal: tools/generatejournal | ||||
| 	tools/generatejournal 3 5 5 --chinese >$@ | ||||
| 
 | ||||
| data/mixed.journal: tools/generatejournal | ||||
| 	tools/generatejournal 3 5 5 --mixed >$@ | ||||
| 
 | ||||
| ###############################################################################
 | ||||
| $(call def-help-subsection,DOCUMENTATION:) | ||||
| 
 | ||||
|  | ||||
| @ -477,7 +477,7 @@ Finally, for quick, fine-grained performance measurements when troubleshooting o | ||||
| 
 | ||||
| ### Generate sample journal files | ||||
| 
 | ||||
| Synthetic data files like `data/100x100x10.journal` useful for benchmarks and testing. | ||||
| Synthetic data files like `data/100x100x10.journal` are useful for benchmarks and testing. | ||||
| The numbers describe the number of transactions, number of accounts, and maximum account depth respectively. | ||||
| They are generated by [`tools/generatejournal.hs`](https://github.com/simonmichael/hledger/blob/master/tools/generatejournal.hs). | ||||
| They should be built as needed; if not, you can use the `make samplejournals` rule: | ||||
| @ -493,6 +493,9 @@ tools/generatejournal 1000 10000 10 >data/1000x10000x10.journal | ||||
| tools/generatejournal 10000 1000 10 >data/10000x1000x10.journal | ||||
| tools/generatejournal 10000 10000 10 >data/10000x10000x10.journal | ||||
| tools/generatejournal 100000 1000 10 >data/100000x1000x10.journal | ||||
| tools/generatejournal 3 5 5 >data/ascii.journal | ||||
| tools/generatejournal 3 5 5 --chinese >data/chinese.journal | ||||
| tools/generatejournal 3 5 5 --mixed >data/mixed.journal | ||||
| ``` | ||||
| 
 | ||||
| ### Run developer tests | ||||
|  | ||||
| @ -1,30 +1,34 @@ | ||||
| #!/usr/bin/env runhaskell | ||||
| {- | ||||
| generateledger.hs NUMTXNS NUMACCTS ACCTDEPTH | ||||
| 
 | ||||
| Outputs a dummy ledger file with the specified number of transactions, | ||||
| number of accounts, and account tree depth. Useful for | ||||
| testing/profiling/benchmarking. | ||||
| generatejournal.hs NUMTXNS NUMACCTS ACCTDEPTH [--chinese|--mixed] | ||||
| 
 | ||||
| Outputs a dummy journal file with the specified number of | ||||
| transactions, number of accounts, and account tree depth. By default | ||||
| it uses only ASCII characters; with --chinese it uses wide Chinese | ||||
| characters, and with --mixed it uses both.  These files are used for | ||||
| testing, benchmarking, profiling, etc. | ||||
| -} | ||||
| 
 | ||||
| module Main | ||||
| where | ||||
| import System.Environment | ||||
| import Control.Monad | ||||
| import Data.Time.LocalTime | ||||
| import Data.Char | ||||
| import Data.List | ||||
| import Data.Time.Calendar | ||||
| import Text.Printf | ||||
| import Data.Time.LocalTime | ||||
| import Numeric | ||||
| import System.Environment | ||||
| import Text.Printf | ||||
| -- import Hledger.Utils.Debug | ||||
| 
 | ||||
| main = do | ||||
|   args <- getArgs | ||||
|   rawargs <- getArgs | ||||
|   let (opts,args) = partition (isPrefixOf "-") rawargs | ||||
|   let [numtxns, numaccts, acctdepth] = map read args :: [Int] | ||||
|   today <- getCurrentDay | ||||
|   let (year,_,_) = toGregorian today | ||||
|   let d = fromGregorian (year-1) 1 1 | ||||
|   let dates = iterate (addDays 1) d | ||||
|   let accts = pair $ cycle $ take numaccts $ uniqueacctnames acctdepth | ||||
|   let accts = pair $ cycle $ take numaccts $ uniqueAccountNames opts acctdepth | ||||
|   mapM_ (\(n,d,(a,b)) -> putStr $ showtxn n d a b) $ take numtxns $ zip3 [1..] dates accts | ||||
|   return () | ||||
| 
 | ||||
| @ -35,17 +39,41 @@ showtxn txnno date acct1 acct2 = | ||||
|       d = show date | ||||
|       amt = 1::Int | ||||
| 
 | ||||
| uniqueacctnames :: Int -> [String] | ||||
| uniqueacctnames depth = uniqueacctnames' depth uniquenames | ||||
|     where uniquenames = map hex [1..] where hex = flip showHex "" | ||||
| uniqueAccountNames :: [String] -> Int -> [String] | ||||
| uniqueAccountNames opts depth = | ||||
|   mkacctnames uniquenames | ||||
|   where | ||||
|     mkacctnames names = mkacctnamestodepth some ++ mkacctnames rest | ||||
|       where | ||||
|         (some, rest) = splitAt depth names | ||||
|         -- mkacctnamestodepth ["a", "b", "c"] = ["a","a:b","a:b:c"] | ||||
|         mkacctnamestodepth :: [String] -> [String] | ||||
|         mkacctnamestodepth [] = [] | ||||
|         mkacctnamestodepth (a:as) = a : map ((a++":")++) (mkacctnamestodepth as) | ||||
|     uniquenames | ||||
|       | "--mixed" `elem` opts   = concat $ zipWith (\a b -> [a,b]) uniqueNamesHex uniqueNamesWide | ||||
|       | "--chinese" `elem` opts = uniqueNamesWide | ||||
|       | otherwise               = uniqueNamesHex | ||||
| 
 | ||||
| uniqueacctnames' depth uniquenames = group some ++ uniqueacctnames' depth rest | ||||
|     where (some, rest) = splitAt depth uniquenames | ||||
| uniqueNamesHex = map hex [1..] where hex = flip showHex "" | ||||
| 
 | ||||
| uniqueNamesWide = concat [sequences n wideChars | n <- [1..]] | ||||
| 
 | ||||
| -- Get the sequences of specified size starting at each element of a list, | ||||
| -- cycling it if needed to fill the trailing sequences. If the list's elements | ||||
| -- are unique, then the sequences will be too. | ||||
| sequences :: Show a => Int -> [a] -> [[a]] | ||||
| sequences n l = go l | ||||
|   where | ||||
|     go [] = [] | ||||
|     go l' = s : go (tail l') | ||||
|       where | ||||
|         s' = take n l' | ||||
|         s | length s' == n = s' | ||||
|           | otherwise      = take n (l' ++ cycle l) | ||||
| 
 | ||||
| wideChars = map chr [0x3400..0x4db0] | ||||
| 
 | ||||
| -- group ["a", "b", "c"] = ["a","a:b","a:b:c"] | ||||
| group :: [String] -> [String] | ||||
| group [] = [] | ||||
| group (a:as) = a : map ((a++":")++) (group as) | ||||
| 
 | ||||
| pair :: [a] -> [(a,a)] | ||||
| pair [] = [] | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user