Let's try out the "snippets" idea.

Represent strings (byte strings) as immutable three-tuples of
(git SHA hash, offset, length).
This commit is contained in:
Simon Forman 2022-03-01 14:13:07 -08:00
parent 61d18f8491
commit 142d6e53b0
1 changed files with 34 additions and 0 deletions

View File

@ -0,0 +1,34 @@
from collections import namedtuple
from re import compile as RE
# A snippet is an immutable (sha, offset, length) triple.
Snippet = namedtuple('Snippet', 'sha offset length')

# %-format template for the text form of a snippet, e.g. "{deadbeef 0 12}".
fmt = '{%s %i %i}'

# Regular expression for the text form: a lowercase-hex sha followed by two
# decimal integers, whitespace-separated, wrapped in braces.  Whitespace
# just inside the braces is optional.
#
# The fragments are raw strings so that \s and \d reach the regex engine
# untouched instead of being flagged as invalid string escape sequences.
pat = (
    r'{'
    r'\s*'
    r'(?P<sha>[a-f0-9]+)'
    r'\s+'
    r'(?P<offset>\d+)'
    r'\s+'
    r'(?P<length>\d+)'
    r'\s*'
    r'}'
)
PAT = RE(pat)
def to_string(snip):
    """Render *snip* in its text form by filling ``fmt`` with its fields.

    *snip* is unpacked positionally into ``_ts``, so it must be a
    three-item sequence of (sha, offset, length).
    """
    fields = _ts(*snip)
    return fmt % fields
def _ts(sha, offset, length):
    """Return the (sha, offset, length) values ready for string formatting.

    The sha is decoded from ASCII bytes to str; offset and length are
    passed through unchanged.
    """
    sha_text = sha.decode('ascii')
    return (sha_text, offset, length)
def from_string(text):
    """Parse the text form of a snippet, e.g. ``{deadbeef 0 12}``.

    The pattern is matched at the start of *text* only; anything that
    follows the closing brace is ignored.

    Returns the Snippet built by ``_fs`` from the matched fields.
    Raises ValueError if *text* does not start with a well-formed snippet.
    """
    m = PAT.match(text)
    if not m:
        # Report what failed to parse; a bare ValueError gives the caller
        # nothing to go on.
        raise ValueError('not a snippet: %r' % (text,))
    return _fs(**m.groupdict())
def _fs(sha, offset, length):
    """Build a Snippet from the string fields captured by the pattern.

    The sha is encoded to ASCII bytes; offset and length are converted
    to ints.
    """
    return Snippet(
        sha=sha.encode('ascii'),
        offset=int(offset),
        length=int(length),
    )