Skip to content

Commit a959fb1

Browse files
committed
Merge remote-tracking branch 'dnephin/perf_cache_resolving'
* dnephin/perf_cache_resolving: Use lru_cache Remove DefragResult. Remove context manager from ref() validation. Perf improvements by using a cache. Add benchmark script. Fix test failures issue #158: TRY to speed-up scope & $ref url-handling by keeping fragments separated from URL (and avoid redundant frag/defrag). Conflicts: jsonschema/tests/test_benchmarks.py
2 parents a38eac9 + ee1a256 commit a959fb1

File tree

8 files changed

+172
-44
lines changed

8 files changed

+172
-44
lines changed

bench.py

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#!/usr/bin/env python
2+
"""
3+
Benchmark the performance of jsonschema.
4+
5+
Example benchmark:
6+
7+
wget http://swagger.io/v2/schema.json
8+
wget http://petstore.swagger.io/v2/swagger.json
9+
python bench.py -r 5 schema.json swagger.json
10+
11+
"""
12+
from __future__ import print_function
13+
import argparse
14+
import cProfile
15+
import json
16+
import time
17+
18+
import jsonschema
19+
20+
21+
def parse_args():
    """
    Parse the benchmark's command line arguments.

    :returns: the ``argparse.Namespace`` holding ``schema``, ``document``,
        ``repeat`` and ``profile``

    Exits with a usage error if ``--repeat`` is smaller than 1.

    """
    parser = argparse.ArgumentParser()
    parser.add_argument('schema', help="path to a schema used to benchmark")
    parser.add_argument('document', help="document to validate with schema")
    # Without a default, omitting -r leaves ``repeat`` as None, which is
    # not a usable iteration count.
    parser.add_argument('-r', '--repeat', type=int, default=1,
                        help="number of iterations (default: 1)")
    parser.add_argument('--profile',
                        help="Enable profiling, write profile to this filepath")
    args = parser.parse_args()
    # A benchmark with no iterations has no timings to report.
    if args.repeat < 1:
        parser.error("--repeat must be at least 1")
    return args
29+
30+
31+
def run(filename, schema, document):
    """
    Validate ``document`` against ``schema`` once.

    :argument str filename: path of the schema file, used to build the
        resolver's ``file://`` base URI
    :argument dict schema: the loaded schema
    :argument document: the loaded instance to validate

    """
    # Seed the resolver's store under the schema's own id when it has one;
    # a schema without an "id" key must not make the benchmark crash.
    store = {}
    if 'id' in schema:
        store[schema['id']] = schema

    resolver = jsonschema.RefResolver(
        'file://{0}'.format(filename),
        schema,
        store=store)
    jsonschema.validate(document, schema, resolver=resolver)
37+
38+
39+
def format_time(time_):
    """Render a duration given in seconds as milliseconds, 3 decimals."""
    milliseconds = time_ * 1000
    return "%.3fms" % (milliseconds,)
41+
42+
43+
def run_timeit(schema_filename, document_filename, repeat, profile):
    """
    Validate a document against a schema repeatedly and print the timings.

    Prints the sorted per-run timings and, when at least one run was made,
    their mean.

    :argument str schema_filename: path to the JSON schema file
    :argument str document_filename: path to the JSON document to validate
    :argument int repeat: number of validation runs; ``None`` is treated
        as a single run
    :argument str profile: if truthy, the filepath to which the cProfile
        stats covering all runs are written

    """
    # Imported locally so this function does not rely on a new file-level
    # import. ``default_timer`` picks the most precise clock the platform
    # offers, which suits short runs better than ``time.time()``.
    from timeit import default_timer

    if repeat is None:
        repeat = 1

    with open(schema_filename) as schema_file:
        schema = json.load(schema_file)

    with open(document_filename) as document_file:
        document = json.load(document_file)

    profiler = None
    if profile:
        profiler = cProfile.Profile()
        profiler.enable()

    times = []
    try:
        for _ in range(repeat):
            start_time = default_timer()
            run(schema_filename, schema, document)
            times.append(default_timer() - start_time)
    finally:
        # Always stop the profiler and keep whatever was collected, even
        # when a validation run raises.
        if profiler is not None:
            profiler.disable()
            profiler.dump_stats(profile)

    print(", ".join(map(format_time, sorted(times))))
    # Guard the mean against an empty run list (repeat <= 0).
    if times:
        print("Mean: {0}".format(format_time(sum(times) / len(times))))
66+
67+
68+
def main():
    """Entry point: read the command line options and run the benchmark."""
    options = parse_args()
    run_timeit(
        options.schema,
        options.document,
        options.repeat,
        options.profile,
    )


if __name__ == "__main__":
    main()

jsonschema/__init__.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919
Draft3Validator, Draft4Validator, RefResolver, validate
2020
)
2121

22-
23-
__version__ = "2.5.0-dev"
24-
22+
from jsonschema.version import __version__
2523

2624
# flake8: noqa

jsonschema/_validators.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -190,9 +190,14 @@ def enum(validator, enums, instance, schema):
190190

191191

192192
def ref(validator, ref, instance, schema):
193-
with validator.resolver.resolving(ref) as resolved:
193+
scope, resolved = validator.resolver.resolve(ref)
194+
validator.resolver.push_scope(scope)
195+
196+
try:
194197
for error in validator.descend(instance, resolved):
195198
yield error
199+
finally:
200+
validator.resolver.pop_scope()
196201

197202

198203
def type_draft3(validator, types, instance, schema):

jsonschema/compat.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from __future__ import unicode_literals
2-
import sys
2+
33
import operator
4+
import sys
5+
46

57
try:
68
from collections import MutableMapping, Sequence # noqa
@@ -11,6 +13,7 @@
1113

1214
if PY3:
1315
zip = zip
16+
from functools import lru_cache
1417
from io import StringIO
1518
from urllib.parse import (
1619
unquote, urljoin, urlunsplit, SplitResult, urlsplit as _urlsplit
@@ -21,6 +24,7 @@
2124
iteritems = operator.methodcaller("items")
2225
else:
2326
from itertools import izip as zip # noqa
27+
from repoze.lru import lru_cache
2428
from StringIO import StringIO
2529
from urlparse import (
2630
urljoin, urlunsplit, SplitResult, urlsplit as _urlsplit # noqa

jsonschema/tests/test_validators.py

+14-14
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from collections import deque
2-
from contextlib import contextmanager
32
import json
43

54
from jsonschema import FormatChecker, ValidationError
@@ -633,12 +632,8 @@ def test_it_delegates_to_a_ref_resolver(self):
633632
resolver = RefResolver("", {})
634633
schema = {"$ref" : mock.Mock()}
635634

636-
@contextmanager
637-
def resolving():
638-
yield {"type": "integer"}
639-
640-
with mock.patch.object(resolver, "resolving") as resolve:
641-
resolve.return_value = resolving()
635+
with mock.patch.object(resolver, "resolve") as resolve:
636+
resolve.return_value = "url", {"type": "integer"}
642637
with self.assertRaises(ValidationError):
643638
self.validator_class(schema, resolver=resolver).validate(None)
644639

@@ -775,11 +770,11 @@ def test_it_resolves_local_refs(self):
775770
self.assertEqual(resolved, self.referrer["properties"]["foo"])
776771

777772
def test_it_resolves_local_refs_with_id(self):
778-
schema = {"id": "foo://bar/schema#", "a": {"foo": "bar"}}
773+
schema = {"id": "http://bar/schema#", "a": {"foo": "bar"}}
779774
resolver = RefResolver.from_schema(schema)
780775
with resolver.resolving("#/a") as resolved:
781776
self.assertEqual(resolved, schema["a"])
782-
with resolver.resolving("foo://bar/schema#/a") as resolved:
777+
with resolver.resolving("http://bar/schema#/a") as resolved:
783778
self.assertEqual(resolved, schema["a"])
784779

785780
def test_it_retrieves_stored_refs(self):
@@ -815,7 +810,7 @@ def test_it_retrieves_unstored_refs_via_urlopen(self):
815810
def test_it_can_construct_a_base_uri_from_a_schema(self):
816811
schema = {"id" : "foo"}
817812
resolver = RefResolver.from_schema(schema)
818-
self.assertEqual(resolver.base_uri, "foo")
813+
self.assertEqual(resolver.resolution_scope, "foo")
819814
with resolver.resolving("") as resolved:
820815
self.assertEqual(resolved, schema)
821816
with resolver.resolving("#") as resolved:
@@ -828,7 +823,7 @@ def test_it_can_construct_a_base_uri_from_a_schema(self):
828823
def test_it_can_construct_a_base_uri_from_a_schema_without_id(self):
829824
schema = {}
830825
resolver = RefResolver.from_schema(schema)
831-
self.assertEqual(resolver.base_uri, "")
826+
self.assertEqual(resolver.resolution_scope, "")
832827
with resolver.resolving("") as resolved:
833828
self.assertEqual(resolved, schema)
834829
with resolver.resolving("#") as resolved:
@@ -863,9 +858,7 @@ def test_cache_remote_off(self):
863858
)
864859
with resolver.resolving(ref):
865860
pass
866-
with resolver.resolving(ref):
867-
pass
868-
self.assertEqual(foo_handler.call_count, 2)
861+
self.assertEqual(foo_handler.call_count, 1)
869862

870863
def test_if_you_give_it_junk_you_get_a_resolution_error(self):
871864
ref = "foo://bar"
@@ -876,6 +869,13 @@ def test_if_you_give_it_junk_you_get_a_resolution_error(self):
876869
pass
877870
self.assertEqual(str(err.exception), "Oh no! What's this?")
878871

872+
def test_helpful_error_message_on_failed_pop_scope(self):
873+
resolver = RefResolver("", {})
874+
resolver.pop_scope()
875+
with self.assertRaises(RefResolutionError) as exc:
876+
resolver.pop_scope()
877+
self.assertIn("Failed to pop the scope", str(exc.exception))
878+
879879

880880
def sorted_errors(errors):
881881
def key(error):

jsonschema/validators.py

+58-23
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from jsonschema import _utils, _validators
1313
from jsonschema.compat import (
1414
Sequence, urljoin, urlsplit, urldefrag, unquote, urlopen,
15-
str_types, int_types, iteritems,
15+
str_types, int_types, iteritems, lru_cache,
1616
)
1717
from jsonschema.exceptions import ErrorTree # Backwards compatibility # noqa
1818
from jsonschema.exceptions import RefResolutionError, SchemaError, UnknownType
@@ -79,7 +79,10 @@ def iter_errors(self, instance, _schema=None):
7979
if _schema is None:
8080
_schema = self.schema
8181

82-
with self.resolver.in_scope(_schema.get(u"id", u"")):
82+
scope = _schema.get(u"id")
83+
if scope:
84+
self.resolver.push_scope(scope)
85+
try:
8386
ref = _schema.get(u"$ref")
8487
if ref is not None:
8588
validators = [(u"$ref", ref)]
@@ -103,6 +106,9 @@ def iter_errors(self, instance, _schema=None):
103106
if k != u"$ref":
104107
error.schema_path.appendleft(k)
105108
yield error
109+
finally:
110+
if scope:
111+
self.resolver.pop_scope()
106112

107113
def descend(self, instance, schema, path=None, schema_path=None):
108114
for error in self.iter_errors(instance, schema):
@@ -227,26 +233,32 @@ class RefResolver(object):
227233
first resolution
228234
:argument dict handlers: a mapping from URI schemes to functions that
229235
should be used to retrieve them
230-
236+
:argument callable cache_func: a function decorator used to cache
237+
expensive calls. Should support the `functools.lru_cache` interface.
238+
:argument int cache_maxsize: number of items to store in the cache. Set
239+
this to 0 to disable caching. Defaults to 1000.
231240
"""
232241

233242
def __init__(
234243
self, base_uri, referrer, store=(), cache_remote=True, handlers=(),
244+
cache_func=lru_cache, cache_maxsize=1000,
235245
):
236-
self.base_uri = base_uri
237-
self.resolution_scope = base_uri
238246
# This attribute is not used, it is for backwards compatibility
239247
self.referrer = referrer
240248
self.cache_remote = cache_remote
241249
self.handlers = dict(handlers)
242250

251+
self._scopes_stack = [base_uri]
243252
self.store = _utils.URIDict(
244253
(id, validator.META_SCHEMA)
245254
for id, validator in iteritems(meta_schemas)
246255
)
247256
self.store.update(store)
248257
self.store[base_uri] = referrer
249258

259+
self._urljoin_cache = cache_func(cache_maxsize)(urljoin)
260+
self._resolve_cache = cache_func(cache_maxsize)(self.resolve_from_url)
261+
250262
@classmethod
251263
def from_schema(cls, schema, *args, **kwargs):
252264
"""
@@ -259,44 +271,67 @@ def from_schema(cls, schema, *args, **kwargs):
259271

260272
return cls(schema.get(u"id", u""), schema, *args, **kwargs)
261273

274+
def push_scope(self, scope):
275+
self._scopes_stack.append(
276+
self._urljoin_cache(self.resolution_scope, scope))
277+
278+
def pop_scope(self):
279+
try:
280+
self._scopes_stack.pop()
281+
except IndexError:
282+
raise RefResolutionError(
283+
"Failed to pop the scope from an empty stack. "
284+
"`pop_scope()` should only be called once for every "
285+
"`push_scope()`")
286+
287+
@property
288+
def resolution_scope(self):
289+
return self._scopes_stack[-1]
290+
291+
292+
# Deprecated, this function is no longer used, but is preserved for
293+
# backwards compatibility
262294
@contextlib.contextmanager
263295
def in_scope(self, scope):
264-
old_scope = self.resolution_scope
265-
self.resolution_scope = urljoin(old_scope, scope)
296+
self.push_scope(scope)
266297
try:
267298
yield
268299
finally:
269-
self.resolution_scope = old_scope
300+
self.pop_scope()
270301

302+
# Deprecated, this function is no longer used, but is preserved for
303+
# backwards compatibility
271304
@contextlib.contextmanager
272305
def resolving(self, ref):
306+
url, resolved = self.resolve(ref)
307+
self.push_scope(url)
308+
try:
309+
yield resolved
310+
finally:
311+
self.pop_scope()
312+
313+
def resolve(self, ref):
273314
"""
274315
Resolve a JSON ``ref`` against the current resolution scope and
275316
return the resolved URL together with the referenced document.
276317
277318
:argument str ref: reference to resolve
278319
279320
"""
321+
url = self._urljoin_cache(self.resolution_scope, ref)
322+
return url, self._resolve_cache(url)
280323

281-
full_uri = urljoin(self.resolution_scope, ref)
282-
uri, fragment = urldefrag(full_uri)
283-
if not uri:
284-
uri = self.base_uri
285-
286-
if uri in self.store:
287-
document = self.store[uri]
288-
else:
324+
def resolve_from_url(self, url):
325+
url, fragment = urldefrag(url)
326+
try:
327+
document = self.store[url]
328+
except KeyError:
289329
try:
290-
document = self.resolve_remote(uri)
330+
document = self.resolve_remote(url)
291331
except Exception as exc:
292332
raise RefResolutionError(exc)
293333

294-
old_base_uri, self.base_uri = self.base_uri, uri
295-
try:
296-
with self.in_scope(uri):
297-
yield self.resolve_fragment(document, fragment)
298-
finally:
299-
self.base_uri = old_base_uri
334+
return self.resolve_fragment(document, fragment)
300335

301336
def resolve_fragment(self, document, fragment):
302337
"""

jsonschema/version.py

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__version__ = "2.5.0-dev"

setup.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
1+
import os.path
12
from setuptools import setup
3+
import sys
24

3-
from jsonschema import __version__
4-
5+
# Load __version__ info globals without importing anything
6+
with open(
7+
os.path.join(os.path.dirname(__file__), 'jsonschema', 'version.py')
8+
) as fh:
9+
exec(fh.read())
510

611
with open("README.rst") as readme:
712
long_description = readme.read()
@@ -21,6 +26,11 @@
2126
"Programming Language :: Python :: Implementation :: PyPy",
2227
]
2328

29+
install_requires = []
30+
31+
if sys.version_info < (3, 2):
32+
install_requires.append('repoze.lru >= 0.6')
33+
2434
setup(
2535
name="jsonschema",
2636
version=__version__,
@@ -34,4 +44,5 @@
3444
long_description=long_description,
3545
url="https://github.com./Julian/jsonschema",
3646
entry_points={"console_scripts": ["jsonschema = jsonschema.cli:main"]},
47+
install_requires=install_requires,
3748
)

0 commit comments

Comments
 (0)