From 9967d6ce76b1dce3a63c92302f99e0f9e69cc88e Mon Sep 17 00:00:00 2001
From: Dinesh Dutt
Date: Thu, 20 Jun 2019 14:38:21 -0700
Subject: [PATCH 1/9] Adding support for IPv4Network/IPv6Network as an
extension array for pandas
---
cyberpandas/__init__.py | 12 +-
cyberpandas/ipnetwork_array.py | 516 +++++++++++++++++++++++++++++++++
cyberpandas/parser.py | 43 +++
3 files changed, 570 insertions(+), 1 deletion(-)
create mode 100644 cyberpandas/ipnetwork_array.py
diff --git a/cyberpandas/__init__.py b/cyberpandas/__init__.py
index 710b1b2..371b523 100644
--- a/cyberpandas/__init__.py
+++ b/cyberpandas/__init__.py
@@ -5,8 +5,14 @@
IPArray,
IPAccessor,
)
+
+from .ipnetwork_array import (
+ IPNetworkType,
+ IPNetworkArray,
+ IPNetAccessor
+)
from .ip_methods import ip_range
-from .parser import to_ipaddress
+from .parser import to_ipaddress, to_ipnetwork
from .mac_array import MACType, MACArray
from pkg_resources import get_distribution, DistributionNotFound
@@ -25,8 +31,12 @@
'IPAccessor',
'IPArray',
'IPType',
+ 'IPNetAccessor',
+ 'IPNetworkArray',
+ 'IPNetworkType',
'MACArray',
'MACType',
'ip_range',
'to_ipaddress',
+ 'to_ipnetwork',
]
diff --git a/cyberpandas/ipnetwork_array.py b/cyberpandas/ipnetwork_array.py
new file mode 100644
index 0000000..255fff2
--- /dev/null
+++ b/cyberpandas/ipnetwork_array.py
@@ -0,0 +1,516 @@
+import abc
+import collections
+from ipaddress import IPv4Network, IPv6Network, ip_network
+from ipaddress import IPv4Address, IPv6Address
+
+import six
+import numpy as np
+import pandas as pd
+from pandas.api.extensions import ExtensionDtype, take
+from pandas.api.types import is_list_like
+
+from cyberpandas._accessor import (DelegatedMethod, DelegatedProperty,
+ delegated_method)
+from cyberpandas.base import NumPyBackedExtensionArrayMixin
+
+# -----------------------------------------------------------------------------
+# Extension Type
+# -----------------------------------------------------------------------------
+
+
+@six.add_metaclass(abc.ABCMeta)
+class IPv4v6Network(object):
+ """Abstract base class giving the two scalar IP network types a common virtual base."""
+ pass
+
+
+IPv4v6Network.register(IPv4Network)
+IPv4v6Network.register(IPv6Network)
+
+
+@pd.api.extensions.register_extension_dtype
+class IPNetworkType(ExtensionDtype):
+ name = 'ipnetwork'
+ type = IPv4v6Network
+ kind = 'O'
+ na_value = ip_network("0.0.0.0", strict=False)
+
+ @classmethod
+ def construct_from_string(cls, string):
+ if string == cls.name:
+ return cls()
+ else:
+ raise TypeError("Cannot construct a '{}' from "
+ "'{}'".format(cls, string))
+
+ @classmethod
+ def construct_array_type(cls):
+ return IPNetworkArray
+
+
+# -----------------------------------------------------------------------------
+# Extension Container
+# -----------------------------------------------------------------------------
+
+
+class IPNetworkArray(NumPyBackedExtensionArrayMixin):
+ """Holder for IP Networks.
+
+ IPNetworkArray is a container for IPv4 or IPv6 networks. It satisfies
+ pandas' extension array interface, and so can be stored inside
+ :class:`pandas.Series` and :class:`pandas.DataFrame`.
+
+ See :ref:`usage` for more.
+ """
+ # We store everything as ipaddress' IPv4Network or IPv6Network.
+ # An alternative is to replicate the implementation of IPxNetwork in
+ # ipaddress. The latter approach *may* provide some efficiency in being
+ # able to do array operations via numpy rather than as list comprehensions.
+ __array_priority__ = 1000
+ _dtype = IPNetworkType()
+ _itemsize = 56
+ ndim = 1
+ can_hold_na = True
+
+ def __init__(self, values, dtype=None, copy=False):
+ from .parser import _to_ipnetwork_array
+
+ values = _to_ipnetwork_array(values) # TODO: avoid potential copy
+ # TODO: dtype?
+ if copy:
+ values = values.copy()
+ self.data = values
+
+ @classmethod
+ def _from_ndarray(cls, data, copy=False):
+ """Zero-copy construction of an IPNetworkArray from an ndarray.
+
+ Parameters
+ ----------
+ data : ndarray
+ This should have IPNetworkType dtype
+ copy : bool, default False
+ Whether to copy the data.
+
+ Returns
+ -------
+ IPNetworkArray
+ """
+ if copy:
+ data = data.copy()
+ new = IPNetworkArray([])
+ new.data = data
+ return new
+
+ # -------------------------------------------------------------------------
+ # Properties
+ # -------------------------------------------------------------------------
+ @property
+ def na_value(self):
+ """The missing value sentinel for IP Networks.
+
+ The address ``'0.0.0.0/32'`` is used.
+
+ Examples
+ --------
+ >>> IPNetworkArray([]).na_value
+ IPv4Network('0.0.0.0/32')
+ """
+ return self.dtype.na_value
+
+ def take(self, indices, allow_fill=False, fill_value=None):
+ '''This is a direct copy of the code from pandas documentation'''
+ # If the ExtensionArray is backed by an ndarray, then
+ # just pass that here instead of coercing to object.
+ data = self.astype(object)
+
+ if allow_fill and fill_value is None:
+ fill_value = self.dtype.na_value
+
+ # fill value should always be translated from the scalar
+ # type for the array, to the physical storage type for
+ # the data, before passing to take.
+
+ result = take(data, indices, fill_value=fill_value,
+ allow_fill=allow_fill)
+ return self._from_sequence(result, dtype=self.dtype)
+
+ # -------------------------------------------------------------------------
+ # Interfaces
+ # -------------------------------------------------------------------------
+
+ def __repr__(self):
+ formatted = [x.__repr__() for x in self.data]
+ return "IPNetworkArray({!r})".format(formatted)
+
+ def _format_values(self):
+ return [x.__repr__() for x in self.data]
+
+ @staticmethod
+ def _box_scalar(scalar):
+ return NotImplemented
+
+ @property
+ def _parser(self):
+ return to_ipnetwork
+
+ def __setitem__(self, key, value):
+ value = to_ipnetwork(value).data
+ self.data[key] = value
+
+ def __iter__(self):
+ return iter(self.to_pyipnetwork())
+
+ # ------------------------------------------------------------------------
+ # Serialization / Export
+ # ------------------------------------------------------------------------
+
+ def to_pyipnetwork(self):
+ """Convert the array to a list of scalar IP Network objects.
+
+ Returns
+ -------
+ networks : List
+ Each element of the list will be an :class:`ipaddress.IPv4Network`
+ or :class:`ipaddress.IPv6Network`, depending on the size of that
+ element.
+
+ Examples
+ ---------
+ >>> IPNetworkArray(['192.168.1.1/24', '2001:db8::1000/128']).to_pyipaddress()
+ [IPv4Network('192.168.1.0/24'), IPv6Network('2001:db8::1000/128')]
+ """
+ return [x for x in self.data]
+
+ def astype(self, dtype, copy=True):
+ if isinstance(dtype, IPNetworkType):
+ if copy:
+ self = self.copy()
+ return self
+ return super(IPNetworkArray, self).astype(dtype)
+
+ # ------------------------------------------------------------------------
+ # Ops
+ # ------------------------------------------------------------------------
+
+ def __eq__(self, other):
+ if isinstance(other, str):
+ pyips = self.to_pyipnetwork()
+ try:
+ match = ip_network(other, strict=False)
+ except:
+ return NotImplemented
+ return np.array([ip == match for ip in pyips])
+ elif isinstance(other, IPNetworkArray):
+ return self.data == other.data
+ else:
+ return NotImplemented
+
+ def __lt__(self, other):
+ # TODO: scalar ipaddress
+ if not isinstance(other, IPNetworkArray):
+ return NotImplemented
+ return (self.data < other.data)
+
+ def __le__(self, other):
+ if not isinstance(other, IPNetworkArray):
+ return NotImplemented
+ return (self.data <= other.data)
+
+ def __gt__(self, other):
+ if not isinstance(other, IPNetworkArray):
+ return NotImplemented
+ return (self.data > other.data)
+
+ def __ge__(self, other):
+ if not isinstance(other, IPNetworkArray):
+ return NotImplemented
+ return (self.data >= other.data)
+
+ def equals(self, other):
+ if not isinstance(other, IPNetworkArray):
+ raise TypeError("Cannot compare 'IPNetworkArray' "
+ "to type '{}'".format(type(other)))
+ # TODO: missing
+ return (self.data == other.data).all()
+
+ def value_counts(self, sort=True, ascending=False, normalize=False,
+ bins=None, dropna=True):
+
+ from pandas.core.algorithms import value_counts
+
+ pyips = self.to_pyipnetwork()
+ return value_counts(pyips, sort, ascending, normalize, bins, dropna)
+
+ def _reduce(self, name, **kwargs):
+ if name == 'max':
+ return self._max(**kwargs)
+ elif name == 'min':
+ return self._min(**kwargs)
+ return NotImplemented
+
+ def _max(self, **kwargs):
+ pyips = self.to_pyipnetwork()
+ skipna = kwargs.get('skipna', True)
+ result = None
+
+ for ip in pyips:
+ if (skipna and ip != self.na_value) or not skipna:
+ if not result:
+ result = ip
+ continue
+ if ip > result:
+ result = ip
+
+ return result
+
+ def _min(self, **kwargs):
+ pyips = self.to_pyipnetwork()
+ skipna = kwargs.get('skipna', True)
+ result = None
+
+ for ip in pyips:
+ if (skipna and ip != self.na_value) or not skipna:
+ if not result:
+ result = ip
+ continue
+ if ip < result:
+ result = ip
+
+ return result
+
+ def isna(self):
+ """Indicator for whether each element is missing.
+
+ The IPNetwork '0.0.0.0/32' is used to indicate missing values.
+
+ Examples
+ --------
+ >>> IPNetworkArray(['0.0.0.0/32', '192.168.1.1/24']).isna()
+ array([ True, False])
+ """
+ ips = self.data
+ return (ips == ip_network('0.0.0.0', strict=False))
+
+ def isin(self, other):
+ """Check whether elements of `self` are in `other`.
+
+ Comparison is done elementwise.
+
+ Parameters
+ ----------
+ other : str or list of str or IPNetworkArray
+ For ``str`` `other`, the argument is attempted to
+ be converted to an :class:`ipaddress.IPv4Network` or
+ a :class:`ipaddress.IPv6Network`. If the conversion fails,
+ a ValueError is raised.
+
+ For a sequence of strings, the same conversion is attempted.
+ You should not mix networks with addresses.
+
+ Finally, other may be an ``IPNetworkArray`` of networks to compare.
+
+ Returns
+ -------
+ contained : ndarray
+ A 1-D boolean ndarray with the same length as self.
+
+ Examples
+ --------
+ Comparison to a single network
+
+ >>> s = IPNetworkArray(['192.168.1.0/32', '10.1.1.1/32'])
+ >>> s.isin('192.168.1.0/24')
+ array([ True, False])
+
+ Comparison to many networks
+ >>> s.isin(['192.168.1.0/24', '192.168.2.0/24'])
+ array([ True, False])
+ """
+ from pandas.core.algorithms import isin
+
+ if not is_list_like(other):
+ other = [other]
+ if isinstance(other, IPNetworkArray):
+ to_match = other
+ else:
+ to_match = [ip_network(x, strict=False) for x in other]
+
+ mask = np.zeros(len(self), dtype='bool')
+ mask |= isin(self, to_match)
+ return mask
+
+ def supernet_of(self, addr):
+ """Return, per element, whether it is a supernet of addr; includes default route"""
+ if isinstance(addr, str):
+ ips = self.data
+ match = ip_network(addr, strict=False)
+ if match._version == 4:
+ return np.array([match.subnet_of(ip)
+ if ip._version == 4 else False
+ for ip in ips])
+ else:
+ return np.array([match.subnet_of(ip)
+ if ip._version == 6 else False
+ for ip in ips])
+
+ return NotImplemented
+
+ def subnet_of(self, addr):
+ """Return, per element, whether it is a subnet of addr"""
+ if isinstance(addr, str):
+ ips = self.data
+ match = ip_network(addr, strict=False)
+ if match._version == 4:
+ return np.array([ip.subnet_of(match)
+ if ip._version == 4 else False
+ for ip in ips])
+ else:
+ return np.array([ip.subnet_of(match)
+ if ip._version == 6 else False
+ for ip in ips])
+
+ return NotImplemented
+
+ # ------------------------------------------------------------------------
+ # IP Specific
+ # ------------------------------------------------------------------------
+
+ @property
+ def is_ipv4(self):
+ """Indicator for whether each address fits in the IPv4 space."""
+ # TODO: NA should be NA
+ pyips = self.to_pyipnetwork()
+ return np.array([ip._version == 4 for ip in pyips])
+
+ @property
+ def is_ipv6(self):
+ """Indicator for whether each address requires IPv6."""
+ pyips = self.to_pyipnetwork()
+ return np.array([ip._version == 6 for ip in pyips])
+
+ @property
+ def version(self):
+ """IP version (4 or 6)."""
+ return np.where(self.is_ipv4, 4, 6)
+
+ @property
+ def is_multicast(self):
+ """Indicator for whether each address is multicast."""
+ pyips = self.to_pyipnetwork()
+ return np.array([ip.is_multicast for ip in pyips])
+
+ @property
+ def is_default(self):
+ """Indicator for whether each prefix is the default route."""
+ pyips = self.to_pyipnetwork()
+ dflt = ip_network('0.0.0.0/0')
+ return np.array([ip == dflt for ip in pyips])
+
+ @property
+ def is_private(self):
+ """Indicator for whether each address is private."""
+ pyips = self.to_pyipnetwork()
+ return np.array([ip.is_private for ip in pyips])
+
+ @property
+ def is_global(self):
+ """Indicator for whether each address is global."""
+ pyips = self.to_pyipnetwork()
+ return np.array([ip.is_global for ip in pyips])
+
+ @property
+ def is_unspecified(self):
+ """Indicator for whether each address is unspecified."""
+ pyips = self.to_pyipnetwork()
+ return np.array([ip.is_unspecified for ip in pyips])
+
+ @property
+ def is_reserved(self):
+ """Indicator for whether each address is reserved."""
+ pyips = self.to_pyipnetwork()
+ return np.array([ip.is_reserved for ip in pyips])
+
+ @property
+ def is_loopback(self):
+ """Indicator for whether each address is loopback."""
+ pyips = self.to_pyipnetwork()
+ return np.array([ip.is_loopback for ip in pyips])
+
+ @property
+ def is_link_local(self):
+ """Indicator for whether each address is link local."""
+ pyips = self.to_pyipnetwork()
+ return np.array([ip.is_link_local for ip in pyips])
+
+ @property
+ def packed(self):
+ """Bytestring of the IP addresses
+
+ Each address takes 16 bytes. IPv4 addresses are prefixed
+ by zeros.
+ """
+ # TODO: I wonder if that should be post-fixed by 0s.
+ return self.data.tobytes()
+
+ @property
+ def prefixlen(self):
+ """Return the prefixlen of each prefix in the array"""
+ pyips = self.to_pyipnetwork()
+ return np.array([ip.prefixlen for ip in pyips])
+
+# -----------------------------------------------------------------------------
+# Accessor
+# -----------------------------------------------------------------------------
+
+
+@pd.api.extensions.register_series_accessor("ipnet")
+class IPNetAccessor:
+
+ is_ipv4 = DelegatedProperty("is_ipv4")
+ is_ipv6 = DelegatedProperty("is_ipv6")
+ version = DelegatedProperty("version")
+ is_multicast = DelegatedProperty("is_multicast")
+ is_private = DelegatedProperty("is_private")
+ is_global = DelegatedProperty("is_global")
+ is_unspecified = DelegatedProperty("is_unspecified")
+ is_reserved = DelegatedProperty("is_reserved")
+ is_loopback = DelegatedProperty("is_loopback")
+ is_link_local = DelegatedProperty("is_link_local")
+ is_default = DelegatedProperty("is_default")
+ prefixlen = DelegatedProperty("prefixlen")
+
+ isna = DelegatedMethod("isna")
+
+ def __init__(self, obj):
+ self._validate(obj)
+ self._data = obj.values
+ self._index = obj.index
+ self._name = obj.name
+
+ @staticmethod
+ def _validate(obj):
+ if not is_ipnetwork_type(obj):
+ raise AttributeError("Cannot use 'ipnet' accessor on objects of "
+ "dtype '{}'.".format(obj.dtype))
+
+ def isin(self, other):
+ return delegated_method(self._data.isin, self._index,
+ self._name, other)
+
+ def subnet_of(self, other):
+ return delegated_method(self._data.subnet_of, self._index,
+ self._name, other)
+
+ def supernet_of(self, other):
+ return delegated_method(self._data.supernet_of, self._index,
+ self._name, other)
+
+
+def is_ipnetwork_type(obj):
+
+ t = getattr(obj, 'dtype', obj)
+ try:
+ return isinstance(t, IPNetworkType) or issubclass(t, IPNetworkType)
+ except Exception:
+ return False
diff --git a/cyberpandas/parser.py b/cyberpandas/parser.py
index 215381b..313ef49 100644
--- a/cyberpandas/parser.py
+++ b/cyberpandas/parser.py
@@ -108,3 +108,46 @@ def _as_ip_object(val):
except ValueError:
raise ValueError("Could not parse {} is an address or "
"network".format(val))
+
+
+def _to_ipnetwork_array(values):
+ from . import IPNetworkType, IPNetworkArray
+
+ if isinstance(values, IPNetworkArray):
+ return values.data
+
+ if (isinstance(values, np.ndarray) and
+ values.ndim == 1 and
+ np.issubdtype(values.dtype, np.string_)):
+ values = np.asarray(values, dtype=IPNetworkType)
+ else:
+ values = [ipaddress.ip_network(x, strict=False) for x in values]
+
+ return np.atleast_1d(np.asarray(values, dtype=IPNetworkType))
+
+
+def to_ipnetwork(values):
+ """Convert values to IPNetworkArray
+
+ Parameters
+ ----------
+ values : int, str, bytes, or sequence of those
+
+ Returns
+ -------
+ addresses : IPNetworkArray
+
+ Examples
+ --------
+ Parse strings
+ >>> to_ipnetwork(['192.168.1.1/24',
+ ... '2001:0db8:85a3:0000:0000:8a2e:0370:7334/128'])
+
+ """
+ from . import IPNetworkArray
+
+ if not is_list_like(values):
+ values = [values]
+
+ return IPNetworkArray(_to_ipnetwork_array(values))
+
From 3be72442479339580078ad50394c64a7d9c87eec Mon Sep 17 00:00:00 2001
From: Dinesh Dutt
Date: Tue, 17 Mar 2020 08:07:24 -0700
Subject: [PATCH 2/9] Return string representation of IP network
---
cyberpandas/ipnetwork_array.py | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/cyberpandas/ipnetwork_array.py b/cyberpandas/ipnetwork_array.py
index 255fff2..f29875c 100644
--- a/cyberpandas/ipnetwork_array.py
+++ b/cyberpandas/ipnetwork_array.py
@@ -1,7 +1,7 @@
import abc
import collections
from ipaddress import IPv4Network, IPv6Network, ip_network
-from ipaddress import IPv4Address, IPv6Address
+from ipaddress import IPv4Address, IPv6Address, ip_address
import six
import numpy as np
@@ -24,7 +24,12 @@ class IPv4v6Network(object):
pass
-IPv4v6Network.register(IPv4Network)
+class cybIPv4Network(IPv4Network):
+ ndim = 1
+ pass
+
+
+IPv4v6Network.register(cybIPv4Network)
IPv4v6Network.register(IPv6Network)
@@ -140,15 +145,15 @@ def take(self, indices, allow_fill=False, fill_value=None):
# -------------------------------------------------------------------------
def __repr__(self):
- formatted = [x.__repr__() for x in self.data]
+ formatted = [x.__str__() for x in self.data]
return "IPNetworkArray({!r})".format(formatted)
def _format_values(self):
- return [x.__repr__() for x in self.data]
+ return [x.__str__() for x in self.data]
@staticmethod
def _box_scalar(scalar):
- return NotImplemented
+ return ip_address(combine(*scalar))
@property
def _parser(self):
From d1ec0e28f79e4b396922fd8a19636e683d7a6c35 Mon Sep 17 00:00:00 2001
From: Dinesh Dutt
Date: Sat, 11 Apr 2020 22:53:20 -0700
Subject: [PATCH 3/9] Add __getitem__ for ipnetwork to ignore ndim as the
underlying data struct has no ndim
---
cyberpandas/ipnetwork_array.py | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/cyberpandas/ipnetwork_array.py b/cyberpandas/ipnetwork_array.py
index f29875c..1ac36a5 100644
--- a/cyberpandas/ipnetwork_array.py
+++ b/cyberpandas/ipnetwork_array.py
@@ -1,8 +1,7 @@
import abc
-import collections
from ipaddress import IPv4Network, IPv6Network, ip_network
-from ipaddress import IPv4Address, IPv6Address, ip_address
+import operator
import six
import numpy as np
import pandas as pd
@@ -12,6 +11,7 @@
from cyberpandas._accessor import (DelegatedMethod, DelegatedProperty,
delegated_method)
from cyberpandas.base import NumPyBackedExtensionArrayMixin
+from ._utils import combine
# -----------------------------------------------------------------------------
# Extension Type
@@ -153,12 +153,19 @@ def _format_values(self):
@staticmethod
def _box_scalar(scalar):
- return ip_address(combine(*scalar))
+ return ip_network(combine(*scalar))
@property
def _parser(self):
return to_ipnetwork
+ def __getitem__(self, *args):
+ result = operator.getitem(self.data, *args)
+ if isinstance(result, tuple):
+ return self._box_scalar(result)
+ else:
+ return result
+
def __setitem__(self, key, value):
value = to_ipnetwork(value).data
self.data[key] = value
From bdbd9234d2b757d6f1e5fe5973940adabc595cff Mon Sep 17 00:00:00 2001
From: Dinesh Dutt
Date: Sat, 2 May 2020 20:04:39 -0700
Subject: [PATCH 4/9] Fixing a bunch of minor issues that caused grief
---
cyberpandas/ipnetwork_array.py | 39 +++++++++++++++++++---------------
cyberpandas/parser.py | 3 +--
2 files changed, 23 insertions(+), 19 deletions(-)
diff --git a/cyberpandas/ipnetwork_array.py b/cyberpandas/ipnetwork_array.py
index 1ac36a5..0706159 100644
--- a/cyberpandas/ipnetwork_array.py
+++ b/cyberpandas/ipnetwork_array.py
@@ -1,17 +1,15 @@
-import abc
-from ipaddress import IPv4Network, IPv6Network, ip_network
-
-import operator
-import six
-import numpy as np
-import pandas as pd
-from pandas.api.extensions import ExtensionDtype, take
-from pandas.api.types import is_list_like
-
+from cyberpandas.base import NumPyBackedExtensionArrayMixin
from cyberpandas._accessor import (DelegatedMethod, DelegatedProperty,
delegated_method)
-from cyberpandas.base import NumPyBackedExtensionArrayMixin
-from ._utils import combine
+from pandas.api.types import is_list_like
+from pandas.api.extensions import ExtensionDtype, take
+import pandas as pd
+import numpy as np
+import six
+import operator
+from ipaddress import IPv4Network, IPv6Network, ip_network
+import abc
+
# -----------------------------------------------------------------------------
# Extension Type
@@ -153,20 +151,22 @@ def _format_values(self):
@staticmethod
def _box_scalar(scalar):
- return ip_network(combine(*scalar))
+ return ip_network(scalar)
@property
def _parser(self):
+ from .parser import to_ipnetwork
return to_ipnetwork
def __getitem__(self, *args):
result = operator.getitem(self.data, *args)
- if isinstance(result, tuple):
+ if isinstance(result, str):
return self._box_scalar(result)
else:
- return result
+ return type(self)(result)
def __setitem__(self, key, value):
+ from .parser import to_ipnetwork
value = to_ipnetwork(value).data
self.data[key] = value
@@ -192,14 +192,19 @@ def to_pyipnetwork(self):
>>> IPNetworkArray(['192.168.1.1/24', '2001:db8::1000/128']).to_pyipaddress()
[IPv4Network('192.168.1.0/24'), IPv6Network('2001:db8::1000/128')]
"""
- return [x for x in self.data]
+ return [ip_network(x) for x in self.data]
def astype(self, dtype, copy=True):
if isinstance(dtype, IPNetworkType):
if copy:
self = self.copy()
return self
- return super(IPNetworkArray, self).astype(dtype)
+
+ if dtype == np.dtype('str'):
+ self.data = np.asarray([x.__str__() for x in self.data])
+ return self.data
+
+ raise TypeError("Cannot convert IPNetworkArray to anything but string")
# ------------------------------------------------------------------------
# Ops
diff --git a/cyberpandas/parser.py b/cyberpandas/parser.py
index 313ef49..9b5505b 100644
--- a/cyberpandas/parser.py
+++ b/cyberpandas/parser.py
@@ -111,7 +111,7 @@ def _as_ip_object(val):
def _to_ipnetwork_array(values):
- from . import IPNetworkType, IPNetworkArray
+ from .ipnetwork_array import IPNetworkType, IPNetworkArray
if isinstance(values, IPNetworkArray):
return values.data
@@ -150,4 +150,3 @@ def to_ipnetwork(values):
values = [values]
return IPNetworkArray(_to_ipnetwork_array(values))
-
From ab868d2ba8e6ab86e44a5c59935374f25af04498 Mon Sep 17 00:00:00 2001
From: Dinesh Dutt
Date: Sun, 3 May 2020 11:52:07 -0700
Subject: [PATCH 5/9] More minor fixes
---
cyberpandas/ipnetwork_array.py | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/cyberpandas/ipnetwork_array.py b/cyberpandas/ipnetwork_array.py
index 0706159..414d594 100644
--- a/cyberpandas/ipnetwork_array.py
+++ b/cyberpandas/ipnetwork_array.py
@@ -192,7 +192,7 @@ def to_pyipnetwork(self):
>>> IPNetworkArray(['192.168.1.1/24', '2001:db8::1000/128']).to_pyipaddress()
[IPv4Network('192.168.1.0/24'), IPv6Network('2001:db8::1000/128')]
"""
- return [ip_network(x) for x in self.data]
+ return [ip_network(x) for x in self._format_values()]
def astype(self, dtype, copy=True):
if isinstance(dtype, IPNetworkType):
@@ -201,10 +201,11 @@ def astype(self, dtype, copy=True):
return self
if dtype == np.dtype('str'):
+ if copy:
+ self = self.copy()
self.data = np.asarray([x.__str__() for x in self.data])
return self.data
-
- raise TypeError("Cannot convert IPNetworkArray to anything but string")
+ raise TypeError(f'Cannot convert ipnetwork to {dtype}')
# ------------------------------------------------------------------------
# Ops
From e713ca2b0448a5df69be0f27fd773dd18ccdff00 Mon Sep 17 00:00:00 2001
From: Dinesh Dutt
Date: Tue, 5 May 2020 08:20:14 -0700
Subject: [PATCH 6/9] Another silly fix to fix the thing I broke with my prev
fix
---
cyberpandas/ipnetwork_array.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/cyberpandas/ipnetwork_array.py b/cyberpandas/ipnetwork_array.py
index 414d594..96e5d40 100644
--- a/cyberpandas/ipnetwork_array.py
+++ b/cyberpandas/ipnetwork_array.py
@@ -200,11 +200,13 @@ def astype(self, dtype, copy=True):
self = self.copy()
return self
- if dtype == np.dtype('str'):
+ if dtype == np.str:
if copy:
self = self.copy()
self.data = np.asarray([x.__str__() for x in self.data])
return self.data
+ elif dtype == np.object:
+ return super(IPNetworkArray, self).astype(dtype, copy=copy)
raise TypeError(f'Cannot convert ipnetwork to {dtype}')
# ------------------------------------------------------------------------
From 0f2ccc2e6dfbc27d08255706c363b2e906574294 Mon Sep 17 00:00:00 2001
From: Dinesh Dutt
Date: Wed, 6 May 2020 11:22:33 -0700
Subject: [PATCH 7/9] More fixes, this time to take and init
---
cyberpandas/ipnetwork_array.py | 3 +--
cyberpandas/parser.py | 4 +---
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/cyberpandas/ipnetwork_array.py b/cyberpandas/ipnetwork_array.py
index 96e5d40..63dc242 100644
--- a/cyberpandas/ipnetwork_array.py
+++ b/cyberpandas/ipnetwork_array.py
@@ -134,8 +134,7 @@ def take(self, indices, allow_fill=False, fill_value=None):
# type for the array, to the physical storage type for
# the data, before passing to take.
- result = take(data, indices, fill_value=fill_value,
- allow_fill=allow_fill)
+ result = self.data.take(indices)
return self._from_sequence(result, dtype=self.dtype)
# -------------------------------------------------------------------------
diff --git a/cyberpandas/parser.py b/cyberpandas/parser.py
index 9b5505b..8a47943 100644
--- a/cyberpandas/parser.py
+++ b/cyberpandas/parser.py
@@ -116,9 +116,7 @@ def _to_ipnetwork_array(values):
if isinstance(values, IPNetworkArray):
return values.data
- if (isinstance(values, np.ndarray) and
- values.ndim == 1 and
- np.issubdtype(values.dtype, np.string_)):
+ if isinstance(values, ipaddress.IPv4Network):
values = np.asarray(values, dtype=IPNetworkType)
else:
values = [ipaddress.ip_network(x, strict=False) for x in values]
From 95c1b4cd7c034ca4b35c62be497515b14dece3e4 Mon Sep 17 00:00:00 2001
From: Dinesh Dutt
Date: Fri, 12 Jun 2020 15:35:49 -0700
Subject: [PATCH 8/9] IPv6-specific fixes
---
cyberpandas/ipnetwork_array.py | 7 ++++++-
cyberpandas/parser.py | 3 ++-
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/cyberpandas/ipnetwork_array.py b/cyberpandas/ipnetwork_array.py
index 63dc242..89eb3b2 100644
--- a/cyberpandas/ipnetwork_array.py
+++ b/cyberpandas/ipnetwork_array.py
@@ -27,8 +27,13 @@ class cybIPv4Network(IPv4Network):
pass
+class cybIPv6Network(IPv6Network):
+ ndim = 1
+ pass
+
+
IPv4v6Network.register(cybIPv4Network)
-IPv4v6Network.register(IPv6Network)
+IPv4v6Network.register(cybIPv6Network)
@pd.api.extensions.register_extension_dtype
diff --git a/cyberpandas/parser.py b/cyberpandas/parser.py
index 8a47943..b851bd5 100644
--- a/cyberpandas/parser.py
+++ b/cyberpandas/parser.py
@@ -116,7 +116,8 @@ def _to_ipnetwork_array(values):
if isinstance(values, IPNetworkArray):
return values.data
- if isinstance(values, ipaddress.IPv4Network):
+ if (isinstance(values, ipaddress.IPv4Network) or
+ isinstance(values, ipaddress.IPv6Network)):
values = np.asarray(values, dtype=IPNetworkType)
else:
values = [ipaddress.ip_network(x, strict=False) for x in values]
From 91e8ba426d414fbab817606e1712f32cee6e5bb8 Mon Sep 17 00:00:00 2001
From: Dinesh Dutt
Date: Mon, 15 Jun 2020 11:08:38 -0700
Subject: [PATCH 9/9] Fix getitem to return valid data type during factorizing
---
cyberpandas/ipnetwork_array.py | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/cyberpandas/ipnetwork_array.py b/cyberpandas/ipnetwork_array.py
index 89eb3b2..952ea88 100644
--- a/cyberpandas/ipnetwork_array.py
+++ b/cyberpandas/ipnetwork_array.py
@@ -166,6 +166,8 @@ def __getitem__(self, *args):
result = operator.getitem(self.data, *args)
if isinstance(result, str):
return self._box_scalar(result)
+ elif not hasattr(result, 'ndim') or result.ndim == 0:
+ return self._box_scalar(result)
else:
return type(self)(result)
@@ -258,6 +260,9 @@ def equals(self, other):
# TODO: missing
return (self.data == other.data).all()
+ def _values_for_factorize(self):
+ return self.astype(object), IPv4Network('0.0.0.0')
+
def value_counts(self, sort=True, ascending=False, normalize=False,
bins=None, dropna=True):