Source code for convert2.parse_int
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from datetime import datetime, date, timedelta
try:
from .pkg import chardet
from .pkg import rolex
from .pkg.six import string_types, binary_type
from .util import extract_number_from_string
except: # pragma: no cover
from convert2.pkg import chardet
from convert2.pkg.rolex import rolex
from convert2.pkg.six import string_types, binary_type
from convert2.util import extract_number_from_string
class RoundMethod:
    """Names of the float-rounding strategies understood by ``Anything2Int``."""
    round = "round"
    floor = "floor"
    ceiling = "ceiling"


class Anything2Int(object):
    """Parse anything to ``int``.

    The logic, in the order it is applied:

    - for ``None`` or NaN: return ``None``.
    - for int (or any value equal to its own ``int()``, e.g. ``3.0``):
      force to be generic int type.
    - for float: round according to ``ROUND_FLOAT_METHOD``.
    - for str: try ``int(value)``, then ``float(value)``; otherwise, when
      ``EXTRACT_NUMBER_FROM_TEXT`` is true, extract the number from the
      string, for example: "you have 5 dollar" -> 5. If the text does not
      contain exactly one number, e.g. "You got 3, he got 4", raise
      ValueError.
    - for ``pd.Timestamp``: seconds since 1970-01-01 00:00:00; a tz-naive
      value is measured against a tz-naive epoch.
    - for ``np.datetime64`` / ``datetime``: the value returned by
      ``rolex.to_utctimestamp``.
    - for date: the value returned by ``rolex.to_ordinal``.
    - for timedelta: its total seconds.
    - for anything else: ``int(value)``.

    Fractional seconds produced by the time-like branches are fed back
    through the float rule above.

    Every unparsable input raises ``ValueError``.
    """
    # Rounding strategy for non-integral floats;
    # one of 'floor', 'ceiling', 'round' (see ``RoundMethod``).
    ROUND_FLOAT_METHOD = RoundMethod.round
    # When true, a number embedded in free text is extracted and converted.
    EXTRACT_NUMBER_FROM_TEXT = True

    @staticmethod
    def _error(value):
        """Build the ``ValueError`` used for every unparsable input."""
        # Wrap in a 1-tuple so a tuple ``value`` is formatted with %r
        # instead of being unpacked as the % arguments.
        return ValueError("%r is not int parsable!" % (value,))

    def _round_float(self, value):
        """Convert a float to ``int`` using ``ROUND_FLOAT_METHOD``.

        :raises ValueError: if ``ROUND_FLOAT_METHOD`` is not a known method,
            or if ``value`` is infinite.
        """
        method = self.ROUND_FLOAT_METHOD
        if method == RoundMethod.round:
            rounder = round
        elif method == RoundMethod.floor:
            rounder = np.floor
        elif method == RoundMethod.ceiling:
            rounder = np.ceil
        else:
            raise ValueError(
                "ROUND_FLOAT_METHOD must be 'round', 'floor' or 'ceiling', "
                "got %r" % (method,)
            )
        try:
            return int(rounder(value))
        except (OverflowError, ValueError):
            # +/-inf has no integer representation.
            raise self._error(value)

    def __call__(self, value):
        """Convert ``value`` to ``int`` (or ``None`` for ``None`` / NaN).

        :raises ValueError: if ``value`` cannot be interpreted as an int.
        """
        #--- None ---
        if value is None:
            return None
        try:
            if np.isnan(value):
                return None
        except Exception:
            # Best-effort probe: np.isnan rejects most non-numeric inputs.
            pass

        #--- int, long, np.int8, np.int16, np.int32, np.int64 ---
        # Also catches integral floats such as 3.0 and booleans.
        try:
            if int(value) == value:
                return int(value)
        except Exception:
            pass

        #--- float, np.float16, np.float32, np.float64 ---
        if isinstance(value, (float, np.floating)):
            return self._round_float(value)

        #--- str, unicode, np.str ---
        if isinstance(value, string_types):
            # if a parsable int str, like "123"
            try:
                return int(value)
            except ValueError:
                pass

            # if a parsable float str, like "3.14"
            try:
                parsed = float(value)
            except ValueError:
                pass
            else:
                return self(parsed)

            # if an extractable parsable str, like "a 3.14 b"
            if self.EXTRACT_NUMBER_FROM_TEXT:
                found = extract_number_from_string(value)
                if len(found) == 1:
                    return self(float(found[0]))
            raise self._error(value)

        #--- datetime, np.datetime64, pd.Timestamp ---
        # pd.Timestamp must be tested before datetime (it is a subclass).
        if isinstance(value, pd.Timestamp):
            try:
                # pandas refuses to subtract tz-naive from tz-aware values,
                # so pick an epoch with matching awareness.
                if value.tzinfo is None:
                    epoch = pd.Timestamp("1970-01-01 00:00:00")
                else:
                    epoch = pd.Timestamp("1970-01-01 00:00:00Z")
                return self((value - epoch).total_seconds())
            except Exception:
                raise self._error(value)

        if isinstance(value, np.datetime64):
            try:
                return self(rolex.to_utctimestamp(value.astype(datetime)))
            except Exception:
                raise self._error(value)

        if isinstance(value, datetime):
            try:
                return self(rolex.to_utctimestamp(value))
            except Exception:
                raise self._error(value)

        #--- date ---
        # Checked after datetime because datetime is a subclass of date.
        if isinstance(value, date):
            try:
                return rolex.to_ordinal(value)
            except Exception:
                raise self._error(value)

        #--- timedelta ---
        if isinstance(value, timedelta):
            try:
                return self(value.total_seconds())
            except Exception:
                raise self._error(value)

        #--- other type ---
        try:
            return int(value)
        except Exception:
            raise self._error(value)
# Module-level converter instance; call ``any2int(value)`` to convert a value.
any2int = Anything2Int()